viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]
Filename | |
---|---|
src/configs/Config.php | |
src/controllers/FetchController.php | |
src/executables/QueueServer.php | |
src/locale/en_US/configure.ini | |
src/models/SearchfiltersModel.php | |
diff --git a/src/configs/Config.php b/src/configs/Config.php index aa2be2f1c..4fe1275b3 100755 --- a/src/configs/Config.php +++ b/src/configs/Config.php @@ -172,7 +172,7 @@ nsdefine('MIN_AD_VERSION', 36); nsdefine('RESOURCES_WIKI_VERSION', 5); /** * nsdefine's the BASE_URL constant for this script - * if run from the command line as part of index.php HTTP server scrip + * if run from the command line as part of index.php HTTP server script * set the current working directory as well */ function initializeBaseUrlAndCurrentWorkingDirectory() @@ -224,14 +224,17 @@ function initializeBaseUrlAndCurrentWorkingDirectory() $port = ( ($http == "http://" && ($server_port != 80) || ($http == "https://" && $server_port != 443))) ? ":" . $server_port : ""; - if (nsdefined('SERVER_CONTEXT')) {; - $context = SERVER_CONTEXT; - if (!empty($context['SERVER_NAME'])) { - $_SERVER['SERVER_NAME'] = $context['SERVER_NAME']; - } + if (nsdefined('SERVER_CONTEXT')) { + $context = SERVER_CONTEXT; + if (!empty($context['SERVER_NAME'])) { + $_SERVER['SERVER_NAME'] = $context['SERVER_NAME']; } + } $server_name = isset($_SERVER['SERVER_NAME']) ? $_SERVER['SERVER_NAME'] : "localhost"; + if (nsdefined('NAME_SERVER') && NAME_SERVER == "www." . $server_name) { + $server_name = NAME_SERVER; + } if (strpos($server_name, ":") !== false && $server_name[0] != '[') { $server_name = "[$server_name]"; //guessing ipv6 address } @@ -266,7 +269,6 @@ if (file_exists(BASE_DIR . "/configs/LocalConfig.php")) { way to set work directory) */ require_once(BASE_DIR . "/configs/LocalConfig.php"); } -initializeBaseUrlAndCurrentWorkingDirectory(); /** Yioop Namespace*/ nsdefine('NS', "seekquarry\\yioop\\"); /** controllers sub-namespace */ @@ -346,7 +348,9 @@ nsconddefine('MAINTENANCE_MESSAGE', <<<EOD This Yioop! installation is undergoing maintenance, please come back later! 
EOD ); -if (MAINTENANCE_MODE && $_SERVER["SERVER_ADDR"] != $_SERVER["REMOTE_ADDR"]) { +if (MAINTENANCE_MODE && !empty($_SERVER["SERVER_ADDR"]) && + !empty($_SERVER["REMOTE_ADDR"]) && + $_SERVER["SERVER_ADDR"] != $_SERVER["REMOTE_ADDR"]) { echo MAINTENANCE_MESSAGE; exit(); } @@ -408,6 +412,7 @@ if (file_exists(WORK_DIRECTORY . PROFILE_FILE_NAME)) { file_put_contents(WORK_DIRECTORY . PROFILE_FILE_NAME, $new_profile); } require_once WORK_DIRECTORY . PROFILE_FILE_NAME; + initializeBaseUrlAndCurrentWorkingDirectory(); nsdefine('PROFILE', true); nsdefine('CRAWL_DIR', WORK_DIRECTORY); if (is_dir(APP_DIR."/locale")) { @@ -430,6 +435,7 @@ if (file_exists(WORK_DIRECTORY . PROFILE_FILE_NAME)) { nsdefine("FIX_NAME_SERVER", true); } } else { + initializeBaseUrlAndCurrentWorkingDirectory(); if ((!isset( $_SERVER['SERVER_NAME']) || $_SERVER['SERVER_NAME']!=='localhost') && !nsdefined("NO_LOCAL_CHECK") && !nsdefined("WORK_DIRECTORY") diff --git a/src/controllers/FetchController.php b/src/controllers/FetchController.php index 6c1d2d0a8..beb10ac37 100755 --- a/src/controllers/FetchController.php +++ b/src/controllers/FetchController.php @@ -299,7 +299,7 @@ class FetchController extends Controller implements CrawlConstants if (file_exists($this->crawl_status_file_name)) { $crawl_status = unserialize(file_get_contents( $this->crawl_status_file_name)); - if ($crawl_status['CRAWL_TIME'] != 0) { + if (!empty($crawl_status['CRAWL_TIME'])) { $restart = false; } } diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php index 3daea00e0..30de04c36 100755 --- a/src/executables/QueueServer.php +++ b/src/executables/QueueServer.php @@ -1773,7 +1773,7 @@ class QueueServer implements CrawlConstants, Join static $blocked = false; if ($blocking && $blocked) { L\crawlLog("Indexer waiting for merge tiers to ". - "complete before write partition. 
B"); + "complete before write partition."); return; } if (!$blocking) { @@ -1785,10 +1785,10 @@ class QueueServer implements CrawlConstants, Join L\crawlLog("Indexer: Processing index data in $file..."); $start_time = microtime(true); $start_total_time = microtime(true); - $pre_sites = L\webdecode(file_get_contents($file)); - $len_urls = L\unpackInt(substr($pre_sites, 0, 4)); - $seen_urls_string = substr($pre_sites, 4, $len_urls); - $pre_sites = substr($pre_sites, 4 + $len_urls); + $pre_sites_and_index = L\webdecode(file_get_contents($file)); + $len_urls = L\unpackInt(substr($pre_sites_and_index, 0, 4)); + $seen_urls_string = substr($pre_sites_and_index, 4, $len_urls); + $pre_sites_and_index = substr($pre_sites_and_index, 4 + $len_urls); $sites[self::SEEN_URLS] = []; $pos = 0; $num = 0; @@ -1828,12 +1828,30 @@ class QueueServer implements CrawlConstants, Join return; } L\crawlLog("A. Indexer Load SEEN_URLS. Memory usage:". - memory_get_usage() ." time: ".L\changeInMicrotime($start_time)); + memory_get_usage() ." time: " . L\changeInMicrotime($start_time)); $sites[self::INVERTED_INDEX] = IndexShard::load("fetcher_shard", - $pre_sites); - unset($pre_sites); + $pre_sites_and_index); + if (empty($sites[self::INVERTED_INDEX])) { + L\crawlLog("Index data file inverted index empty or corrupt."); + L\crawlLog("Indexer Done Index Processing File: $file. " . + "Total time: " . L\changeInMicrotime($start_total_time)); + unlink($file); + return; + } + $index_shard = $sites[self::INVERTED_INDEX]; + $generation = $this->index_archive->initGenerationToAdd( + $index_shard->num_docs, $this, $blocking); + if ($generation == -1) { + L\crawlLog("Indexer waiting for merge tiers to ". + "complete before write partition. A"); + $blocked = true; + // In this case if we block, will end up reprocess file + return; /* if don't return here can process rest of + method */ + } + unset($pre_sites_and_index); L\crawlLog("B. Indexer Load Sent shard. Memory usage:". - memory_get_usage() ." 
time: ".(L\changeInMicrotime($start_time))); + memory_get_usage() ." time: " . (L\changeInMicrotime($start_time))); $start_time = microtime(true); //do deduplication of summaries if (isset($sites[self::SEEN_URLS]) && @@ -1860,7 +1878,7 @@ class QueueServer implements CrawlConstants, Join L\crawlHash($link_url_parts[1], true) . L\crawlHash($seen_sites[$i][self::URL], true) . $reftype . substr(L\crawlHash( - UrlParser::getHost($link_url_parts[5]) . "/", true), 1); + UrlParser::getHost($link_url_parts[5]) . "/", true), 1); $seen_sites[$i][self::IS_DOC] = false; } else { $seen_sites[$i][self::IS_DOC] = true; @@ -1872,40 +1890,29 @@ class QueueServer implements CrawlConstants, Join $recent_urls_count++; } } - if (isset($sites[self::INVERTED_INDEX])) { - $index_shard = $sites[self::INVERTED_INDEX]; - $generation = $this->index_archive->initGenerationToAdd( - $index_shard->num_docs, $this, $blocking); - if ($generation == -1) { - L\crawlLog("Indexer waiting for merge tiers to ". - "complete before write partition. A"); - $blocked = true; - return; - } - $summary_offsets = []; - if (isset($seen_sites)) { - $this->index_archive->addPages( - $generation, self::SUMMARY_OFFSET, $seen_sites, - $visited_urls_count); - foreach ($seen_sites as $site) { - if ($site[self::IS_DOC]) { // so not link - $site_url = str_replace('|', "%7C", $site[self::URL]); - $host = UrlParser::getHost($site_url); - $hash = L\crawlHash($site_url, true). - $site[self::HASH] . - "d". substr(L\crawlHash($host."/", true), 1); - } else { - $hash = $site[self::HASH_URL]; - } - $summary_offsets[$hash] = $site[self::SUMMARY_OFFSET]; + $summary_offsets = []; + if (!empty($seen_sites)) { + $this->index_archive->addPages($generation, self::SUMMARY_OFFSET, + $seen_sites, $visited_urls_count); + foreach ($seen_sites as $site) { + if ($site[self::IS_DOC]) { // so not link + $site_url = str_replace('|', "%7C", $site[self::URL]); + $host = UrlParser::getHost($site_url); + $hash = L\crawlHash($site_url, true) . 
$site[self::HASH] . + "d". substr(L\crawlHash($host . "/", true), 1); + } else { + $hash = $site[self::HASH_URL]; } - unset($seen_sites); + $summary_offsets[$hash] = $site[self::SUMMARY_OFFSET]; } - L\crawlLog("C. Indexer init local shard, store ". - "Summaries memory usage: ". memory_get_usage() . - " time: " . L\changeInMicrotime($start_time)); - $start_time = microtime(true); - // added summary offset info to inverted index data + unset($seen_sites); + } + L\crawlLog("C. Indexer init local shard, store " . + "Summaries memory usage: ". memory_get_usage() . + " time: " . L\changeInMicrotime($start_time)); + $start_time = microtime(true); + // added summary offset info to inverted index data + if (!empty($summary_offsets)) { $index_shard->changeDocumentOffsets($summary_offsets); L\crawlLog("D. Indexer Update shard offsets. Memory usage: ". memory_get_usage() . " time: " . @@ -1913,16 +1920,16 @@ class QueueServer implements CrawlConstants, Join $start_time = microtime(true); $this->index_archive->addIndexData($index_shard); $this->index_dirty = true; + L\crawlLog("E. Indexer Add index shard. Memory usage: ". + memory_get_usage() . " time: " . + L\changeInMicrotime($start_time)); } - L\crawlLog("E. Indexer Add index shard. Memory usage: ". - memory_get_usage() . " time: " . - L\changeInMicrotime($start_time)); - L\crawlLog("Indexer Done Index Processing File: $file. Total time: ". - L\changeInMicrotime($start_total_time)); if (isset($recent_urls)) { $sites[self::RECENT_URLS] = $recent_urls; $this->writeCrawlStatus($sites); } + L\crawlLog("Indexer Done Index Processing File: $file. Total time: ". 
+ L\changeInMicrotime($start_total_time)); if (file_exists($file)) { //Haven't tracked down yet, but can try to delete twice giving warn unlink($file); diff --git a/src/locale/en_US/configure.ini b/src/locale/en_US/configure.ini index 8939419f3..8b6be01b0 100644 --- a/src/locale/en_US/configure.ini +++ b/src/locale/en_US/configure.ini @@ -1444,7 +1444,7 @@ trending_element_term = "Term" trending_element_score = "Score" trending_element_date = "Computed %s" trending_element_hourly_trend = "Hourly Trend Score for '%s' for Last Day" -trending_element_daily_trend = "Dailly Trend Score for '%s' for Last Week" +trending_element_daily_trend = "Daily Trend Score for '%s' for Last Week" ; ; ManageusersElement.php manageusers_element_users = "User List" diff --git a/src/models/SearchfiltersModel.php b/src/models/SearchfiltersModel.php index 04fbec6da..bcd9a893f 100644 --- a/src/models/SearchfiltersModel.php +++ b/src/models/SearchfiltersModel.php @@ -60,8 +60,7 @@ class SearchfiltersModel extends Model $this->dir_name = C\CRAWL_DIR . "/search_filters"; if (!file_exists(C\CRAWL_DIR . "/search_filters")) { mkdir($this->dir_name); - $this->db->setWorldPermissionsRecursive( - $this->dir_name, true); + $this->db->setWorldPermissionsRecursive($this->dir_name, true); } } /**