diff --git a/src/controllers/FetchController.php b/src/controllers/FetchController.php
index a50e96a63..6c1d2d0a8 100755
--- a/src/controllers/FetchController.php
+++ b/src/controllers/FetchController.php
@@ -288,12 +288,13 @@ class FetchController extends Controller implements CrawlConstants
             $crawl_time = 0;
             $check_crawl_time = 0;
         }
+        $channel = $this->getChannel();
         $index_schedule_file = C\CRAWL_DIR . "/schedules/" .
             self::index_closed_name . $crawl_time . ".txt";
         if ($crawl_time > 0 && file_exists($index_schedule_file) &&
             $check_crawl_time > intval(fileatime($index_schedule_file))
             && !file_exists(C\CRAWL_DIR .
-            "/schedules/QueueServerMessages.txt") ) {
+            "/schedules/$channel-QueueServerMessages.txt") ) {
             $restart = true;
             if (file_exists($this->crawl_status_file_name)) {
                 $crawl_status = unserialize(file_get_contents(
diff --git a/src/controllers/SearchController.php b/src/controllers/SearchController.php
index 3798f8562..8ad9f87d2 100755
--- a/src/controllers/SearchController.php
+++ b/src/controllers/SearchController.php
@@ -321,7 +321,7 @@ class SearchController extends Controller implements CrawlConstants
                 $this->subsearch_identifier = $search["INDEX_IDENTIFIER"];
                 if (!isset($_REQUEST['num']) &&
                     isset($search["PER_PAGE"])) {
-                    $_REQUEST['num']= $search["PER_PAGE"];
+                    $_REQUEST['num'] = $search["PER_PAGE"];
                 }
                 break;
             }
@@ -864,8 +864,7 @@ class SearchController extends Controller implements CrawlConstants
                 if (!empty($this->subsearch_name)) {
                     $data['PAGING_QUERY']['s'] = $this->subsearch_name;
                 }
-                $data['QUERY'] = urlencode($this->clean($data['QUERY'],
-                    "string"));
+                $data['QUERY'] = urlencode($data['QUERY']);
                 break;
             case "query":
                 // no break
@@ -885,7 +884,7 @@ class SearchController extends Controller implements CrawlConstants
                 if (!empty($this->subsearch_name)) {
                     $data['PAGING_QUERY']['s'] = $this->subsearch_name;
                 }
-                $data['QUERY'] = urlencode($this->clean($query, "string"));
+                $data['QUERY'] = urlencode($query);
                 if ((php_sapi_name() != 'cli' ||
                     C\nsdefined("IS_OWN_WEB_SERVER")) &&
                     C\nsdefined("MONETIZATION_TYPE") &&
diff --git a/src/controllers/components/CrawlComponent.php b/src/controllers/components/CrawlComponent.php
index 35482421c..15cdc19a2 100644
--- a/src/controllers/components/CrawlComponent.php
+++ b/src/controllers/components/CrawlComponent.php
@@ -328,7 +328,7 @@ class CrawlComponent extends Component implements CrawlConstants
             is_array($crawl_params[self::INDEXING_PLUGINS])) {
             foreach ($crawl_params[self::INDEXING_PLUGINS] as $plugin) {
                 if ($plugin == "") {continue;}
-                $plugin_class = C\NS_PLUGINS . $plugin."Plugin";
+                $plugin_class = C\NS_PLUGINS . $plugin . "Plugin";
                 $plugin_obj = $parent->plugin(lcfirst($plugin));
                 if (method_exists($plugin_class, "loadConfiguration")) {
                     $crawl_params[self::INDEXING_PLUGINS_DATA][$plugin] =
diff --git a/src/executables/Fetcher.php b/src/executables/Fetcher.php
index a5e84143c..9a6a5ed3f 100755
--- a/src/executables/Fetcher.php
+++ b/src/executables/Fetcher.php
@@ -665,7 +665,9 @@ class Fetcher implements CrawlConstants
                $info[self::STATUS] being set
              */
             if (!isset($info[self::STATUS])) {
-                if ($info === true) {$info = [];}
+                if ($info === true) {
+                    $info = [];
+                }
                 $info[self::STATUS] = self::CONTINUE_STATE;
             }
             if ($info[self::STATUS] == self::NO_DATA_STATE) {
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index b312340b9..96a14b448 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -336,6 +336,7 @@ class QueueServer implements CrawlConstants, Join
         $this->archive_modified_time = 0;
         $this->crawl_time = 0;
         $this->channel = 0;
+        $this->repeat_type = -1;
         $this->robots_txt = C\ALWAYS_FOLLOW_ROBOTS;
         $this->cache_pages = true;
         $this->page_recrawl_frequency = C\PAGE_RECRAWL_FREQUENCY;
@@ -397,7 +398,11 @@ class QueueServer implements CrawlConstants, Join
             "schedule_status.txt"];
         foreach ($old_message_names as $name) {
             if (file_exists(C\CRAWL_DIR."/schedules/{$this->channel}-$name")) {
-                @unlink(C\CRAWL_DIR."/schedules/{$this->channel}-$name");
+                @unlink(C\CRAWL_DIR . "/schedules/{$this->channel}-$name");
+                $remove = true;
+            }
+            if (file_exists(C\CRAWL_DIR."/schedules/$name")) {
+                @unlink(C\CRAWL_DIR . "/schedules/$name");
                 $remove = true;
             }
         }
@@ -587,7 +592,7 @@ class QueueServer implements CrawlConstants, Join
             $init_args = ["QueueServer.php", "start", $this->channel, $process];
             L\crawlLog( "!!!!Writing to $error_log ".
                 "crash message about $process...");
-            CrawlDaemon::init( $init_args, "QueueServer", -3);
+            CrawlDaemon::init($init_args, "QueueServer", -3);
             if ($info[self::STATUS] != self::WAITING_START_MESSAGE_STATE) {
                 L\crawlLog("Sleeping before sending restart message other process");
                 sleep(2 * C\QUEUE_SLEEP_TIME);
@@ -595,6 +600,7 @@ class QueueServer implements CrawlConstants, Join
             $crawl_params[self::STATUS] = "RESUME_CRAWL";
             $crawl_params[self::CRAWL_TIME] = $this->crawl_time;
             $crawl_params[self::CRAWL_TYPE] = $this->crawl_type;
+            $crawl_params[self::REPEAT_TYPE] = $this->repeat_type;
             $crawl_params[self::CHANNEL] = $this->channel;
             $info_string = serialize($crawl_params);
             $process_message_file = C\CRAWL_DIR . "/schedules/" .
@@ -737,7 +743,7 @@ class QueueServer implements CrawlConstants, Join
         $max_links = max(C\MAX_LINKS_PER_PAGE, C\MAX_LINKS_PER_SITEMAP);
         if ($count < C\NUM_URLS_QUEUE_RAM -
             C\SEEN_URLS_BEFORE_UPDATE_SCHEDULER * $max_links) {
-            $info = $this->processQueueUrls();
+            $this->processQueueUrls();
         }
         if ($count > 0) {
             $top = $this->web_queue->peekQueue();
@@ -758,7 +764,7 @@ class QueueServer implements CrawlConstants, Join
               been taken by some fetcher
              */
             if (!file_exists(
-                C\CRAWL_DIR."/schedules/" . self::schedule_name.
+                C\CRAWL_DIR . "/schedules/" . self::schedule_name.
                 $this->crawl_time . ".txt")) {
                 $this->produceFetchBatch();
             }
@@ -984,7 +990,7 @@ class QueueServer implements CrawlConstants, Join
                 break;
             case "RESUME_CRAWL":
                 if (isset($info[self::CRAWL_TIME]) &&
-                    (file_exists(C\CRAWL_DIR.'/cache/'.
+                    (file_exists(C\CRAWL_DIR . '/cache/'.
                     self::queue_base_name . $info[self::CRAWL_TIME]) ||
                     $info[self::CRAWL_TYPE] == self::ARCHIVE_CRAWL) ) {
                     if ($old_info[self::STATUS] == self::CONTINUE_STATE) {
@@ -1300,8 +1306,6 @@ class QueueServer implements CrawlConstants, Join
         $this->waiting_hosts = [];
         $this->initializeWebQueue();
         $this->initializeIndexBundle($info, $try_to_set_from_old_index);
-        $info[self::STATUS] = self::CONTINUE_STATE;
-        return $info;
     }
     /**
      * Function used to set up an indexer's IndexArchiveBundle or
@@ -1320,7 +1324,7 @@ class QueueServer implements CrawlConstants, Join
         if ($try_to_set_from_old_index === null) {
             $try_to_set_from_old_index = array_keys(self::$info_parameter_map);
         }
-        if(empty($this->repeat_type) || $this->repeat_type < 0) {
+        if(empty($this->repeat_type) || $this->repeat_type <= 0) {
            $class_name = C\NS_LIB . "IndexArchiveBundle";
            $dir = C\CRAWL_DIR . '/cache/' . self::index_data_base_name .
                $this->crawl_time;
@@ -1351,7 +1355,7 @@ class QueueServer implements CrawlConstants, Join
              (might take a while if merging dictionary)
              */
            $this->writeCrawlStatus($sites);
-        } else if (!empty($this->repeat_type) && $this->repeat_type >= 0) {
+        } else if (!empty($this->repeat_type) && $this->repeat_type > 0) {
            $this->index_archive = new $class_name($dir, false,
                serialize($info), C\NUM_DOCS_PER_GENERATION,
                $this->repeat_type);
@@ -2087,15 +2091,11 @@ class QueueServer implements CrawlConstants, Join
      * Checks for a new crawl file or a schedule data for the current crawl and
      * if such a exists then processes its contents adding the relevant urls to
      * the priority queue
-     *
-     * @return array info array with continue status
      */
     public function processQueueUrls()
     {
         L\crawlLog("Scheduler Start checking for new URLs data memory usage: ".
             memory_get_usage());
-        $info = [];
-        $info[self::STATUS] = self::CONTINUE_STATE;
         $start_schedule_filename = C\CRAWL_DIR . "/schedules/" .
             $this->channel . "-" . self::schedule_start_name;
         if (file_exists($start_schedule_filename)) {
@@ -2107,13 +2107,11 @@ class QueueServer implements CrawlConstants, Join
             L\crawlLog("Scheduler Start schedule urls" .
                 $start_schedule_filename);
             $this->processDataArchive($start_schedule_filename);
-            return $info;
         }
         $schedule_dir = C\CRAWL_DIR."/schedules/" .
             self::schedule_data_base_name . $this->crawl_time;
         $this->processDataFile($schedule_dir, "processDataArchive");
         L\crawlLog("done.");
-        return $info;
     }
     /**
      * Process a file of to-crawl urls adding to or adjusting the weight in
diff --git a/src/library/BloomFilterBundle.php b/src/library/BloomFilterBundle.php
index 04ae13dc0..eb36e302b 100644
--- a/src/library/BloomFilterBundle.php
+++ b/src/library/BloomFilterBundle.php
@@ -151,7 +151,7 @@ class BloomFilterBundle
         }
         for ($j = 0; $j < $count; $j++) {
             if ($field_names === null) {
-                $tmp = $arr[$j];
+                $tmp = & $arr[$j];
                 if ($tmp !== false && $tmp_filter->contains($tmp)) {
                     /*
                       We deliberately don't try to add anything that has
@@ -164,7 +164,7 @@ class BloomFilterBundle
                 }
             } else { //now do the same strategy for the array of fields case
                 foreach ($field_names as $field_name) {
-                    $tmp = $arr[$j][$field_name];
+                    $tmp = & $arr[$j][$field_name];
                     if ($tmp !== false && $tmp_filter->contains($tmp)) {
                         unset($arr[$j]);
                         break;
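
A note on the file naming used above: the FetchController and QueueServer hunks both move queue server message files to a channel-prefixed name (for example 0-QueueServerMessages.txt), with any stale unprefixed file removed at start-up. The following is only a minimal sketch of that convention, not code from the patch; the bare CRAWL_DIR constant and the queueMessagePath() helper are assumptions made for this example.

<?php
// Illustration only: channel-prefixed queue server message paths as implied
// by the patch above. CRAWL_DIR and queueMessagePath() are assumptions made
// for this sketch, not identifiers from the Yioop code base.
define('CRAWL_DIR', '/tmp/yioop_crawl');

function queueMessagePath(int $channel, string $name): string
{
    // New convention: "<channel>-<name>" under the schedules folder.
    return CRAWL_DIR . "/schedules/$channel-$name";
}

$channel = 0;
$new_file = queueMessagePath($channel, "QueueServerMessages.txt");
$old_file = CRAWL_DIR . "/schedules/QueueServerMessages.txt";

// A restart check in the spirit of FetchController consults only the
// channel-scoped file; stale unprefixed files from older versions get unlinked,
// much like QueueServer's start-up clean-up of $old_message_names.
if (!file_exists($new_file) && file_exists($old_file)) {
    @unlink($old_file);
}

With channel 0 the sketch resolves to schedules/0-QueueServerMessages.txt, which matches the file name the restart check in FetchController now tests for.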