viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]
Filename | |
---|---|
src/executables/Fetcher.php |
diff --git a/src/executables/Fetcher.php b/src/executables/Fetcher.php index 5c7996301..4e8171707 100755 --- a/src/executables/Fetcher.php +++ b/src/executables/Fetcher.php @@ -1398,7 +1398,7 @@ class Fetcher implements CrawlConstants */ $cs = $this->current_server; if (!$at_least_once) { - $next_server = ($cs + 1) % $num_servers; + $this->current_server = ($cs + 1) % $num_servers; } do { if ($at_least_once || @@ -1411,7 +1411,6 @@ class Fetcher implements CrawlConstants $cs = ($cs + 1) % $num_servers; $i++; } while($this->exceedMemoryThreshold() && $i < $num_servers); - $this->current_server = $next_server; } /** * Sets parameters for fetching based on provided info struct @@ -2621,11 +2620,11 @@ class Fetcher implements CrawlConstants * a delay until the post is successful. At this point, memory for this data * is freed. * - * @param string $current_server queue server to update + * @param string $server index of queue server to update */ - public function updateScheduler($current_server) + public function updateScheduler($server) { - $queue_server = $this->queue_servers[$current_server]; + $queue_server = $this->queue_servers[$server]; L\crawlLog("Updating machine: " . $queue_server); $prefix = $this->fetcher_num . "-" . $this->channel . "-"; if (count($this->to_crawl) <= 0) { @@ -2670,11 +2669,11 @@ class Fetcher implements CrawlConstants } //handle schedule data $schedule_data = []; - if (isset($this->found_sites[self::TO_CRAWL][$current_server])) { + if (isset($this->found_sites[self::TO_CRAWL][$server])) { $schedule_data[self::TO_CRAWL] = & - $this->found_sites[self::TO_CRAWL][$current_server]; + $this->found_sites[self::TO_CRAWL][$server]; } - unset($this->found_sites[self::TO_CRAWL][$current_server]); + unset($this->found_sites[self::TO_CRAWL][$server]); $seen_cnt = 0; if (isset($this->found_sites[self::SEEN_URLS]) && ($seen_cnt = count($this->found_sites[self::SEEN_URLS])) > 0 ) { @@ -2701,22 +2700,22 @@ class Fetcher implements CrawlConstants unset($schedule_data); //handle mini inverted index if ($seen_cnt > 0 ) { - $this->buildMiniInvertedIndex($current_server); + $this->buildMiniInvertedIndex($server); } - if (isset($this->found_sites[self::INVERTED_INDEX][$current_server])) { + if (isset($this->found_sites[self::INVERTED_INDEX][$server])) { L\crawlLog("Saving Mini Inverted Index..."); - $this->found_sites[self::INVERTED_INDEX][$current_server] = + $this->found_sites[self::INVERTED_INDEX][$server] = $this->found_sites[self::INVERTED_INDEX][ - $current_server]->save(true, true); - $compress_urls = $this->compressAndUnsetSeenUrls($current_server); + $server]->save(true, true); + $compress_urls = $this->compressAndUnsetSeenUrls($server); $len_urls = strlen($compress_urls); L\crawlLog("...Finish Compressing seen URLs."); $out_string = L\packInt($len_urls) . $compress_urls; unset($compress_urls); $out_string .= $this->found_sites[self::INVERTED_INDEX][ - $current_server]; + $server]; L\crawlLog(".....add inverted index string."); - unset($this->found_sites[self::INVERTED_INDEX][$current_server]); + unset($this->found_sites[self::INVERTED_INDEX][$server]); L\garbageCollect(); $data = L\webencode($out_string); L\crawlLog(".....web encode result."); @@ -2750,27 +2749,25 @@ class Fetcher implements CrawlConstants * links destined for the current queue server. Then unsets these * values from $this->found_sites * - * @param string $current_server server to compress and unset urls for + * @param int $server index of queue server to compress and unset urls for * @return string of compressed urls */ - public function compressAndUnsetSeenUrls($current_server) + public function compressAndUnsetSeenUrls($server) { $compress_urls = ""; - if (!isset($this->found_sites[self::LINK_SEEN_URLS][ - $current_server])) { - $this->found_sites[self::LINK_SEEN_URLS][$current_server] = - []; + if (!isset($this->found_sites[self::LINK_SEEN_URLS][$server])) { + $this->found_sites[self::LINK_SEEN_URLS][$server] = []; } if (isset($this->found_sites[self::SEEN_URLS]) && is_array($this->found_sites[self::SEEN_URLS])) { $this->found_sites[self::SEEN_URLS] = array_merge($this->found_sites[self::SEEN_URLS], - $this->found_sites[self::LINK_SEEN_URLS][$current_server]); + $this->found_sites[self::LINK_SEEN_URLS][$server]); } else { $this->found_sites[self::SEEN_URLS] = - $this->found_sites[self::LINK_SEEN_URLS][$current_server]; + $this->found_sites[self::LINK_SEEN_URLS][$server]; } - $this->found_sites[self::LINK_SEEN_URLS][$current_server] = + $this->found_sites[self::LINK_SEEN_URLS][$server] = []; if (isset($this->found_sites[self::SEEN_URLS])) { $num_seen = count($this->found_sites[self::SEEN_URLS]); @@ -2968,9 +2965,9 @@ class Fetcher implements CrawlConstants * burden on the queue server. The resulting mini index computed by * buildMiniInvertedIndex() is stored in * $this->found_sites[self::INVERTED_INDEX] - * @param string $current_server queue server to build inverted index for + * @param string $server index of queue server to build inverted index for */ - public function buildMiniInvertedIndex($current_server) + public function buildMiniInvertedIndex($server) { $start_time = microtime(true); $keypad = "\x00\x00\x00\x00"; @@ -2982,9 +2979,9 @@ class Fetcher implements CrawlConstants for the fetcher we are not saving the index shards so name doesn't matter. */ - if (!isset($this->found_sites[self::INVERTED_INDEX][$current_server])) { - $this->found_sites[self::INVERTED_INDEX][$current_server] = - new IndexShard("fetcher_shard_{$current_server}"); + if (!isset($this->found_sites[self::INVERTED_INDEX][$server])) { + $this->found_sites[self::INVERTED_INDEX][$server] = + new IndexShard("fetcher_shard_{$server}"); } for ($i = 0; $i < $num_seen; $i++) { $interim_time = microtime(true); @@ -3099,7 +3096,7 @@ class Fetcher implements CrawlConstants $user_ranks = (empty($site[self::USER_RANKS])) ? [] : $site[self::USER_RANKS]; - $this->found_sites[self::INVERTED_INDEX][$current_server + $this->found_sites[self::INVERTED_INDEX][$server ]->addDocumentWords($doc_keys, self::NEEDS_OFFSET_FLAG, $word_lists, $meta_ids, true, $doc_rank, $description_scores, $user_ranks);