viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]

Try to fix other ambiguities in what is the current queue server for a fetcher processing a fetch batch 3, a=chris

Chris Pollett [2020-07-20 01:Jul:th]
Try to fix other ambiguities in what is the current queue server for a fetcher processing a fetch batch 3, a=chris
Filename
src/executables/Fetcher.php
diff --git a/src/executables/Fetcher.php b/src/executables/Fetcher.php
index 5c7996301..4e8171707 100755
--- a/src/executables/Fetcher.php
+++ b/src/executables/Fetcher.php
@@ -1398,7 +1398,7 @@ class Fetcher implements CrawlConstants
          */
         $cs = $this->current_server;
         if (!$at_least_once) {
-            $next_server = ($cs + 1) % $num_servers;
+            $this->current_server = ($cs + 1) % $num_servers;
         }
         do {
             if ($at_least_once ||
@@ -1411,7 +1411,6 @@ class Fetcher implements CrawlConstants
             $cs = ($cs + 1) % $num_servers;
             $i++;
         } while($this->exceedMemoryThreshold() && $i < $num_servers);
-        $this->current_server = $next_server;
     }
     /**
      * Sets parameters for fetching based on provided info struct
@@ -2621,11 +2620,11 @@ class Fetcher implements CrawlConstants
      * a delay until the post is successful. At this point, memory for this data
      * is freed.
      *
-     * @param string $current_server queue server to update
+     * @param int $server index of queue server to update
      */
-    public function updateScheduler($current_server)
+    public function updateScheduler($server)
     {
-        $queue_server = $this->queue_servers[$current_server];
+        $queue_server = $this->queue_servers[$server];
         L\crawlLog("Updating machine: " . $queue_server);
         $prefix = $this->fetcher_num . "-" . $this->channel . "-";
         if (count($this->to_crawl) <= 0) {
@@ -2670,11 +2669,11 @@ class Fetcher implements CrawlConstants
         }
         //handle schedule data
         $schedule_data = [];
-        if (isset($this->found_sites[self::TO_CRAWL][$current_server])) {
+        if (isset($this->found_sites[self::TO_CRAWL][$server])) {
             $schedule_data[self::TO_CRAWL] = &
-                $this->found_sites[self::TO_CRAWL][$current_server];
+                $this->found_sites[self::TO_CRAWL][$server];
         }
-        unset($this->found_sites[self::TO_CRAWL][$current_server]);
+        unset($this->found_sites[self::TO_CRAWL][$server]);
         $seen_cnt = 0;
         if (isset($this->found_sites[self::SEEN_URLS]) &&
             ($seen_cnt = count($this->found_sites[self::SEEN_URLS])) > 0 ) {
@@ -2701,22 +2700,22 @@ class Fetcher implements CrawlConstants
         unset($schedule_data);
         //handle mini inverted index
         if ($seen_cnt > 0 ) {
-            $this->buildMiniInvertedIndex($current_server);
+            $this->buildMiniInvertedIndex($server);
         }
-        if (isset($this->found_sites[self::INVERTED_INDEX][$current_server])) {
+        if (isset($this->found_sites[self::INVERTED_INDEX][$server])) {
             L\crawlLog("Saving Mini Inverted Index...");
-            $this->found_sites[self::INVERTED_INDEX][$current_server] =
+            $this->found_sites[self::INVERTED_INDEX][$server] =
                 $this->found_sites[self::INVERTED_INDEX][
-                    $current_server]->save(true, true);
-            $compress_urls = $this->compressAndUnsetSeenUrls($current_server);
+                    $server]->save(true, true);
+            $compress_urls = $this->compressAndUnsetSeenUrls($server);
             $len_urls =  strlen($compress_urls);
             L\crawlLog("...Finish Compressing seen URLs.");
             $out_string = L\packInt($len_urls) . $compress_urls;
             unset($compress_urls);
             $out_string .= $this->found_sites[self::INVERTED_INDEX][
-                $current_server];
+                $server];
             L\crawlLog(".....add inverted index string.");
-            unset($this->found_sites[self::INVERTED_INDEX][$current_server]);
+            unset($this->found_sites[self::INVERTED_INDEX][$server]);
             L\garbageCollect();
             $data = L\webencode($out_string);
             L\crawlLog(".....web encode result.");
@@ -2750,27 +2749,25 @@ class Fetcher implements CrawlConstants
      * links destined for the current queue server. Then unsets these
      * values from $this->found_sites
      *
-     * @param string $current_server server to compress and unset urls for
+     * @param int $server index of queue server to compress and unset urls for
      * @return string of compressed urls
      */
-    public function compressAndUnsetSeenUrls($current_server)
+    public function compressAndUnsetSeenUrls($server)
     {
         $compress_urls = "";
-        if (!isset($this->found_sites[self::LINK_SEEN_URLS][
-            $current_server])) {
-            $this->found_sites[self::LINK_SEEN_URLS][$current_server] =
-                [];
+        if (!isset($this->found_sites[self::LINK_SEEN_URLS][$server])) {
+            $this->found_sites[self::LINK_SEEN_URLS][$server] = [];
         }
         if (isset($this->found_sites[self::SEEN_URLS]) &&
             is_array($this->found_sites[self::SEEN_URLS])) {
             $this->found_sites[self::SEEN_URLS] =
                 array_merge($this->found_sites[self::SEEN_URLS],
-                $this->found_sites[self::LINK_SEEN_URLS][$current_server]);
+                $this->found_sites[self::LINK_SEEN_URLS][$server]);
         } else {
             $this->found_sites[self::SEEN_URLS] =
-                $this->found_sites[self::LINK_SEEN_URLS][$current_server];
+                $this->found_sites[self::LINK_SEEN_URLS][$server];
         }
-        $this->found_sites[self::LINK_SEEN_URLS][$current_server] =
+        $this->found_sites[self::LINK_SEEN_URLS][$server] =
             [];
         if (isset($this->found_sites[self::SEEN_URLS])) {
             $num_seen = count($this->found_sites[self::SEEN_URLS]);
@@ -2968,9 +2965,9 @@ class Fetcher implements CrawlConstants
      * burden on the queue server. The resulting mini index computed by
      * buildMiniInvertedIndex() is stored in
      * $this->found_sites[self::INVERTED_INDEX]
-     * @param string $current_server queue server to build inverted index for
+     * @param int $server index of queue server to build inverted index for
      */
-    public function buildMiniInvertedIndex($current_server)
+    public function buildMiniInvertedIndex($server)
     {
         $start_time = microtime(true);
         $keypad = "\x00\x00\x00\x00";
@@ -2982,9 +2979,9 @@ class Fetcher implements CrawlConstants
             for the fetcher we are not saving the index shards so
             name doesn't matter.
         */
-        if (!isset($this->found_sites[self::INVERTED_INDEX][$current_server])) {
-            $this->found_sites[self::INVERTED_INDEX][$current_server] =
-                new IndexShard("fetcher_shard_{$current_server}");
+        if (!isset($this->found_sites[self::INVERTED_INDEX][$server])) {
+            $this->found_sites[self::INVERTED_INDEX][$server] =
+                new IndexShard("fetcher_shard_{$server}");
         }
         for ($i = 0; $i < $num_seen; $i++) {
             $interim_time = microtime(true);
@@ -3099,7 +3096,7 @@ class Fetcher implements CrawlConstants
             $user_ranks =
                 (empty($site[self::USER_RANKS])) ? [] :
                 $site[self::USER_RANKS];
-            $this->found_sites[self::INVERTED_INDEX][$current_server
+            $this->found_sites[self::INVERTED_INDEX][$server
                 ]->addDocumentWords($doc_keys, self::NEEDS_OFFSET_FLAG,
                 $word_lists, $meta_ids, true, $doc_rank, $description_scores,
                 $user_ranks);
ViewGit