[ViewGit viewer notice, not part of the commit] viewgit/inc/functions.php:22: Function utf8_encode() is deprecated (E_DEPRECATED, level 8192)

Fixes a bug in pack unpack posting, cosmetic tweaks to ArcTool.php and QueueServer.php code, a=chris

Chris Pollett [2019-06-24, hour 17; minutes and seconds missing from the page's garbled timestamp]
Fixes a bug in pack unpack posting, cosmetic tweaks to ArcTool.php and QueueServer.php code, a=chris
Filename
src/executables/ArcTool.php
src/executables/QueueServer.php
src/library/Utility.php
tests/UtilityTest.php
diff --git a/src/executables/ArcTool.php b/src/executables/ArcTool.php
index b75e7b30d..8cdc94189 100755
--- a/src/executables/ArcTool.php
+++ b/src/executables/ArcTool.php
@@ -285,9 +285,11 @@ class ArcTool implements CrawlConstants
         }
         $hash_paths = L\allCrawlHashPaths($word, true);
         $found = false;
-        echo "!!Performing Looking up for phrase " .
-            "at each possible shift position. Outputting results for each ".
-            "possibility!!\n";
+        if (count($hash_paths) > 1) {
+            echo "!!Performing Looking up for phrase " .
+                "at each possible shift position. Outputting results for each ".
+                "possibility!!\n";
+        }
         foreach ($hash_paths as $hash_shift) {
             if (is_array($hash_shift)) {
                 list($hash_key, $shift) = $hash_shift;
@@ -298,7 +300,9 @@ class ArcTool implements CrawlConstants
             $start_time = microtime(true);
             echo "Looking up in dictionary:\n";
             echo " Key: ". L\toHexString($hash_key) . "\n";
-            echo " Shift: ". $shift . "\n";
+            if (is_array($hash_shift)) {
+                echo " Shift: ". $shift . "\n";
+            }
             $info = IndexManager::getWordInfo($index_timestamp, $hash_key,
                 $shift, -1, $start_generation, $num_generations);
             echo "Dictionary Lookup Time:" . L\changeInMicrotime($start_time)
@@ -520,7 +524,7 @@ class ArcTool implements CrawlConstants
         };
         $document_word = ($count == 1) ? "Document" : "Documents";
         echo "$count $document_word Found:\n";
-        echo str_pad("", $count + 1, "=")."================\n";
+        echo str_pad("", $count + 1, "=") . "================\n";
         $j = 0;
         foreach ($documents as $key => $document) {
             echo "\nDOC ID: " . L\toHexString($key);
@@ -548,20 +552,22 @@ class ArcTool implements CrawlConstants
                         $i = 0;
                     }
                 }
-                if ($i != 0) { echo "\n"; }
+                if ($i != 0) {
+                    echo "\n";
+                }
             }
             $page = @$index->getPage($summary_offset);

             if (isset($page[self::TITLE])) {
                 echo "SUMMARY TITLE:\n";
                 echo "--------------\n";
-                echo wordwrap($page[self::TITLE], 80)."\n";
+                echo wordwrap($page[self::TITLE], 80) . "\n";
             }

             if (isset($page[self::DESCRIPTION])) {
                 echo "SUMMARY DESCRIPTION:\n";
                 echo "--------------\n";
-                echo $page[self::DESCRIPTION]."\n";
+                echo $page[self::DESCRIPTION] . "\n";
                 }
             $j++;
         }
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 47003bca5..cbe5177c9 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -1925,7 +1925,7 @@ class QueueServer implements CrawlConstants, Join
             L\crawlLog("Done index bundle reset, current memory usage is: ".
                 $current_usage);
             if ((0.7 * $memory_limit) < $current_usage) {
-                L\crawlLog("!!!Usage still exceeds threshold, exiting");
+                L\crawlLog("!!!Indexer usage still exceeds threshold, exiting");
                 exit();
             }
         }
diff --git a/src/library/Utility.php b/src/library/Utility.php
index 56e95a2e5..094f95d11 100755
--- a/src/library/Utility.php
+++ b/src/library/Utility.php
@@ -261,9 +261,6 @@ function packPosting($doc_index, $position_list, $delta = true)
     } else {
         $delta_list = $position_list;
     }
-    if (isset($delta_list[0])){
-        $delta_list[0]++;
-    }
     if ( $doc_index >= (2 << 14) && isset($delta_list[0])
         && $delta_list[0] < (2 << 9)  && $doc_index < (2 << 17)) {
         $delta_list[0] += (((2 << 17) + $doc_index) << 9);
@@ -294,17 +291,14 @@ function unpackPosting($posting, &$offset, $dedelta = true)
     $delta_list = (array) decodeModified9($posting, $offset);
     $doc_index = array_shift($delta_list);
     if (($doc_index & (2 << 26)) > 0) {
-        $delta0 = ($doc_index & ((2 << 9) - 1));
-        array_unshift($delta_list, $delta0);
-        $doc_index -= $delta0;
-        $doc_index -= (2 << 26);
+        $delta0 = $doc_index;
         $doc_index >>= 9;
+        $doc_index -= (2 << 17);
+        $delta0 -= (((2 << 17) + $doc_index) << 9);
+        array_unshift($delta_list, $delta0);
     } else {
         $doc_index--;
     }
-    if (isset($delta_list[0])) {
-        $delta_list[0]--;
-    }
     if ($dedelta) {
         deDeltaList($delta_list);
     }
diff --git a/tests/UtilityTest.php b/tests/UtilityTest.php
index 5b69ff4a5..9d47d2a4f 100644
--- a/tests/UtilityTest.php
+++ b/tests/UtilityTest.php
@@ -53,6 +53,18 @@ class UtilityTest extends UnitTest
     public function tearDown()
     {
     }
+    /**
+     * Used to check Encoding decoding using Modified9 coding
+     */
+    public function modified9TestCase()
+    {
+        $encode_list = [151466751, 11746, 11746];
+        $encoded = L\encodeModified9($encode_list);
+        $offset = 0;
+        $decode_list = L\decodeModified9($encoded, $offset);
+        $this->assertEqual($encode_list, $decode_list,
+            "Encoding and decoding an array with Modified9 gives same result");
+    }
     /**
      * Used to check if posting lists can be properly encoded/decoded
      */
@@ -80,6 +92,22 @@ class UtilityTest extends UnitTest
             "Doc index from unpack of long packed posting equal");
         $this->assertEqual($out_doc_list[1], $posting_list,
             "Unpack of long packed posting equal");
+        $offset = 0;
+        $posting_list = [254, 12000, 24000];
+        $packed = L\packPosting(33689, $posting_list);
+        $out_doc_list = L\unpackPosting($packed, $offset, true);
+        $this->assertEqual($out_doc_list[0], 33689,
+            "Doc index from unpack of first word has delta 0 case");
+        $this->assertEqual($out_doc_list[1], $posting_list,
+            "Unpack of delta 0 case");
+        $offset = 0;
+        $posting_list = [511, 12000, 24000];
+        $packed = L\packPosting(33689, $posting_list);
+        $out_doc_list = L\unpackPosting($packed, $offset, true);
+        $this->assertEqual($out_doc_list[0], 33689,
+            "Doc index from unpack of first word has delta 0 case");
+        $this->assertEqual($out_doc_list[1], $posting_list,
+            "Unpack of delta 0 case");
         $posting_list = [6000, 12000, 24000];
         $packed = L\packPosting(100000, $posting_list);
         $offset = 0;
ViewGit