viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]

Further attempts to make QueueServer restarting work better, a=chris

Chris Pollett [2019-06-17, 16h]
Further attempts to make QueueServer restarting work better, a=chris
Filename
src/executables/QueueServer.php
src/library/IndexShard.php
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 8c83094b8..de76315be 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -499,11 +499,13 @@ class QueueServer implements CrawlConstants, Join
             $time - $first_check < C\PROCESS_TIMEOUT ) {
             return;
         }
+        $last_check = $time;
         L\crawlLog("Checking if both processes still running ...");
         $lines_to_check = C\LOG_LINES_TO_RESTART;
             //about 20-30 minutes of log data
         $lines = L\tail(C\LOG_DIR . "/" . $this->process_name . ".log",
             $lines_to_check);
+        $time = time(); // just in case took time to compute lines
         L\crawlLog("...Got " . $this->process_name . ".log lines");
         if (count($lines) < $lines_to_check) {
             L\crawlLog("...Too few log lines to check if both processes " .
@@ -522,7 +524,7 @@ class QueueServer implements CrawlConstants, Join
                 L\logLineTimestamp($process_lines[$num_lines - 1]);
             L\crawlLog("...Timestamp of last processed line: ".
                 $timestamp);
-            if (is_numeric($timestamp)) {
+            if (is_numeric($timestamp) && $time > $timestamp) {
                 /*
                    Note if 0 then we have seen LOG_LINES_TO_RESTART lines
                    with no message from other process
diff --git a/src/library/IndexShard.php b/src/library/IndexShard.php
index 0df6a9492..405b027f8 100644
--- a/src/library/IndexShard.php
+++ b/src/library/IndexShard.php
@@ -1362,9 +1362,10 @@ class IndexShard extends PersistentStructure implements CrawlConstants
         $doc_key_len = self::DOC_KEY_LEN;
         $row_len = $doc_key_len;
         $posting_len = self::POSTING_LEN;
-        $num_items = floor($docids_len/$row_len);
+        $num_items = floor($docids_len / $row_len);
         $item_cnt = 0;
         crawlTimeoutLog(true);
+        $missing_count = 0;
         for ($i = 0 ; $i < $docids_len; $i += $row_len) {
             crawlTimeoutLog("..still changing document offsets. At" .
                 " document %s of %s.", $item_cnt, $num_items);
@@ -1393,9 +1394,16 @@ class IndexShard extends PersistentStructure implements CrawlConstants
             if (isset($docid_offsets[$id])) {
                 charCopy(packInt($docid_offsets[$id]), $this->doc_infos,
                     $i, $posting_len);
-            } else if ($offset == self::NEEDS_OFFSET_FLAG) {
-                crawlLog("Document:" . toHexString($id) .
+            } else if ($offset == self::NEEDS_OFFSET_FLAG &&
+                $missing_count < 100) {
+                crawlLog("Index Shard Document:" . toHexString($id) .
                     " still needs offset");
+                $missing_count++;
+            } else if ($offset == self::NEEDS_OFFSET_FLAG &&
+                $missing_count == 100) {
+                crawlLog("Index Shard: too many docs still need offset, " .
+                    "not logging rest");
+                $missing_count++;
             }
         }
     }
ViewGit