viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]

Fix crash when Fetcher web_archive null, a=chris

Chris Pollett [2020-06-25 21:xx]
Fix crash when Fetcher web_archive null, a=chris
Filename
src/controllers/ResourceController.php
src/executables/Fetcher.php
src/executables/QueueServer.php
src/library/StochasticTermSegmenter.php
diff --git a/src/controllers/ResourceController.php b/src/controllers/ResourceController.php
index 80a4ccc8f..b79966e77 100644
--- a/src/controllers/ResourceController.php
+++ b/src/controllers/ResourceController.php
@@ -186,7 +186,7 @@ class ResourceController extends Controller implements CrawlConstants
      */
     public function getNameAndBaseFolder($is_src_folder = false)
     {
-        $name = $this->clean($_REQUEST['n'], "file_name");
+        $name = $this->clean($_REQUEST['n'] ?? "", "file_name");
         $type = UrlParser::getDocumentType($name);
         if (isset($_REQUEST['feed']) ||
             (!empty($_REQUEST['t']) && $_REQUEST['t'] == 'feed')) {
diff --git a/src/executables/Fetcher.php b/src/executables/Fetcher.php
index 1021c0053..34acbe75d 100755
--- a/src/executables/Fetcher.php
+++ b/src/executables/Fetcher.php
@@ -742,32 +742,37 @@ class Fetcher implements CrawlConstants
                     continue;
                 }
             }
-            switch ($this->crawl_type) {
-                case self::WEB_CRAWL:
-                    $downloaded_pages = $this->downloadPagesWebCrawl();
-                    break;
-                case self::ARCHIVE_CRAWL:
-                    if (isset($info[self::ARC_DATA])) {
-                        $downloaded_pages = $info[self::ARC_DATA];
-                    } else {
-                        $downloaded_pages = $this->downloadPagesArchiveCrawl();
-                    }
-                    break;
+            if (empty($this->web_archive)) {
+                L\crawlLog("Fetcher web_archive empty skipping page download");
+            } else {
+                switch ($this->crawl_type) {
+                    case self::WEB_CRAWL:
+                        $downloaded_pages = $this->downloadPagesWebCrawl();
+                        break;
+                    case self::ARCHIVE_CRAWL:
+                        if (isset($info[self::ARC_DATA])) {
+                            $downloaded_pages = $info[self::ARC_DATA];
+                        } else {
+                            $downloaded_pages =
+                                $this->downloadPagesArchiveCrawl();
+                        }
+                        break;
+                }
+                if (isset($downloaded_pages["NO_PROCESS"])) {
+                    unset($downloaded_pages["NO_PROCESS"]);
+                    $summarized_site_pages = array_values($downloaded_pages);
+                    $this->no_process_links = true;
+                } else {
+                    $summarized_site_pages =
+                        $this->processFetchPages($downloaded_pages);
+                    $this->no_process_links = false;
+                }
+                L\crawlLog("Number of summarized pages ".
+                    count($summarized_site_pages));
+                $force_send = (isset($info[self::END_ITERATOR]) &&
+                    $info[self::END_ITERATOR]) ? true : false;
+                $this->updateFoundSites($summarized_site_pages, $force_send);
             }
-            if (isset($downloaded_pages["NO_PROCESS"])) {
-                unset($downloaded_pages["NO_PROCESS"]);
-                $summarized_site_pages = array_values($downloaded_pages);
-                $this->no_process_links = true;
-            } else{
-                $summarized_site_pages =
-                    $this->processFetchPages($downloaded_pages);
-                $this->no_process_links = false;
-            }
-            L\crawlLog("Number of summarized pages ".
-                count($summarized_site_pages));
-            $force_send = (isset($info[self::END_ITERATOR]) &&
-                $info[self::END_ITERATOR]) ? true : false;
-            $this->updateFoundSites($summarized_site_pages, $force_send);
             $sleep_time = max(0, ceil($this->minimum_fetch_loop_time
                 - L\changeInMicrotime($start_time)));
             if ($sleep_time > 0) {
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 05472dddd..3b2541a87 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -1997,9 +1997,9 @@ class QueueServer implements CrawlConstants, Join
         if ((0.7 * $memory_limit) < $current_usage ||
             in_array($this->debug, ['EXCEED_MEMORY', 'EXCEED_MEMORY_HARD'])) {
             L\crawlLog("Indexer memory usage threshold exceeded!!!");
-            L\crawlLog("...Threshold is: " . (0.7 * $memory_limit));
-            L\crawlLog("...Current usage is: " . $current_usage);
-            L\crawlLog("...Trying to free memory by resetting " .
+            L\crawlLog("...Indexer Threshold is: " . (0.7 * $memory_limit));
+            L\crawlLog("...Indexer Current usage is: " . $current_usage);
+            L\crawlLog("...Indexer trying to free memory by resetting " .
                 "index bundle.");
             $this->index_archive->forceSave();
             $this->index_archive = null;
diff --git a/src/library/StochasticTermSegmenter.php b/src/library/StochasticTermSegmenter.php
index 4d64658d9..089cfaa05 100644
--- a/src/library/StochasticTermSegmenter.php
+++ b/src/library/StochasticTermSegmenter.php
@@ -485,7 +485,8 @@ class StochasticTermSegmenter
                     }
                     $subdic = $subdic[$characters[$j]];
                     if (isset($subdic['$']) && (!isset($score[$j]) ||
-                        $score[$index - 1] + $subdic['$'] < $score[$j])) {
+                        (isset($score[$index - 1]) &&
+                        $score[$index - 1] + $subdic['$'] < $score[$j]))) {
                         $score[$j] = $score[$index - 1] +
                             $this->getScore($subdic['$']);
                         $path[$j] = $index - 1;
@@ -501,7 +502,8 @@ class StochasticTermSegmenter
                     }
                     $subdic = $subdic[$characters[$j]];
                     if (isset($subdic['$']) && (!isset($score[$j]) ||
-                        $score[$index - 1] + $subdic['$'] < $score[$j])) {
+                        (isset($score[$index - 1]) &&
+                        $score[$index - 1] + $subdic['$'] < $score[$j]))) {
                         $score[$j] = $score[$index - 1] +
                             $this->getScore($subdic['$']);
                         $path[$j] = $index - 1;
ViewGit