viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]
Filename | |
---|---|
src/controllers/ResourceController.php | |
src/executables/Fetcher.php | |
src/executables/QueueServer.php | |
src/library/StochasticTermSegmenter.php | |
diff --git a/src/controllers/ResourceController.php b/src/controllers/ResourceController.php index 80a4ccc8f..b79966e77 100644 --- a/src/controllers/ResourceController.php +++ b/src/controllers/ResourceController.php @@ -186,7 +186,7 @@ class ResourceController extends Controller implements CrawlConstants */ public function getNameAndBaseFolder($is_src_folder = false) { - $name = $this->clean($_REQUEST['n'], "file_name"); + $name = $this->clean($_REQUEST['n'] ?? "", "file_name"); $type = UrlParser::getDocumentType($name); if (isset($_REQUEST['feed']) || (!empty($_REQUEST['t']) && $_REQUEST['t'] == 'feed')) { diff --git a/src/executables/Fetcher.php b/src/executables/Fetcher.php index 1021c0053..34acbe75d 100755 --- a/src/executables/Fetcher.php +++ b/src/executables/Fetcher.php @@ -742,32 +742,37 @@ class Fetcher implements CrawlConstants continue; } } - switch ($this->crawl_type) { - case self::WEB_CRAWL: - $downloaded_pages = $this->downloadPagesWebCrawl(); - break; - case self::ARCHIVE_CRAWL: - if (isset($info[self::ARC_DATA])) { - $downloaded_pages = $info[self::ARC_DATA]; - } else { - $downloaded_pages = $this->downloadPagesArchiveCrawl(); - } - break; + if (empty($this->web_archive)) { + L\crawlLog("Fetcher web_archive empty skipping page download"); + } else { + switch ($this->crawl_type) { + case self::WEB_CRAWL: + $downloaded_pages = $this->downloadPagesWebCrawl(); + break; + case self::ARCHIVE_CRAWL: + if (isset($info[self::ARC_DATA])) { + $downloaded_pages = $info[self::ARC_DATA]; + } else { + $downloaded_pages = + $this->downloadPagesArchiveCrawl(); + } + break; + } + if (isset($downloaded_pages["NO_PROCESS"])) { + unset($downloaded_pages["NO_PROCESS"]); + $summarized_site_pages = array_values($downloaded_pages); + $this->no_process_links = true; + } else { + $summarized_site_pages = + $this->processFetchPages($downloaded_pages); + $this->no_process_links = false; + } + L\crawlLog("Number of summarized pages ". 
+ count($summarized_site_pages)); + $force_send = (isset($info[self::END_ITERATOR]) && + $info[self::END_ITERATOR]) ? true : false; + $this->updateFoundSites($summarized_site_pages, $force_send); } - if (isset($downloaded_pages["NO_PROCESS"])) { - unset($downloaded_pages["NO_PROCESS"]); - $summarized_site_pages = array_values($downloaded_pages); - $this->no_process_links = true; - } else{ - $summarized_site_pages = - $this->processFetchPages($downloaded_pages); - $this->no_process_links = false; - } - L\crawlLog("Number of summarized pages ". - count($summarized_site_pages)); - $force_send = (isset($info[self::END_ITERATOR]) && - $info[self::END_ITERATOR]) ? true : false; - $this->updateFoundSites($summarized_site_pages, $force_send); $sleep_time = max(0, ceil($this->minimum_fetch_loop_time - L\changeInMicrotime($start_time))); if ($sleep_time > 0) { diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php index 05472dddd..3b2541a87 100755 --- a/src/executables/QueueServer.php +++ b/src/executables/QueueServer.php @@ -1997,9 +1997,9 @@ class QueueServer implements CrawlConstants, Join if ((0.7 * $memory_limit) < $current_usage || in_array($this->debug, ['EXCEED_MEMORY', 'EXCEED_MEMORY_HARD'])) { L\crawlLog("Indexer memory usage threshold exceeded!!!"); - L\crawlLog("...Threshold is: " . (0.7 * $memory_limit)); - L\crawlLog("...Current usage is: " . $current_usage); - L\crawlLog("...Trying to free memory by resetting " . + L\crawlLog("...Indexer Threshold is: " . (0.7 * $memory_limit)); + L\crawlLog("...Indexer Current usage is: " . $current_usage); + L\crawlLog("...Indexer trying to free memory by resetting " . 
"index bundle."); $this->index_archive->forceSave(); $this->index_archive = null; diff --git a/src/library/StochasticTermSegmenter.php b/src/library/StochasticTermSegmenter.php index 4d64658d9..089cfaa05 100644 --- a/src/library/StochasticTermSegmenter.php +++ b/src/library/StochasticTermSegmenter.php @@ -485,7 +485,8 @@ class StochasticTermSegmenter } $subdic = $subdic[$characters[$j]]; if (isset($subdic['$']) && (!isset($score[$j]) || - $score[$index - 1] + $subdic['$'] < $score[$j])) { + (isset($score[$index - 1]) && + $score[$index - 1] + $subdic['$'] < $score[$j]))) { $score[$j] = $score[$index - 1] + $this->getScore($subdic['$']); $path[$j] = $index - 1; @@ -501,7 +502,8 @@ class StochasticTermSegmenter } $subdic = $subdic[$characters[$j]]; if (isset($subdic['$']) && (!isset($score[$j]) || - $score[$index - 1] + $subdic['$'] < $score[$j])) { + (isset($score[$index - 1]) && + $score[$index - 1] + $subdic['$'] < $score[$j]))) { $score[$j] = $score[$index - 1] + $this->getScore($subdic['$']); $path[$j] = $index - 1;