| Filename |
| --- |
| src/controllers/MachineController.php |
| src/executables/Fetcher.php |
| src/library/UrlParser.php |
diff --git a/src/controllers/MachineController.php b/src/controllers/MachineController.php
index 6eb696f57..45ea39e2a 100644
--- a/src/controllers/MachineController.php
+++ b/src/controllers/MachineController.php
@@ -161,10 +161,12 @@ class MachineController extends Controller implements CrawlConstants
                 }
                 if (!file_exists($error_log) ||
                     filesize($error_log) > C\MAX_LOG_FILE_SIZE) {
-                    $this->web_site->filePutContents($error_log, $out_msg);
+                    /* use file_put_contents as filePutContetns doesn't
+                        support FILE_APPEND
+                     */
+                    file_put_contents($error_log, $out_msg);
                 } else {
-                    $this->web_site->filePutContents($error_log, $out_msg,
-                        FILE_APPEND);
+                    file_put_contents($error_log, $out_msg, FILE_APPEND);
                 }
                 CrawlDaemon::start("Fetcher", "$id-$channel", "$channel");
                 break;
diff --git a/src/executables/Fetcher.php b/src/executables/Fetcher.php
index 9750c4475..a5e84143c 100755
--- a/src/executables/Fetcher.php
+++ b/src/executables/Fetcher.php
@@ -2152,8 +2152,8 @@ class Fetcher implements CrawlConstants
      * Page processors are allowed to extract up to MAX_LINKS_TO_EXTRACT
      * This method attempts to cull from the doc_info struct the
      * best MAX_LINKS_PER_PAGE. Currently, this is done by first removing
-     * links which of filetype or sites the crawler is forbidden from crawl.
-     * Then a crude estimate of the informaation contained in the links test:
+     * links of filetype or sites the crawler is forbidden from crawl.
+     * Then a crude estimate of the information contained in the links test:
      * strlen(gzip(text)) is used to extract the best remaining links.
      *
      * @param array& $doc_info a string with a CrawlConstants::LINKS subarray
diff --git a/src/library/UrlParser.php b/src/library/UrlParser.php
index 3d98a970f..529e9137b 100755
--- a/src/library/UrlParser.php
+++ b/src/library/UrlParser.php
@@ -905,9 +905,8 @@ class UrlParser
         }
         $info_link = [];
         // choose the MAX_LINKS_PER_PAGE many pages with most info (crude)
-        foreach ($links as $url => $info) {
-            $num_terms = count(preg_split("/\s+|\-|\_|\~/", $info));
-            $text = serialize($info);
+        foreach ($links as $url => $text) {
+            $num_terms = count(preg_split("/\s+|\-|\_|\~/", $text));
             $len_text = strlen($text) + 1;
             $compressed_len = strlen(gzcompress($text)) + 1;
             $effective_num_terms = $num_terms * ($compressed_len/$len_text);
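The first hunk swaps the WebSite wrapper's filePutContents() for PHP's built-in file_put_contents() because the wrapper does not accept the FILE_APPEND flag. Below is a minimal standalone sketch of the resulting truncate-or-append logging pattern; the log path, message, and size cap are made-up placeholders here, whereas in the real code the cap comes from the C\MAX_LOG_FILE_SIZE configuration constant.

<?php
// Hypothetical stand-ins for the values the controller passes in.
$error_log = "/tmp/0-fetcher.log";
$out_msg = date("r") . " restarting fetcher\n";
$max_log_size = 1000000; // placeholder for C\MAX_LOG_FILE_SIZE

if (!file_exists($error_log) || filesize($error_log) > $max_log_size) {
    // start a fresh log file: without flags, file_put_contents truncates
    file_put_contents($error_log, $out_msg);
} else {
    // append to the existing log; the FILE_APPEND flag is the reason the
    // built-in is used instead of the WebSite wrapper's filePutContents()
    file_put_contents($error_log, $out_msg, FILE_APPEND);
}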
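The Fetcher docblock and the UrlParser hunk describe the "crude information estimate" used to keep the best links on a page: each link's anchor text is scored by its term count weighted by how poorly the text gzip-compresses. The sketch below follows the variable names in the diff, but the sample $links array, the cutoff of 3, and the final sort-and-slice step are illustrative assumptions; the real method uses MAX_LINKS_PER_PAGE and additional filtering.

<?php
// Toy link set: anchor text keyed by URL (invented for illustration).
$links = [
    "https://example.com/a" => "latest crawl results and index statistics",
    "https://example.com/b" => "click click click click click click",
    "https://example.com/c" => "home",
];
$max_links_per_page = 3; // placeholder for MAX_LINKS_PER_PAGE
$info_link = [];
foreach ($links as $url => $text) {
    $num_terms = count(preg_split("/\s+|\-|\_|\~/", $text));
    $len_text = strlen($text) + 1;
    $compressed_len = strlen(gzcompress($text)) + 1;
    // terms weighted by how incompressible (information dense) the text is
    $effective_num_terms = $num_terms * ($compressed_len / $len_text);
    $info_link[$url] = $effective_num_terms;
}
// keep the links with the highest estimated information content
arsort($info_link);
$best_links = array_slice($info_link, 0, $max_links_per_page, true);
print_r($best_links);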