viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]
Filename | |
---|---|
src/configs/Config.php | |
src/executables/Fetcher.php | |
src/library/UrlParser.php | |
src/library/Utility.php | |
src/locale/zh_CN/resources/Tokenizer.php | |
diff --git a/src/configs/Config.php b/src/configs/Config.php index 5ac9b3a32..8e3a29eec 100755 --- a/src/configs/Config.php +++ b/src/configs/Config.php @@ -169,7 +169,7 @@ nsdefine('MIN_AD_VERSION', 36); * Version number for upgrading locale resource folders and for upgrading * public and help wikis */ -nsdefine('RESOURCES_WIKI_VERSION', 8); +nsdefine('RESOURCES_WIKI_VERSION', 9); /** * nsdefine's the BASE_URL constant for this script * if run from the command line as part of index.php HTTP server scrip diff --git a/src/executables/Fetcher.php b/src/executables/Fetcher.php index 2bb94b995..2aded9716 100755 --- a/src/executables/Fetcher.php +++ b/src/executables/Fetcher.php @@ -1015,7 +1015,7 @@ class Fetcher implements CrawlConstants $time = time(); $this->check_crawl_time = $time; $session = md5($time . C\AUTH_KEY); - $prefix = $this->fetcher_num . "-" . $this->channel ."-"; + $prefix = $this->fetcher_num . "-" . $this->channel . "-"; $robot_instance = $prefix . C\ROBOT_INSTANCE; $time_change = false; $crawl_time = !is_null($this->crawl_time) ? $this->crawl_time : 0; @@ -1367,7 +1367,6 @@ class Fetcher implements CrawlConstants } else if (isset($info['ARCHIVE_BUNDLE_ERROR'])) { L\crawlLog(" ".$info['ARCHIVE_BUNDLE_ERROR']); } - L\crawlLog("Time to fetch archive data from name server ". L\changeInMicrotime($start_time)); return $info; @@ -1384,11 +1383,11 @@ class Fetcher implements CrawlConstants ini_get("memory_limit")) * C\MEMORY_FILL_FACTOR); } /** - * At least once, and while memory is low picks at server at random and send + * At least once, and while memory is low selects next server and send * any fetcher data we have to it. 
* * @param bool $at_least_once whether to send to the site info to at least - * queue server or to send only if memory is above threshold + * one queue server or to send only if memory is above threshold */ public function selectCurrentServerAndUpdateIfNeeded($at_least_once) { @@ -1397,11 +1396,9 @@ class Fetcher implements CrawlConstants /* Make sure no queue server starves if to crawl data available. Try to keep memory foot print smaller. */ + $cs = $this->current_server; + $next_server = ($cs + 1) % $num_servers; do { - if (!$at_least_once) { - $this->current_server = rand(0, $num_servers - 1); - } - $cs = $this->current_server; if ($at_least_once || (isset($this->found_sites[self::TO_CRAWL][$cs]) && count($this->found_sites[self::TO_CRAWL][$cs]) > 0) || @@ -1409,10 +1406,10 @@ class Fetcher implements CrawlConstants $this->updateScheduler(); $at_least_once = false; } + $cs = ($cs + 1) % $num_servers; $i++; - } while($this->exceedMemoryThreshold() && - $i < $num_servers * ceil(log($num_servers)) ); - //coupon collecting expected i before have seen all + } while($this->exceedMemoryThreshold() && $i < $num_servers); + $this->current_server = $next_server; } /** * Sets parameters for fetching based on provided info struct diff --git a/src/library/UrlParser.php b/src/library/UrlParser.php index 667c8d514..4d92a3529 100755 --- a/src/library/UrlParser.php +++ b/src/library/UrlParser.php @@ -932,10 +932,13 @@ class UrlParser $info_link = []; // choose the MAX_LINKS_PER_PAGE many pages with most info (crude) foreach ($links as $url => $text) { - $num_terms = count(preg_split("/\s+|\-|\_|\~/", $text)); + $text = (is_string($text)) ? 
$text : ""; + $terms = preg_split("/\s+|\-|\_|\~/", $text); + $num_terms = count($terms); $len_text = strlen($text) + 1; $compressed_len = strlen(gzcompress($text)) + 1; - $effective_num_terms = $num_terms * ($compressed_len/$len_text); + $effective_num_terms = $num_terms * + (min($compressed_len/$len_text, 1)); if (!isset($info_link[$url])) { $info_link[$url] = $effective_num_terms; } else { diff --git a/src/library/Utility.php b/src/library/Utility.php index d12ae90cc..be5dfbd00 100755 --- a/src/library/Utility.php +++ b/src/library/Utility.php @@ -1609,31 +1609,33 @@ function checkTimeInterval($start_time, $duration, $time = -1) function convertPixels($value) { $len = strlen($value); - if ($len < 2) return intval($value); + if (is_int($value) || $len < 2) { + return intval($value); + } if ($value[$len - 1] == "%") { $num = floatval(substr($value, 0, $len - 1)); - return ($num > 0) ? floor(8*min(100, $num)) : 0; + return ($num > 0) ? floor(8 * min(100, $num)) : 0; } $num = floatval(substr($value, 0, $len - 2)); $unit = substr($value, $len - 2); switch ($unit) { case "cm": case "pt": - return intval(28*$num); + return intval(28 * $num); break; case "em": case "pc": - return intval(6*$num); + return intval(6 * $num); break; case "ex": - return intval(12*$num); + return intval(12 * $num); break; case "in": //assume screen 72 dpi as on mac - return intval(72*$num); + return intval(72 * $num); break; case "mm": - return intval(2.8*$num); + return intval(2.8 * $num); break; case "px": return intval($num); diff --git a/src/locale/zh_CN/resources/Tokenizer.php b/src/locale/zh_CN/resources/Tokenizer.php index f0769c5a7..5bf9ef0a4 100755 --- a/src/locale/zh_CN/resources/Tokenizer.php +++ b/src/locale/zh_CN/resources/Tokenizer.php @@ -894,7 +894,8 @@ class Tokenizer if (!empty($tree_vp['NP'])) { $nb = $tree_vp['NP']; $object['CONCISE'] = $tree_vp['NP']; - while (is_array($object['CONCISE']["ADD_NP"]["NP"])) { + while (isset($object['CONCISE']["ADD_NP"]["NP"]) && + 
is_array($object['CONCISE']["ADD_NP"]["NP"])) { $object['CONCISE'] = $object['CONCISE']["ADD_NP"]["NP"]; } $object['CONCISE']=$object['CONCISE']["NN"]["NN"]; @@ -961,10 +962,11 @@ class Tokenizer $subject = []; if (!empty($tree['NP'])) { $subject['CONCISE'] = $tree['NP']; - while (is_array($subject['CONCISE']["ADD_NP"]["NP"])) { + while (isset($subject['CONCISE']["ADD_NP"]["NP"]) && + is_array($subject['CONCISE']["ADD_NP"]["NP"])) { $subject['CONCISE'] = $subject['CONCISE']["ADD_NP"]["NP"]; } - $subject['CONCISE']=$subject['CONCISE']["NN"]["NN"]; + $subject['CONCISE'] = $subject['CONCISE']["NN"]["NN"] ?? ""; $raw_subject = ""; $it = new \RecursiveIteratorIterator( new \RecursiveArrayIterator($tree['NP']));