viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]
Filename | |
---|---|
bin/fetcher.php | |
controllers/search_controller.php | |
lib/web_archive_bundle.php | |
models/parallel_model.php | |
diff --git a/bin/fetcher.php b/bin/fetcher.php index 0d0387e8d..fc7b7a9e2 100755 --- a/bin/fetcher.php +++ b/bin/fetcher.php @@ -1782,14 +1782,12 @@ class Fetcher implements CrawlConstants $not_loc = false; } $site[self::ROBOT_INSTANCE] = $prefix.ROBOT_INSTANCE; - if (!is_dir(CRAWL_DIR."/cache")) { mkdir(CRAWL_DIR."/cache"); $htaccess = "Options None\nphp_flag engine off\n"; file_put_contents(CRAWL_DIR."/cache/.htaccess", $htaccess); } - if ($type == "text/robot" && isset($doc_info[self::PAGE])) { $site[self::PAGE] = $doc_info[self::PAGE]; @@ -1936,14 +1934,13 @@ class Fetcher implements CrawlConstants $cache_page_partition = $this->web_archive->addPages( self::OFFSET, $filter_stored); } else if ($num_pages > 0) { - $this->web_archive->addCount(count($filter_stored)); } for ($i = 0; $i < $num_pages; $i++) { $summarized_site_pages[$i][self::INDEX] = $num_items + $i; } foreach ($filter_stored as $stored) { - $i= $stored[self::INDEX]; + $i = $stored[self::INDEX]; if (isset($stored[self::OFFSET])) { $summarized_site_pages[$i][self::OFFSET] = $stored[self::OFFSET]; diff --git a/controllers/search_controller.php b/controllers/search_controller.php index 0874834f6..c0961d6d5 100755 --- a/controllers/search_controller.php +++ b/controllers/search_controller.php @@ -1489,9 +1489,8 @@ class SearchController extends Controller implements CrawlConstants $dom = new DOMDocument(); restore_error_handler(); $did_dom = @$dom->loadHTML('<?xml encoding="UTF-8">' . $cache_file); - set_error_handler("yioop_error_handler"); foreach ($dom->childNodes as $item) { - if ($item->nodeType == XML_PI_NODE) + if (isset($item->nodeType) && $item->nodeType == XML_PI_NODE) $dom->removeChild($item); // remove hack } $dom->encoding = "UTF-8"; // insert proper @@ -1518,10 +1517,9 @@ class SearchController extends Controller implements CrawlConstants tl('search_controller_yioop_cache') . "</title></head>". 
"<body>".$cache_file."</body></html>"; $dom = new DOMDocument(); - restore_error_handler(); @$dom->loadHTML($cache_file); - set_error_handler("yioop_error_handler"); } + set_error_handler("yioop_error_handler"); $body = $dom->getElementsByTagName('body')->item(0); //make tags in body absolute $body = $this->canonicalizeLinks($body, $url); diff --git a/lib/web_archive_bundle.php b/lib/web_archive_bundle.php index 414e0e9eb..92872264b 100755 --- a/lib/web_archive_bundle.php +++ b/lib/web_archive_bundle.php @@ -301,6 +301,9 @@ class WebArchiveBundle $info = unserialize(file_get_contents($this->dir_name."/description.txt")); $info[$field] += $num; + if($field == "COUNT") { + $this->count = $info[$field]; + } if (!$this->read_only_archive) { file_put_contents($this->dir_name."/description.txt", serialize($info), LOCK_EX); diff --git a/models/parallel_model.php b/models/parallel_model.php index 2f2ac6c5f..94fef360d 100755 --- a/models/parallel_model.php +++ b/models/parallel_model.php @@ -377,7 +377,6 @@ class ParallelModel extends Model implements CrawlConstants return false; } $num_retrieved = 0; - $pages = array(); $summary_offset = null; if (!isset($index_archive->generation_info['ACTIVE'])) { return false; @@ -392,19 +391,13 @@ class ParallelModel extends Model implements CrawlConstants } $word_iterator = new WordIterator($info[0][4], $index_name, true); if (is_array($next_docs = $word_iterator->nextDocsWithWord())) { - foreach ($next_docs as $doc_key => $doc_info) { - $summary_offset = - $doc_info[CrawlConstants::SUMMARY_OFFSET]; - $generation = $doc_info[CrawlConstants::GENERATION]; - $page = @$index_archive->getPage($summary_offset, $generation); - $num_retrieved++; - if ($num_retrieved >= 1) { - break; - } - } - if ($num_retrieved == 0) { + $doc_info = current($next_docs); + if (!$doc_info) { return false; - } + } + $summary_offset = + $doc_info[CrawlConstants::SUMMARY_OFFSET]; + $generation = $doc_info[CrawlConstants::GENERATION]; } else { return false; }