viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]

remove and clean up some dead code related to cached web pages, a=chris

Chris Pollett [2015-06-06]
remove and clean up some dead code related to cached web pages, a=chris
Filename
bin/fetcher.php
controllers/search_controller.php
lib/web_archive_bundle.php
models/parallel_model.php
diff --git a/bin/fetcher.php b/bin/fetcher.php
index 0d0387e8d..fc7b7a9e2 100755
--- a/bin/fetcher.php
+++ b/bin/fetcher.php
@@ -1782,14 +1782,12 @@ class Fetcher implements CrawlConstants
                         $not_loc = false;
                 }
                 $site[self::ROBOT_INSTANCE] = $prefix.ROBOT_INSTANCE;
-
                 if (!is_dir(CRAWL_DIR."/cache")) {
                     mkdir(CRAWL_DIR."/cache");
                     $htaccess = "Options None\nphp_flag engine off\n";
                     file_put_contents(CRAWL_DIR."/cache/.htaccess",
                         $htaccess);
                 }
-
                 if ($type == "text/robot" &&
                     isset($doc_info[self::PAGE])) {
                         $site[self::PAGE] = $doc_info[self::PAGE];
@@ -1936,14 +1934,13 @@ class Fetcher implements CrawlConstants
             $cache_page_partition = $this->web_archive->addPages(
                 self::OFFSET, $filter_stored);
         } else if ($num_pages > 0) {
-
             $this->web_archive->addCount(count($filter_stored));
         }
         for ($i = 0; $i < $num_pages; $i++) {
             $summarized_site_pages[$i][self::INDEX] = $num_items + $i;
         }
         foreach ($filter_stored as $stored) {
-            $i= $stored[self::INDEX];
+            $i = $stored[self::INDEX];
             if (isset($stored[self::OFFSET])) {
                 $summarized_site_pages[$i][self::OFFSET] =
                     $stored[self::OFFSET];
diff --git a/controllers/search_controller.php b/controllers/search_controller.php
index 0874834f6..c0961d6d5 100755
--- a/controllers/search_controller.php
+++ b/controllers/search_controller.php
@@ -1489,9 +1489,8 @@ class SearchController extends Controller implements CrawlConstants
         $dom = new DOMDocument();
         restore_error_handler();
         $did_dom = @$dom->loadHTML('<?xml encoding="UTF-8">' . $cache_file);
-        set_error_handler("yioop_error_handler");
         foreach ($dom->childNodes as $item) {
-            if ($item->nodeType == XML_PI_NODE)
+            if (isset($item->nodeType) && $item->nodeType == XML_PI_NODE)
                 $dom->removeChild($item); // remove hack
         }
         $dom->encoding = "UTF-8"; // insert proper
@@ -1518,10 +1517,9 @@ class SearchController extends Controller implements CrawlConstants
                 tl('search_controller_yioop_cache') . "</title></head>".
                 "<body>".$cache_file."</body></html>";
             $dom = new DOMDocument();
-            restore_error_handler();
             @$dom->loadHTML($cache_file);
-            set_error_handler("yioop_error_handler");
         }
+        set_error_handler("yioop_error_handler");
         $body =  $dom->getElementsByTagName('body')->item(0);
         //make tags in body absolute
         $body = $this->canonicalizeLinks($body, $url);
diff --git a/lib/web_archive_bundle.php b/lib/web_archive_bundle.php
index 414e0e9eb..92872264b 100755
--- a/lib/web_archive_bundle.php
+++ b/lib/web_archive_bundle.php
@@ -301,6 +301,9 @@ class WebArchiveBundle
         $info =
             unserialize(file_get_contents($this->dir_name."/description.txt"));
         $info[$field] += $num;
+        if($field == "COUNT") {
+            $this->count = $info[$field];
+        }
         if (!$this->read_only_archive) {
             file_put_contents($this->dir_name."/description.txt",
                 serialize($info), LOCK_EX);
diff --git a/models/parallel_model.php b/models/parallel_model.php
index 2f2ac6c5f..94fef360d 100755
--- a/models/parallel_model.php
+++ b/models/parallel_model.php
@@ -377,7 +377,6 @@ class ParallelModel extends Model implements CrawlConstants
             return false;
         }
         $num_retrieved = 0;
-        $pages = array();
         $summary_offset = null;
         if (!isset($index_archive->generation_info['ACTIVE'])) {
             return false;
@@ -392,19 +391,13 @@ class ParallelModel extends Model implements CrawlConstants
         }
         $word_iterator = new WordIterator($info[0][4], $index_name, true);
         if (is_array($next_docs = $word_iterator->nextDocsWithWord())) {
-             foreach ($next_docs as $doc_key => $doc_info) {
-                 $summary_offset =
-                    $doc_info[CrawlConstants::SUMMARY_OFFSET];
-                 $generation = $doc_info[CrawlConstants::GENERATION];
-                 $page = @$index_archive->getPage($summary_offset, $generation);
-                 $num_retrieved++;
-                 if ($num_retrieved >=  1) {
-                     break;
-                 }
-             }
-             if ($num_retrieved == 0) {
+            $doc_info = current($next_docs);
+            if (!$doc_info) {
                 return false;
-             }
+            }
+            $summary_offset =
+                $doc_info[CrawlConstants::SUMMARY_OFFSET];
+            $generation = $doc_info[CrawlConstants::GENERATION];
         } else {
             return false;
         }
ViewGit