viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]

On the way to modifying Tim's new code, a=chris

Chris Pollett [2019-12-10 22:Dec:th]
On the way to modifying Tim's new code, a=chris
Filename
src/configs/Config.php
src/configs/Createdb.php
src/executables/ArcTool.php
src/library/CrawlConstants.php
src/library/IndexArchiveBundle.php
src/library/IndexManager.php
src/library/IndexShard.php
src/library/VersionFunctions.php
src/library/index_bundle_iterators/ReverseIterator.php
src/library/index_bundle_iterators/WordIterator.php
src/library/media_jobs/AnalyticsJob.php
src/library/media_jobs/FeedsUpdateJob.php
src/models/CrawlModel.php
src/models/PhraseModel.php
tests/IndexShardTest.php
diff --git a/src/configs/Config.php b/src/configs/Config.php
index b22405e87..895273dc7 100755
--- a/src/configs/Config.php
+++ b/src/configs/Config.php
@@ -158,7 +158,7 @@ function nsconddefine($constant, $value)
  * Version number for upgrade database function
  * @var int
  */
-nsdefine('DATABASE_VERSION', 68);
+nsdefine('DATABASE_VERSION', 67);
 /**
  * Minimum Version fo Yioop for which keyword ad script
  * still works with this version
diff --git a/src/configs/Createdb.php b/src/configs/Createdb.php
index a3c2918b2..897333a13 100755
--- a/src/configs/Createdb.php
+++ b/src/configs/Createdb.php
@@ -153,9 +153,9 @@ $sql ="INSERT INTO USERS VALUES (".PUBLIC_USER_ID.", 'all', 'all','public',
 $db->execute($sql);
 //default public group with group id 1
 $creation_time = L\microTimestamp();
-$sql = "INSERT INTO GROUPS VALUES(".PUBLIC_GROUP_ID.",'Public','".
-    $creation_time."','".ROOT_ID."', '".PUBLIC_JOIN."', '".GROUP_READ.
-    "', ".NON_VOTING_GROUP.", " . FOREVER . ", 0)";
+$sql = "INSERT INTO GROUPS VALUES(".PUBLIC_GROUP_ID.",'Public','" .
+    $creation_time . "','".ROOT_ID."', '"  .PUBLIC_JOIN . "', '" . GROUP_READ .
+    "', " . NON_VOTING_GROUP.", " . FOREVER . ", 0)";
 $db->execute($sql);
 $now = time();
 $db->execute("INSERT INTO ROLE VALUES (" . ADMIN_ROLE . ", 'Admin' )");
@@ -164,9 +164,9 @@ $db->execute("INSERT INTO ROLE VALUES (".BOT_ROLE.", 'Bot User' )");
 $db->execute("INSERT INTO USER_ROLE VALUES (" . ROOT_ID . ", " . ADMIN_ROLE .
     ")");
 $db->execute("INSERT INTO USER_GROUP VALUES (" . ROOT_ID . ", ".
-    PUBLIC_GROUP_ID.", ".ACTIVE_STATUS.", $now)");
+    PUBLIC_GROUP_ID.", " . ACTIVE_STATUS . ", $now)");
 $db->execute("INSERT INTO USER_GROUP VALUES (".PUBLIC_USER_ID.", ".
-    PUBLIC_GROUP_ID.", ".ACTIVE_STATUS.", $now)");
+    PUBLIC_GROUP_ID.", " . ACTIVE_STATUS . ", $now)");
 //Create a Group for Wiki HELP.
 $sql = "INSERT INTO GROUPS VALUES (" . HELP_GROUP_ID . ",'Help','" .
     $creation_time . "','" . ROOT_ID . "',
@@ -490,7 +490,7 @@ $db->execute("INSERT INTO MIX_COMPONENTS VALUES(
     3, 0, 1, 1, 'media:video')");
 $db->execute("INSERT INTO CRAWL_MIXES VALUES (4, 'news', ".ROOT_ID.", -1)");
 $db->execute("INSERT INTO MIX_FRAGMENTS VALUES(4, 0, 1)");
-$db->execute("INSERT INTO MIX_COMPONENTS VALUES(4, 0, 1, 1,
+$db->execute("INSERT INTO MIX_COMPONENTS VALUES(4, 0, 100, 1,
     'media:news')");
 $db->execute("INSERT INTO SUBSEARCH VALUES('db_subsearch_images',
     'images','m:2', 50, '')");
diff --git a/src/executables/ArcTool.php b/src/executables/ArcTool.php
index 8c5dcd5a0..73846ca5e 100755
--- a/src/executables/ArcTool.php
+++ b/src/executables/ArcTool.php
@@ -282,8 +282,8 @@ class ArcTool implements CrawlConstants
         $index_timestamp = (isset($matches[0])) ? $matches[0] : 0;
         if ($bundle_num >= 0) {
             $index_timestamp .= "-$bundle_num";
-        } else if ($bundle_name == "IndexDataNewsFeed") {
-            $index_timestamp = "NewsFeed";
+        } else if ($bundle_name == "IndexDataFeed") {
+            $index_timestamp = "feed";
         }
         $hash_paths = L\allCrawlHashPaths($word, true);
         $found = false;
@@ -373,8 +373,8 @@ class ArcTool implements CrawlConstants
         $index_timestamp = (isset($matches[0])) ? $matches[0] : 0;
         if ($bundle_num >= 0) {
             $index_timestamp .= "-$bundle_num";
-        } else if ($bundle_name == "IndexDataNewsFeed") {
-            $index_timestamp = "NewsFeed";
+        } else if ($bundle_name == "IndexDataFeed") {
+            $index_timestamp = "feed";
         }
         $index = IndexManager::getIndex($index_timestamp);
         $index->setCurrentShard($generation);
@@ -493,8 +493,8 @@ class ArcTool implements CrawlConstants
         $index_timestamp = (isset($matches[0])) ? $matches[0] : 0;
         if ($bundle_num >= 0) {
             $index_timestamp .= "-$bundle_num";
-        } else if ($bundle_num = "IndexDataNewsFeed") {
-            $index_timestamp = "NewsFeed";
+        } else if ($bundle_name == "IndexDataFeed") {
+            $index_timestamp = "feed";
         }
         $index = IndexManager::getIndex($index_timestamp);
         $index->setCurrentShard($generation, true);
diff --git a/src/library/CrawlConstants.php b/src/library/CrawlConstants.php
index 75d41fcbd..1546388a9 100755
--- a/src/library/CrawlConstants.php
+++ b/src/library/CrawlConstants.php
@@ -60,7 +60,7 @@ interface CrawlConstants
     const robot_data_base_name = "RobotData";
     const etag_expires_data_base_name = "EtagExpiresData";
     const index_data_base_name = "IndexData";
-    const feed_index_data_base_name = "IndexDataNewsFeed";
+    const feed_index_data_base_name = "IndexDataFeed";
     const double_index_base_name = "DoubleIndexData";
     const network_base_name = "Network";
     const network_crawllist_base_name = "NetworkCrawlList";
@@ -74,6 +74,11 @@ interface CrawlConstants
     const robot_table_name = "robot_table.txt";
     const mirror_table_name = "mirror_table.txt";
     const local_ip_cache_file = "local_ip_cache.txt";
+    /** used for word iterator direction  */
+    const FORWARD = 1;
+    const BACKWARD = -1;
+    /** media feed index archive bundle timestamp */
+    const FEED_CRAWL_TIME = 100;
     /** Used in priority queue*/
     const MAX = 1;
     const MIN = -1;
@@ -113,6 +118,7 @@ interface CrawlConstants
     const WIDTH = 'C';
     const ROBOTS_TXT = 'D';
     const DEBUG = "E";
+    const DIRECTION = "F";
     // codes available here
     const DOC_DEPTH = 'M';
     const DOC_RANK = 'N';
diff --git a/src/library/IndexArchiveBundle.php b/src/library/IndexArchiveBundle.php
index 167d98b6e..88736b1a0 100644
--- a/src/library/IndexArchiveBundle.php
+++ b/src/library/IndexArchiveBundle.php
@@ -317,7 +317,8 @@ class IndexArchiveBundle implements CrawlConstants
      *      merge dictionary side effects
      * @return object the currently being index shard
      */
-     public function getCurrentShard($force_read = false, $forward = true)
+     public function getCurrentShard($force_read = false,
+        $direction = self::FORWARD)
      {
         if (!isset($this->current_shard)) {
             if (!isset($this->generation_info['CURRENT'])) {
@@ -331,7 +332,7 @@ class IndexArchiveBundle implements CrawlConstants
                     $this->current_shard = new IndexShard(
                         $current_index_shard_file,
                         $this->generation_info['CURRENT'],
-                        $this->num_docs_per_generation, true, $forward);
+                        $this->num_docs_per_generation, true, $direction);
                     $this->current_shard->getShardHeader($force_read);
                     $this->current_shard->read_only_from_disk = true;
                 } else {
@@ -415,7 +416,9 @@ class IndexArchiveBundle implements CrawlConstants
     public function countWordKeys($word_keys)
     {
         $words_array = [];
-        if (!is_array($word_keys) || count($word_keys) < 1) { return null;}
+        if (!is_array($word_keys) || count($word_keys) < 1) {
+            return null;
+        }
         foreach ($word_keys as $word_key) {
             $tmp = $this->dictionary->getWordInfo($word_key);
             if ($tmp === false) {
diff --git a/src/library/IndexManager.php b/src/library/IndexManager.php
index 95df96176..1a7f87715 100644
--- a/src/library/IndexManager.php
+++ b/src/library/IndexManager.php
@@ -62,13 +62,13 @@ class IndexManager implements CrawlConstants
     const INDEX_CACHE_SIZE = 1000;
     /**
      * Returns a reference to the managed copy of an IndexArchiveBundle object
-     * with a given timestamp or an IndexShard in the case where
-     * $index_name == "feed" (for handling media feeds)
+     * with a given timestamp or feed (for handling media feeds)
      *
      * @param string $index_name timestamp of desired IndexArchiveBundle
+     * @param int $direction forwarded to IndexArchiveBundle constructor
      * @return object the desired IndexArchiveBundle reference
      */
-    public static function getIndex($index_name, $forward_direction = true)
+    public static function getIndex($index_name, $direction = self::FORWARD)
     {
         $index_name = trim($index_name); //trim to fix postgres quirkiness
         if (empty(self::$indexes[$index_name]) ||
@@ -76,65 +76,53 @@ class IndexManager implements CrawlConstants
             ($index_name == "feed" || php_sapi_name() == 'cli') &&
             (time() - self::$index_times[$index_name])
             > C\MIN_QUERY_CACHE_TIME) ) {
-            if ($index_name == "feed") {
-                $index_file = C\WORK_DIRECTORY . "/feeds/index";
-                if (file_exists($index_file)) {
-                    self::$indexes[$index_name] = new IndexShard(
-                        $index_file, 0, C\NUM_DOCS_PER_GENERATION, true);
-                    self::$index_times["feed"]  = time();
-                } else {
+            if ($index_name == "feed" || $index_name == self::FEED_CRAWL_TIME) {
+                $index_archive_name = self::feed_index_data_base_name;
+                $index_name = "feed";
+            } else {
+                $index_archive_name = self::index_data_base_name . $index_name;
+            }
+            if (file_exists(C\CRAWL_DIR.'/cache/' . $index_archive_name)) {
+                $tmp = new IndexArchiveBundle(
+                    C\CRAWL_DIR . '/cache/' . $index_archive_name, null,
+                    C\NUM_DOCS_PER_GENERATION, $direction);
+                if (!$tmp) {
                     return false;
                 }
             } else {
-                if ($index_name == "NewsFeed") {
-                    $index_archive_name = self::feed_index_data_base_name;
-                    $index_name = 13;
-                } else {
-                    $index_archive_name = self::index_data_base_name . $index_name;
+                $tmp = false;
+                $use_name = $index_name;
+                $serve_archive = -1;
+                if (preg_match("/\-\d$/", $index_name)) {
+                    $serve_archive = substr($index_name, -1);
+                    $use_name = substr($index_name, 0, -2);
                 }
-                $index_archive_name = self::index_data_base_name . $index_name;
-                if (file_exists(C\CRAWL_DIR.'/cache/' . $index_archive_name)) {
-                    $tmp = new IndexArchiveBundle(
-                        C\CRAWL_DIR.'/cache/' . $index_archive_name, null,
-                        C\NUM_DOCS_PER_GENERATION, $forward_direction);
-                    if (!$tmp) {
-                        return false;
-                    }
-                } else {
-                    $tmp = false;
-                    $use_name = $index_name;
-                    $serve_archive = -1;
-                    if (preg_match("/\-\d$/", $index_name)) {
-                        $serve_archive = substr($index_name, -1);
-                        $use_name = substr($index_name, 0, -2);
-                    }
-                    $index_archive_name = self::double_index_base_name .
-                        $use_name;
-                    $status_file = C\CRAWL_DIR . '/cache/' .
-                        $index_archive_name . "/status.txt";
-                    if ($serve_archive < 0 && file_exists($status_file)) {
-                        $status = unserialize(file_get_contents($status_file));
-                        $active_archive = (empty($status["swap_count"])) ? 1 :
-                            $status["swap_count"] % 2;
-                        $serve_archive = 1 - $active_archive;
-                    }
+                $index_archive_name = self::double_index_base_name .
+                    $use_name;
+                $status_file = C\CRAWL_DIR . '/cache/' .
+                    $index_archive_name . "/status.txt";
+                if ($serve_archive < 0 && file_exists($status_file)) {
+                    $status = unserialize(file_get_contents($status_file));
+                    $active_archive = (empty($status["swap_count"])) ? 1 :
+                        $status["swap_count"] % 2;
+                    $serve_archive = 1 - $active_archive;
+                }
+                $tmp = new IndexArchiveBundle(
+                    C\CRAWL_DIR . '/cache/' . $index_archive_name .
+                    "/bundle$serve_archive");
+                if (!$tmp) {
+                    $serve_archive = ($serve_archive == 0) ? 1 : 0;
                     $tmp = new IndexArchiveBundle(
                         C\CRAWL_DIR . '/cache/' . $index_archive_name .
                         "/bundle$serve_archive");
-                    if (!$tmp) {
-                        $serve_archive = ($serve_archive == 0) ? 1 : 0;
-                        $tmp = new IndexArchiveBundle(
-                            C\CRAWL_DIR . '/cache/' . $index_archive_name .
-                            "/bundle$serve_archive");
-                    }
-                    if (!$tmp) {
-                        return false;
-                    }
                 }
-                self::$indexes[$index_name] = $tmp;
-                self::$indexes[$index_name]->setCurrentShard(0, true);
-                self::$index_times[$index_name] = time();
+                if (!$tmp) {
+                    return false;
+                }
             }
+            self::$indexes[$index_name] = $tmp;
+            self::$indexes[$index_name]->setCurrentShard(0, true);
+            self::$index_times[$index_name] = time();
             /*
                If too many cached discard oldest 1/3 of cached indices
              */
@@ -207,20 +195,12 @@ class IndexManager implements CrawlConstants
         $threshold = -1, $start_generation = -1, $num_distinct_generations = -1,
         $with_remaining_total = false)
     {
+        if ($index_name == self::FEED_CRAWL_TIME) {
+            $index_name = "feed";
+        }
         $id = "$index_name:$start_generation:$num_distinct_generations";
         $index = self::getIndex($index_name);
         $tmp = [];
-        if ((!C\nsdefined('NO_FEEDS') || !C\NO_FEEDS) &&
-            $start_generation < 0
-            && file_exists(C\WORK_DIRECTORY . "/feeds/index")) {
-            $use_feeds = true;
-            $feed_shard = self::getIndex("feed");
-            $feed_info = $feed_shard->getWordInfo($hash, true, $shift);
-            if (is_array($feed_info)) {
-                $tmp[-1] = [-1, $feed_info[0],
-                    $feed_info[1], $feed_info[2], $feed_info[3]];
-            }
-        }
         if (!empty($index->dictionary)) {
             $pre_info =
                 $index->dictionary->getWordInfo($hash, true, $shift,
diff --git a/src/library/IndexShard.php b/src/library/IndexShard.php
index 49240695b..472972129 100644
--- a/src/library/IndexShard.php
+++ b/src/library/IndexShard.php
@@ -283,7 +283,7 @@ class IndexShard extends PersistentStructure implements CrawlConstants
      *
      * @param string $fname filename to store the index shard with
      * @param int $generation when returning documents from the shard
-     *      pretend there ar ethis many earlier documents
+     *      pretend there are this many earlier documents
      * @param int $num_docs_per_generation the number of documents that a
      *      given shard can hold.
      * @param bool $read_only_from_disk used to determined if this shard is
@@ -673,17 +673,19 @@ class IndexShard extends PersistentStructure implements CrawlConstants
         }
         // Normal forward iterator
         if ($forward_dir) {
-            return $this->postingsSliceForward($start_offset, $next_offset, $last_offset,
-                    $len);
-        }
-        // Reverse direction iterator used for newsfeed
-        else {
-            return $this->postingsSliceBackward($start_offset, $next_offset, $last_offset,
-                    $len);
+            return $this->postingsSliceForward($start_offset, $next_offset,
+                $last_offset, $len);
+        } else {
+            // Reverse direction iterator used for newsfeed
+            return $this->postingsSliceBackward($start_offset, $next_offset,
+                $last_offset, $len);
         }
     }
-    public function postingsSliceForward($start_offset, &$next_offset, $last_offset,
-        $len)
+    /**
+     * Forward-direction helper for getPostingsSlice; see that method for args
+     */
+    public function postingsSliceForward($start_offset, &$next_offset,
+        $last_offset, $len)
     {
         $num_docs_so_far = 0;
         $results = [];
@@ -719,8 +721,11 @@ class IndexShard extends PersistentStructure implements CrawlConstants
         $next_offset = $next << 2;
         return $results;
     }
-    public function postingsSliceBackward($start_offset, &$next_offset, $last_offset,
-        $len)
+    /**
+     * Reverse-direction helper for getPostingsSlice (used for news feeds)
+     */
+    public function postingsSliceBackward($start_offset, &$next_offset,
+        $last_offset, $len)
     {
         $num_docs_so_far = 0;
         $results = [];
@@ -745,8 +750,9 @@ class IndexShard extends PersistentStructure implements CrawlConstants
                 break;
             }
             $posting_start = $next;
-            // getPostingAtOffset will modify both start and end to the value of next
-            // using addresses
+            /* getPostingAtOffset will modify both start and end to the value of
+               next using addresses
+             */
             $posting = $this->getPostingAtOffset(
                 $next, $posting_start, $posting_end);
             $total_posting_len += strlen($posting);
@@ -1225,12 +1231,10 @@ class IndexShard extends PersistentStructure implements CrawlConstants
             if ($this->forward_direction) {
                 $results = $this->getPostingsSlice($first_offset,
                     $first_offset, $last_offset, $len);
-            }
-            else {
+            } else {
                 $results = $this->getPostingsSlice($first_offset,
-                $last_offset, $last_offset, $len);
+                    $last_offset, $last_offset, $len, false);
             }
-
         }
         return $results;
     }
@@ -2188,4 +2192,4 @@ class IndexShard extends PersistentStructure implements CrawlConstants
             substr($value, self::WORD_KEY_LEN,
                 self::WORD_DATA_LEN);
     }
-}
\ No newline at end of file
+}
diff --git a/src/library/VersionFunctions.php b/src/library/VersionFunctions.php
index 078aad92b..09ab43367 100644
--- a/src/library/VersionFunctions.php
+++ b/src/library/VersionFunctions.php
@@ -1850,14 +1850,3 @@ function upgradeDatabaseVersion67(&$db)
     $db->execute("ALTER TABLE SUBSEARCH ADD COLUMN " .
         "DEFAULT_QUERY VARCHAR(" . C\TITLE_LEN . ") DEFAULT ''");
 }
-/**
- * Upgrades a Version 67 version of the Yioop database to a Version 68 version
- * @param object $db datasource to use to upgrade.
- */
-function upgradeDatabaseVersion68(&$db)
-{
-    $db->execute("DELETE FROM MIX_COMPONENTS WHERE MIX_TIMESTAMP=4
-        AND GROUP_ID=0");
-    $db->execute("INSERT INTO MIX_COMPONENTS VALUES(
-        4, 0, 13, 1, 'media:news')");
-}
diff --git a/src/library/index_bundle_iterators/ReverseIterator.php b/src/library/index_bundle_iterators/ReverseIterator.php
index 5436146da..a11ac0810 100644
--- a/src/library/index_bundle_iterators/ReverseIterator.php
+++ b/src/library/index_bundle_iterators/ReverseIterator.php
@@ -86,17 +86,6 @@ class ReverseIterator extends IndexBundleIterator
      * @var array
      */
     public $dictionary_info;
-    /**
-     * File name (including path) of the feed shard for news items
-     * @var string
-     */
-    public $feed_shard_name;
-    /**
-     * Structure used to hold posting list start and stops for the query
-     * in the feed shard
-     * @var array
-     */
-    public $feed_info;
     /**
      * The total number of shards that have data for this word
      * @var int
@@ -148,10 +137,6 @@ class ReverseIterator extends IndexBundleIterator
     const HOST_KEY_POS = 17;
     /** Length of a doc key*/
     const KEY_LEN = 8;
-    /** If the $limit_feeds constructor input is true then limit the number
-     * of items coming from the feed shard to this count.
-     */
-    const LIMIT_FEEDS_COUNT = 25;
     /**
      * Creates a word iterator with the given parameters.
      *
@@ -164,15 +149,10 @@ class ReverseIterator extends IndexBundleIterator
      *     results
      * @param int $results_per_block the maximum number of results that can
      *      be returned by a findDocsWithWord call
-     * @param bool $limit_feeds feed results appear before all others when
-     *      gotten out of this iterator (may be reordered later). This flag
-     *      controls whether an upper bound of self::LIMIT_FEEDS_COUNT is
-     *      imposed on the number of feed results returned
      */
     public function __construct($word_key, $shift, $index_name, $raw = false,
-        $filter = null,
-        $results_per_block = IndexBundleIterator::RESULTS_PER_BLOCK,
-        $limit_feeds = false)
+        $filter = null, $results_per_block =
+        IndexBundleIterator::RESULTS_PER_BLOCK)
     {
         if ($raw == false) {
             //get rid of out modified base64 encoding
@@ -181,55 +161,10 @@ class ReverseIterator extends IndexBundleIterator
         $this->filter = $filter;
         $this->word_key = $word_key;
         $this->shift = $shift;
-        // 13 is somewhat of a magic number right now
-        if($index_name == 13) {
-            $index_name = "NewsFeed";
-        }
         $this->index_name = $index_name;
-        list($estimated_total, $this->dictionary_info) =
+        list($this->num_docs, $this->dictionary_info) =
             IndexManager::getWordInfo($index_name, $word_key, $shift,
             -1, -1, C\NUM_DISTINCT_GENERATIONS, true);
-        $this->feed_shard_name = C\WORK_DIRECTORY . "/feeds/index";
-        if ((!C\nsdefined('NO_FEEDS') || !C\NO_FEEDS)
-            && file_exists($this->feed_shard_name)) {
-            $this->use_feeds = true;
-        } else {
-            $this->use_feeds = false;
-        }
-        if ($this->use_feeds) {
-            if (!isset($this->dictionary_info[-1])) {
-                $this->feed_info = false;
-                $this->feed_empty = true;
-            } else {
-                $this->feed_info = $this->dictionary_info[-1];
-                unset($this->dictionary_info[-1]);
-                $this->feed_empty = false;
-            }
-        } else {
-            $this->feed_info = false;
-            $this->feed_empty = true;
-        }
-        if (is_array($this->feed_info)) {
-            list(,$this->feed_start, $this->feed_end, $this->feed_count,) =
-                $this->feed_info;
-            $this->feed_info = [$this->feed_start, $this->feed_end,
-                $this->feed_count];
-        } else {
-            $this->feed_start = 0;
-            $this->feed_end = 0;
-            $this->feed_count = 0;
-        }
-        if ($this->feed_count > 0) {
-            $this->using_feeds = true;
-        } else {
-            $this->using_feeds = false;
-        }
-        if ($limit_feeds && $this->feed_count > self::LIMIT_FEEDS_COUNT) {
-            $this->feed_count = self::LIMIT_FEEDS_COUNT;
-            $this->feed_end = $this->feed_start +
-                IndexShard::POSTING_LEN * (self::LIMIT_FEEDS_COUNT - 1);
-        }
-        $this->num_docs = $this->feed_count + $estimated_total;
         if ($this->dictionary_info === false) {
             $this->empty = true;
         } else {
@@ -248,7 +183,7 @@ class ReverseIterator extends IndexBundleIterator
         $this->results_per_block = $results_per_block;
         $this->current_block_fresh = false;
         $this->start_generation = $this->num_generations-1;
-        if ($this->dictionary_info !== false || $this->feed_info !== false) {
+        if ($this->dictionary_info !== false) {
             $this->reset();
         }
     }
@@ -259,19 +194,12 @@ class ReverseIterator extends IndexBundleIterator
      */
     public function reset()
     {
-        if ($this->feed_count > 0) {
-            $this->using_feeds = true;
-        } else {
-            $this->using_feeds = false;
-        }
-        $no_feeds = $this->feed_empty || !$this->use_feeds;
         if (!$this->empty) {//we shouldn't be called when empty - but to be safe
             if ($this->start_generation < $this->num_generations-1) {
-                list($estimated_total, $this->dictionary_info) =
+                list($this->num_docs, $this->dictionary_info) =
                     IndexManager::getWordInfo($this->index_name,
                     $this->word_key, 0, -1, 0, C\NUM_DISTINCT_GENERATIONS,
                     true);
-                $this->num_docs = $this->feed_count + $estimated_total;
                 ksort($this->dictionary_info);
                 $this->dictionary_info = array_values($this->dictionary_info);
                 $this->num_generations = count($this->dictionary_info);
@@ -281,15 +209,12 @@ class ReverseIterator extends IndexBundleIterator
             list($this->current_generation, $this->start_offset,
                 $this->last_offset, )
                 = $this->dictionary_info[$this->num_generations-1];
-        # if the feed isn't empty
-        } else {
-            $this->start_offset = 0;
-            $this->last_offset = -1;
-            $this->num_generations = -1;
         }
         $this->current_offset = $this->last_offset;
-        // reset pointer to the number of gens, which in reverse is the first one we want
-        $this->generation_pointer = $this->num_generations-1;
+        /*  reset pointer to the number of gens, which in reverse is the
+           first one we want
+         */
+        $this->generation_pointer = $this->num_generations - 1;
         $this->count_block = 0;
         $this->seen_docs = 0;
         $this->current_doc_offset = null;
@@ -317,20 +242,19 @@ class ReverseIterator extends IndexBundleIterator
             $index->setCurrentShard($this->current_generation, true);
             //the next call also updates next offset
             $shard = $index->getCurrentShard(false, false);
-            $pre_results = $shard->getPostingsSlice(
-                $this->start_offset,
+            $pre_results = $shard->getPostingsSlice($this->start_offset,
                 $this->next_offset, $this->last_offset,
                 $this->results_per_block, false);
-            if($this->index_name == "NewsFeed") {
+            if($this->index_name == "feed") {
                 $time = time();
                 foreach ($pre_results as $keys => $pre_result) {
-                    $page = $index->getPage($pre_result[self::SUMMARY_OFFSET],
+                    $page = $index->getPage($pre_result[self::SUMMARY_OFFSET],
                         $this->current_generation);
                     $delta = $time - $page[self::PUBDATE];
                     $pre_results[$keys][self::DOC_RANK] = 720000 /
                         max($delta, 1);
                 }
-            }
+            }
         }
         $results = [];
         $doc_key_len = IndexShard::DOC_KEY_LEN;
@@ -348,11 +272,7 @@ class ReverseIterator extends IndexBundleIterator
             } else {
                 continue;
             }
-            if (!empty($data[self::IS_FEED])) {
-                $data[self::CRAWL_TIME] = "feed";
-            } else {
-                $data[self::CRAWL_TIME] = $this->index_name;
-            }
+            $data[self::CRAWL_TIME] = $this->index_name;
             $results[$keys] = $data;
         }
         $this->count_block = count($results);
@@ -365,10 +285,11 @@ class ReverseIterator extends IndexBundleIterator
     }
     /**
      * Updates the seen_docs count during an advance() call
-     * For a reverse shard, instead of adding to the offset, we subtract by a block instead.
+     * For a reverse shard, instead of adding to the offset, we subtract by a
+     * block instead.
      */
     public function advanceSeenDocs()
-    {
+    {
         if ($this->current_block_fresh != true) {
             $total_guess = IndexShard::numDocsOrLinks($this->next_offset,
                     $this->start_offset);
@@ -405,8 +326,7 @@ class ReverseIterator extends IndexBundleIterator
         }
         $cur_gen_doc_offset = $this->currentGenDocOffsetWithWord();
         if ($cur_gen_doc_offset == -1 ||
-            $this->genDocOffsetCmp($cur_gen_doc_offset,
-            $gen_doc_offset) < 0) {
+            $this->genDocOffsetCmp($cur_gen_doc_offset, $gen_doc_offset) < 0) {
             return;
         }
         $this->plainAdvance();
@@ -414,16 +334,10 @@ class ReverseIterator extends IndexBundleIterator
             $this->advanceGeneration($gen_doc_offset[0]);
             $this->next_offset = $this->current_offset;
         }
-        $using_feeds = $this->using_feeds && $this->use_feeds;
-        if ($using_feeds) {
-            $shard = IndexManager::getIndex("feed");
-            $last = $this->feed_end;
-        } else {
-            $index = IndexManager::getIndex($this->index_name, false);
-            $index->setCurrentShard($this->current_generation, true);
-            $shard = $index->getCurrentShard(false, false);
-            $start = $this->start_offset;
-        }
+        $index = IndexManager::getIndex($this->index_name, false);
+        $index->setCurrentShard($this->current_generation, true);
+        $shard = $index->getCurrentShard(false, false);
+        $start = $this->start_offset;
         if ($this->current_generation == $gen_doc_offset[0]) {
             $offset_pair = $shard->nextPostingOffsetDocOffset(
                  $start, $this->next_offset, $gen_doc_offset[1], false);
@@ -471,27 +385,25 @@ class ReverseIterator extends IndexBundleIterator
      */
     public function advanceGeneration($generation = null)
     {
-        if ($this->using_feeds && $this->use_feeds) {
-            $this->using_feeds = false;
-            $this->generation_pointer = -1;
-        }
         if ($generation === null) {
             $generation = $this->current_generation;
         }
         do {
-            # RC if the pointer is greater than the total generations, subtract
+            // RC step back one generation while the pointer is non-negative
             if ($this->generation_pointer >= 0) {
                 $this->generation_pointer--;
             }
-            # RC if the generation pointer is still more than the number of generations
+            /* RC if the generation pointer is still a valid (non-negative)
+               index, load that generation's offsets
+             */
             if ($this->generation_pointer >= 0) {
                 list($this->current_generation, $this->start_offset,
                     $this->last_offset, )
                     = $this->dictionary_info[$this->generation_pointer];
-                #set the current offset to the last one of the dictionary
+                //set the current offset to the last one of the dictionary
                 $this->current_offset = $this->last_offset;
             }
-            # if there are more generations and
+            // if there are more generations and
             if (!$this->no_more_generations &&
                 $this->current_generation > $generation &&
                 $this->generation_pointer <= 0) {
@@ -528,16 +440,17 @@ class ReverseIterator extends IndexBundleIterator
         if ($this->current_doc_offset !== null) {
             return [$this->current_generation, $this->current_doc_offset];
         }
-        # if the current offset is before the first one, or if gen pointer is less than 0
-        # we are in an impossible position
+        /* if the current offset is before the first one,
+           or if gen pointer is less than 0 we are in an impossible position
+         */
         if ($this->current_offset < $this->start_offset||
             $this->generation_pointer <= -1) {
             return -1;
         }
-        $index = IndexManager::getIndex($this->index_name);
+        $index = IndexManager::getIndex($this->index_name, false);
         $index->setCurrentShard($this->current_generation, true);
         $this->current_doc_offset = $index->getCurrentShard(
             )->docOffsetFromPostingOffset($this->current_offset, false);
         return [$this->current_generation, $this->current_doc_offset];
     }
-}
\ No newline at end of file
+}
diff --git a/src/library/index_bundle_iterators/WordIterator.php b/src/library/index_bundle_iterators/WordIterator.php
index 333dfb6df..bc80db2c1 100644
--- a/src/library/index_bundle_iterators/WordIterator.php
+++ b/src/library/index_bundle_iterators/WordIterator.php
@@ -86,17 +86,6 @@ class WordIterator extends IndexBundleIterator
      * @var array
      */
     public $dictionary_info;
-    /**
-     * File name (including path) of the feed shard for news items
-     * @var string
-     */
-    public $feed_shard_name;
-    /**
-     * Structure used to hold posting list start and stops for the query
-     * in the feed shard
-     * @var array
-     */
-    public $feed_info;
     /**
      * The total number of shards that have data for this word
      * @var int
@@ -147,10 +136,6 @@ class WordIterator extends IndexBundleIterator
     const HOST_KEY_POS = 17;
     /** Length of a doc key*/
     const KEY_LEN = 8;
-    /** If the $limit_feeds constructor input is true then limit the number
-     * of items coming from the feed shard to this count.
-     */
-    const LIMIT_FEEDS_COUNT = 25;
     /**
      * Creates a word iterator with the given parameters.
      *
@@ -163,15 +148,10 @@ class WordIterator extends IndexBundleIterator
      *      of edited and deleted search results
      * @param int $results_per_block the maximum number of results that can
      *      be returned by a findDocsWithWord call
-     * @param bool $limit_feeds feed results appear before all others when
-     *      gotten out of this iterator (may be reordered later). This flag
-     *      controls whether an upper bound of self::LIMIT_FEEDS_COUNT is
-     *      imposed on the number of feed results returned
      */
     public function __construct($word_key, $shift, $index_name, $raw = false,
-        $filter = null,
-        $results_per_block = IndexBundleIterator::RESULTS_PER_BLOCK,
-        $limit_feeds = false)
+        $filter = null, $results_per_block =
+        IndexBundleIterator::RESULTS_PER_BLOCK)
     {
         if ($raw == false) {
             //get rid of out modified base64 encoding
@@ -180,54 +160,10 @@ class WordIterator extends IndexBundleIterator
         $this->filter = $filter;
         $this->word_key = $word_key;
         $this->shift = $shift;
-        if($index_name == 13) {
-            $index_name = "NewsFeed";
-        }
         $this->index_name =  $index_name;
-        list($estimated_total, $this->dictionary_info) =
+        list($this->num_docs, $this->dictionary_info) =
             IndexManager::getWordInfo($index_name, $word_key, $shift,
             -1, -1, C\NUM_DISTINCT_GENERATIONS, true);
-        $this->feed_shard_name = C\WORK_DIRECTORY . "/feeds/index";
-        if ((!C\nsdefined('NO_FEEDS') || !C\NO_FEEDS)
-            && file_exists($this->feed_shard_name)) {
-            $this->use_feeds = true;
-        } else {
-            $this->use_feeds = false;
-        }
-        if ($this->use_feeds) {
-            if (!isset($this->dictionary_info[-1])) {
-                $this->feed_info = false;
-                $this->feed_empty = true;
-            } else {
-                $this->feed_info = $this->dictionary_info[-1];
-                unset($this->dictionary_info[-1]);
-                $this->feed_empty = false;
-            }
-        } else {
-            $this->feed_info = false;
-            $this->feed_empty = true;
-        }
-        if (is_array($this->feed_info)) {
-            list(,$this->feed_start, $this->feed_end, $this->feed_count,) =
-                $this->feed_info;
-            $this->feed_info = [$this->feed_start, $this->feed_end,
-                $this->feed_count];
-        } else {
-            $this->feed_start = 0;
-            $this->feed_end = 0;
-            $this->feed_count = 0;
-        }
-        if ($this->feed_count > 0) {
-            $this->using_feeds = true;
-        } else {
-            $this->using_feeds = false;
-        }
-        if ($limit_feeds && $this->feed_count > self::LIMIT_FEEDS_COUNT) {
-            $this->feed_count = self::LIMIT_FEEDS_COUNT;
-            $this->feed_end = $this->feed_start +
-                IndexShard::POSTING_LEN * (self::LIMIT_FEEDS_COUNT - 1);
-        }
-        $this->num_docs = $this->feed_count + $estimated_total;
         if ($this->dictionary_info === false) {
             $this->empty = true;
         } else {
@@ -246,7 +182,7 @@ class WordIterator extends IndexBundleIterator
         $this->results_per_block = $results_per_block;
         $this->current_block_fresh = false;
         $this->start_generation = 0;
-        if ($this->dictionary_info !== false || $this->feed_info !== false) {
+        if ($this->dictionary_info !== false) {
             $this->reset();
         }
     }
@@ -256,19 +192,12 @@ class WordIterator extends IndexBundleIterator
      */
     public function reset()
     {
-        if ($this->feed_count > 0) {
-            $this->using_feeds = true;
-        } else {
-            $this->using_feeds = false;
-        }
-        $no_feeds = $this->feed_empty || !$this->use_feeds;
         if (!$this->empty) {//we shouldn't be called when empty - but to be safe
             if ($this->start_generation > 0) {
-                list($estimated_total, $this->dictionary_info) =
+                list($this->num_docs, $this->dictionary_info) =
                     IndexManager::getWordInfo($this->index_name,
                     $this->word_key, 0, -1, 0, C\NUM_DISTINCT_GENERATIONS,
                     true);
-                $this->num_docs = $this->feed_count + $estimated_total;
                 ksort($this->dictionary_info);
                 $this->dictionary_info = array_values($this->dictionary_info);
                 $this->num_generations = count($this->dictionary_info);
@@ -283,12 +212,7 @@ class WordIterator extends IndexBundleIterator
             $this->last_offset = -1;
             $this->num_generations = -1;
         }
-        if (!$no_feeds) {
-            $this->current_offset = $this->feed_start;
-            $this->current_generation = -1;
-        } else {
-            $this->current_offset = $this->start_offset;
-        }
+        $this->current_offset = $this->start_offset;
         $this->generation_pointer = 0;
         $this->count_block = 0;
         $this->seen_docs = 0;
@@ -302,41 +226,21 @@ class WordIterator extends IndexBundleIterator
      */
     public function findDocsWithWord()
     {
-        $no_feeds = $this->feed_empty || !$this->use_feeds;
-        $feed_in_use = $this->using_feeds && !$no_feeds;
-        if ($this->empty && $no_feeds) {
+        if ($this->empty) {
             return -1;
         }
-        if (!$feed_in_use &&(($this->generation_pointer>=$this->num_generations)
-            || ($this->generation_pointer == $this->num_generations - 1 &&
-            $this->current_offset > $this->last_offset))) {
+        if ($this->generation_pointer == $this->num_generations - 1 &&
+            $this->current_offset > $this->last_offset) {
             return -1;
         }
         $pre_results = [];
-        if ($feed_in_use) {
-            $this->next_offset = $this->current_offset;
-            $feed_shard = IndexManager::getIndex("feed");
-            if ($feed_shard) {
-                $pre_results = $feed_shard->getPostingsSlice(
-                    $this->feed_start,
-                    $this->next_offset, $this->feed_end,
-                    $this->results_per_block);
-                $time = time();
-                foreach ($pre_results as $keys => $pre_result) {
-                    $pre_results[$keys][self::IS_FEED] = true;
-                    $delta = $time - $pre_result[self::SUMMARY_OFFSET];
-                    $pre_results[$keys][self::DOC_RANK] = 720000 /
-                        max($delta, 1);
-                }
-            }
-        } else if (!$this->empty) {
+        if (!$this->empty) {
             $this->next_offset = $this->current_offset;
             $index = IndexManager::getIndex($this->index_name);
             $index->setCurrentShard($this->current_generation, true);
             //the next call also updates next offset
             $shard = $index->getCurrentShard();
-            $pre_results = $shard->getPostingsSlice(
-                $this->start_offset,
+            $pre_results = $shard->getPostingsSlice($this->start_offset,
                 $this->next_offset, $this->last_offset,
                 $this->results_per_block);
         }
@@ -356,11 +260,7 @@ class WordIterator extends IndexBundleIterator
             } else {
                 continue;
             }
-            if (!empty($data[self::IS_FEED])) {
-                $data[self::CRAWL_TIME] = "feed";
-            } else {
-                $data[self::CRAWL_TIME] = $this->index_name;
-            }
+            $data[self::CRAWL_TIME] = $this->index_name;
             $results[$keys] = $data;
         }
         $this->count_block = count($results);
@@ -377,15 +277,9 @@ class WordIterator extends IndexBundleIterator
     public function advanceSeenDocs()
     {
         if ($this->current_block_fresh != true) {
-            if ($this->using_feeds && $this->use_feeds) {
-                $num_docs = min($this->results_per_block,
-                    IndexShard::numDocsOrLinks($this->next_offset,
-                        $this->feed_end));
-            } else {
-                $num_docs = min($this->results_per_block,
-                    IndexShard::numDocsOrLinks($this->next_offset,
-                        $this->last_offset));
-            }
+            $num_docs = min($this->results_per_block,
+                IndexShard::numDocsOrLinks($this->next_offset,
+                    $this->last_offset));
             $this->next_offset = $this->current_offset;
             $this->next_offset += IndexShard::POSTING_LEN * $num_docs;
             if ($num_docs < 0) {
@@ -421,16 +315,10 @@ class WordIterator extends IndexBundleIterator
             $this->advanceGeneration($gen_doc_offset[0]);
             $this->next_offset = $this->current_offset;
         }
-        $using_feeds = $this->using_feeds && $this->use_feeds;
-        if ($using_feeds) {
-            $shard = IndexManager::getIndex("feed");
-            $last = $this->feed_end;
-        } else {
-            $index = IndexManager::getIndex($this->index_name);
-            $index->setCurrentShard($this->current_generation, true);
-            $shard = $index->getCurrentShard();
-            $last = $this->last_offset;
-        }
+        $index = IndexManager::getIndex($this->index_name);
+        $index->setCurrentShard($this->current_generation, true);
+        $shard = $index->getCurrentShard();
+        $last = $this->last_offset;
         if ($this->current_generation == $gen_doc_offset[0]) {
             $offset_pair = $shard->nextPostingOffsetDocOffset(
                 $this->next_offset, $last, $gen_doc_offset[1]);
@@ -442,14 +330,8 @@ class WordIterator extends IndexBundleIterator
                     $offset_pair;
             }
         }
-        if ($this->current_generation == -1) {
-            $this->seen_docs = ($this->current_offset - $this->feed_start) /
-                IndexShard::POSTING_LEN;
-        } else {
-            $this->seen_docs = ($using_feeds) ? $this->feed_count : 0;
-            $this->seen_docs += ($this->current_offset - $this->start_offset) /
-                IndexShard::POSTING_LEN;
-        }
+        $this->seen_docs = ($this->current_offset - $this->start_offset) /
+            IndexShard::POSTING_LEN;
     }
     /**
      * Forwards the iterator one group of docs. This is what's called
@@ -465,10 +347,7 @@ class WordIterator extends IndexBundleIterator
             $this->advanceGeneration();
             $this->next_offset = $this->current_offset;
         }
-        $using_feeds = $this->using_feeds && $this->use_feeds;
-        if (($using_feeds &&
-            $this->current_offset > $this->feed_end) || (!$using_feeds &&
-            $this->current_offset > $this->last_offset)) {
+        if ($this->current_offset > $this->last_offset) {
             $this->advanceGeneration();
             $this->next_offset = $this->current_offset;
         }
@@ -481,10 +360,6 @@ class WordIterator extends IndexBundleIterator
      */
     public function advanceGeneration($generation = null)
     {
-        if ($this->using_feeds && $this->use_feeds) {
-            $this->using_feeds = false;
-            $this->generation_pointer = -1;
-        }
         if ($generation === null) {
             $generation = $this->current_generation;
         }
@@ -518,7 +393,6 @@ class WordIterator extends IndexBundleIterator
                     $this->generation_pointer--;
                 }
             }
-
         } while($this->current_generation < $generation &&
             $this->generation_pointer < $this->num_generations);
     }
@@ -529,22 +403,15 @@ class WordIterator extends IndexBundleIterator
      * @return mixed an array with the desired document offset
      * and generation; -1 on fail
      */
-    public function currentGenDocOffsetWithWord() {
+    public function currentGenDocOffsetWithWord()
+    {
         if ($this->current_doc_offset !== null) {
             return [$this->current_generation, $this->current_doc_offset];
         }
-        $feeds = $this->using_feeds && $this->use_feeds && !$this->feed_empty;
-        if ( ($feeds && $this->current_offset > $this->feed_end) ||
-            (!$feeds && ($this->current_offset > $this->last_offset||
-            $this->generation_pointer >= $this->num_generations))) {
+        if ($this->current_offset > $this->last_offset ||
+            $this->generation_pointer >= $this->num_generations) {
             return -1;
         }
-        if ($feeds) {
-            $index = IndexManager::getIndex("feed");
-            $this->current_doc_offset =
-                $index->docOffsetFromPostingOffset($this->current_offset);
-            return [-1, $this->current_doc_offset];
-        }
         $index = IndexManager::getIndex($this->index_name);
         $index->setCurrentShard($this->current_generation, true);
         $index->setCurrentShard($this->current_generation, true);
diff --git a/src/library/media_jobs/AnalyticsJob.php b/src/library/media_jobs/AnalyticsJob.php
index b515b7cf1..c679360eb 100644
--- a/src/library/media_jobs/AnalyticsJob.php
+++ b/src/library/media_jobs/AnalyticsJob.php
@@ -168,7 +168,7 @@ class AnalyticsJob extends MediaJob
         $num_machines = count($machine_urls);
         if ($num_machines <  1 || ($num_machines ==  1 &&
             UrlParser::isLocalhostUrl($machine_urls[0]))) {
-            $machine_urls = null;
+            $machine_urls = [];
         }
         $queries = [
             "CODE" => [100, 101, 102, 103, 122, 200, 201, 202, 203, 204,
diff --git a/src/library/media_jobs/FeedsUpdateJob.php b/src/library/media_jobs/FeedsUpdateJob.php
index 478f4cdc6..ad067906e 100644
--- a/src/library/media_jobs/FeedsUpdateJob.php
+++ b/src/library/media_jobs/FeedsUpdateJob.php
@@ -605,12 +605,12 @@ class FeedsUpdateJob extends MediaJob
         $time = time();
         $prune_shard_name = C\WORK_DIRECTORY . "/feeds/prune_index";
         $dir = C\CRAWL_DIR . '/cache/' . self::feed_index_data_base_name;
-        $info['DESCRIPTION'] = "NewsFeed";
-        $info['FORWARD_DIRECTION'] = false;
-        $this->index_archive = new IndexArchiveBundle($dir, false,
-            serialize($info), C\NUM_DOCS_PER_GENERATION, false);
+        $info['DESCRIPTION'] = "feed";
+        $info[self::DIRECTION] = self::BACKWARD;
+        $index_archive = new IndexArchiveBundle($dir, false,
+            serialize($info), C\NUM_DOCS_PER_GENERATION);
         $this->db->setWorldPermissionsRecursive($dir);
-        $prune_shard =  new IndexShard($prune_shard_name);
+        $prune_shard = new IndexShard($prune_shard_name);
         $too_old = $time - $age;
         $num_sites = 0;
         if (!$prune_shard) {
@@ -674,8 +674,9 @@ class FeedsUpdateJob extends MediaJob
                     $meta_ids[] = "safe:false";
                     $meta_ids[] = "safe:all";
                 }
-                $prune_shard->addDocumentWords($doc_keys, self::NEEDS_OFFSET_FLAG,
-                    $word_and_qa_lists["WORD_LIST"], $meta_ids, true, false);
+                $prune_shard->addDocumentWords($doc_keys,
+                    self::NEEDS_OFFSET_FLAG, $word_and_qa_lists["WORD_LIST"],
+                    $meta_ids, true, false);
                 $this->updateTrendingTermCounts($term_counts, $phrase_string,
                     $word_and_qa_lists["WORD_LIST"], $media_category,
                     $source_name, $lang,
@@ -694,42 +695,39 @@ class FeedsUpdateJob extends MediaJob
             unset($term_counts['seen']);
             $this->addTermCountsTrendingTable($db, $term_counts);
         }
-        L\crawlLog("----..deleting old feed items");
+        L\crawlLog("----..deleting old feed items");
         $sql = " DELETE FROM FEED_ITEM ";
         $db->execute($sql);
         L\crawlLog("----..done deleting old items");
-        // 1. check if indexshard is full or not. if it is, new gen
-        $generation = $this->index_archive->initGenerationToAdd(
-                $prune_shard->num_docs, null);
-        if ($generation != -1) {
-            $summary_offsets = [];
-            if (!empty($seen_sites)) {
-                // 2. add pages, get summary_offset
-                $this->index_archive->addPages($generation, self::SUMMARY_OFFSET,
-                    $seen_sites, $seen_url_count);
-                // keeping track of duplicates
-                $sql = " INSERT INTO FEED_ITEM (GUID) VALUES (?)";
-                foreach ($seen_sites as $site) {
-                    $result = $db->execute($sql, [$site[self::HASH]]);
-                    $site_url = str_replace('|', "%7C", $site[self::URL]);
-                    $host = UrlParser::getHost($site_url);
-                    $raw_guid = L\unbase64Hash($site[self::HASH]);
-                    $hash = L\crawlHash($site[self::URL], true) .
-                        $raw_guid . "d". substr(L\crawlHash(
-                        UrlParser::getHost($site[self::URL]) . "/", true), 1);
-                    $summary_offsets[$hash] = $site[self::SUMMARY_OFFSET];
-                }
-                unset($seen_sites);
-            }
-            $prune_string = $prune_shard->save(true, true);
-            $tmp_shard = IndexShard::load("news" , $prune_string);
-            if (!empty($summary_offsets)) {
-                $tmp_shard->changeDocumentOffsets($summary_offsets);
-                $this->index_archive->addIndexData($tmp_shard);
-                $this->index_dirty = true;
+        // 1. check if index shard is full or not. if it is, new gen
+        $generation = $index_archive->initGenerationToAdd(
+            $prune_shard->num_docs);
+        $summary_offsets = [];
+        if (!empty($seen_sites)) {
+            // 2. add pages, get summary_offset
+            $index_archive->addPages($generation,
+                self::SUMMARY_OFFSET, $seen_sites, $seen_url_count);
+            // keeping track of duplicates
+            $sql = " INSERT INTO FEED_ITEM (GUID) VALUES (?)";
+            foreach ($seen_sites as $site) {
+                $result = $db->execute($sql, [$site[self::HASH]]);
+                $site_url = str_replace('|', "%7C", $site[self::URL]);
+                $host = UrlParser::getHost($site_url);
+                $raw_guid = L\unbase64Hash($site[self::HASH]);
+                $hash = L\crawlHash($site[self::URL], true) .
+                    $raw_guid . "d". substr(L\crawlHash(
+                    UrlParser::getHost($site[self::URL]) . "/", true), 1);
+                $summary_offsets[$hash] = $site[self::SUMMARY_OFFSET];
             }
-            $this->index_archive->stopIndexingBundle();
+            unset($seen_sites);
+        }
+        $prune_string = $prune_shard->save(true, true);
+        $tmp_shard = IndexShard::load("news" , $prune_string);
+        if (!empty($summary_offsets)) {
+            $tmp_shard->changeDocumentOffsets($summary_offsets);
+            $index_archive->addIndexData($tmp_shard);
         }
+        $index_archive->stopIndexingBundle();
         if (file_exists($prune_shard_name)) {
             unlink($prune_shard_name);
         }
@@ -748,7 +746,7 @@ class FeedsUpdateJob extends MediaJob
      * @param array $word_or_phrase_list associate array of
      *      stemmed_word_or_phrase => positions in feed item of where occurs
      * @param string $media_category of feed source the item case from. We
-     *      tredning counts grouped by media category
+     *      track trending counts grouped by media category
      * @param string $source_name of feed source the item case from. We exclude
      *      from counts the name of the feed source
      * @param string $lang locale_tag for this feed item
@@ -853,7 +851,7 @@ class FeedsUpdateJob extends MediaJob
      * Updates TRENDING_TERM, hourly, daily, and weekly top term occurrences.
      * Removes entries older than a week
      *
-     * @param resource $db hadnle to database with TRENDING_TERM table
+     * @param resource $db handle to database with TRENDING_TERM table
      * @param array $term_counts for the most recent uupdate of the
      *      FEED_ITEM table an array [$lang => [$term => $occurences]]
      *      for the top NUM_TRENDING terms per language
@@ -864,7 +862,7 @@ class FeedsUpdateJob extends MediaJob
         $update_intervals = [
             C\ONE_HOUR => [24, C\ONE_DAY],
             C\ONE_DAY => [7, C\ONE_WEEK],
-            C\ONE_WEEK => [4, 4*C\ONE_WEEK],
+            C\ONE_WEEK => [4, 4 * C\ONE_WEEK],
         ];
         $num_timestamp_sql =
             "SELECT COUNT(DISTINCT TIMESTAMP) AS NUM_TIMESTAMPS " .
diff --git a/src/models/CrawlModel.php b/src/models/CrawlModel.php
index cc6ce9ae5..be5060fa3 100755
--- a/src/models/CrawlModel.php
+++ b/src/models/CrawlModel.php
@@ -1184,7 +1184,7 @@ EOT;
                 $crawl['CRAWL_TIME'] = $matches[2];
             } else {
                 $bundle_class_name = C\NS_LIB . "IndexArchiveBundle";
-                $crawl['CRAWL_TIME'] = 13;
+                $crawl['CRAWL_TIME'] = self::FEED_CRAWL_TIME;
             }
             $info = $bundle_class_name::getArchiveInfo($dir);
             if (isset($info['DESCRIPTION'])) {
@@ -1220,8 +1220,7 @@ EOT;
                     }
                     $sub_dir = opendir($sub_path);
                     $i = 0;
-                    while (($sub_name = readdir($sub_dir)) !==  false &&
-                        $i < 5) {
+                    while (($sub_name=readdir($sub_dir)) !== false && $i < 5) {
                         if ($sub_name[0] == 'A' && $sub_name[1] == 't') {
                             $crawl['RESUMABLE'] = true;
                             break 2;
diff --git a/src/models/PhraseModel.php b/src/models/PhraseModel.php
index d90732381..2b5cdb604 100755
--- a/src/models/PhraseModel.php
+++ b/src/models/PhraseModel.php
@@ -198,9 +198,6 @@ class PhraseModel extends ParallelModel
      * @param int $save_timestamp if this timestamp is nonzero, then save
      *     iterate position, so can resume on future queries that make
      *     use of the timestamp
-     * @param bool $limit_feeds if true the number of feed shard items to
-     *     allow in search results is limited to
-     *      WordIterator::LIMIT_FEEDS_COUNT
      *
      * @return array an array of summary data
      */
@@ -208,7 +205,7 @@ class PhraseModel extends ParallelModel
         $input_phrase, $low = 0, $results_per_page = C\NUM_RESULTS_PER_PAGE,
         $format = true, $filter = null, $use_cache_if_allowed = true,
         $raw = 0, $queue_servers = [], $guess_semantics = true,
-        $save_timestamp = 0, $limit_feeds = true)
+        $save_timestamp = 0)
     {
         if (C\QUERY_STATISTICS) {
             $indent= "&nbsp;&nbsp;";
@@ -398,7 +395,7 @@ class PhraseModel extends ParallelModel
             $out_results = $this->getSummariesByHash($word_structs,
                 $low, $phrase_num, $filter, $use_cache_if_allowed, $raw,
                 $queue_servers, $phrase, $save_timestamp_name,
-                $limit_feeds, $format_words);
+                $format_words);
             if (isset($out_results['PAGES']) &&
                 count($out_results['PAGES']) != 0) {
                 $out_count = 0;
@@ -1062,8 +1059,6 @@ class PhraseModel extends ParallelModel
      *     save iterate position, so can resume on future queries that make
      *     use of the timestamp. If used then $limit ignored and get next $num
      *     docs after $save_timestamp 's previous iterate position.
-     * @param bool $limit_feeds if true the number of feed shard items to
-     *     allow in search results is limited to WordIterator::LIMIT_FEEDS_COUNT
      * @param array $format_words words which should be highlighted in
      *     search snippets returned
      * @return array document summaries
@@ -1071,7 +1066,7 @@ class PhraseModel extends ParallelModel
     public function getSummariesByHash($word_structs, $limit, $num, $filter,
         $use_cache_if_allowed = true, $raw = 0, $queue_servers = [],
         $original_query = "", $save_timestamp_name = "",
-        $limit_feeds = true, $format_words = null)
+        $format_words = null)
     {
         $indent= "&nbsp;&nbsp;";
         $in2 = $indent . $indent;
@@ -1170,7 +1165,7 @@ class PhraseModel extends ParallelModel
         $get_query_time = microtime(true);
         $query_iterator = $this->getQueryIterator($word_structs, $filter, $raw,
             $to_retrieve, $queue_servers, $original_query,
-            $save_timestamp_name, $limit_feeds);
+            $save_timestamp_name);
         $get_query_time = L\changeInMicrotime($get_query_time);
         $num_retrieved = 0;
         $pages = [];
@@ -1584,15 +1579,13 @@ class PhraseModel extends ParallelModel
      * @param string $save_timestamp_name if this timestamp is non empty, then
      *     when making iterator get sub-iterators to advance to gen doc_offset
      *     stored with respect to save_timestamp if exists.
-     * @param bool $limit_feeds if true the number of feed shard items to
-     *     allow in search results is limited to WordIterator::LIMIT_FEEDS_COUNT
      *
      * @return &object an iterator for iterating through results to the
      * query
      */
     public function getQueryIterator($word_structs, $filter, $raw,
         &$to_retrieve, $queue_servers = [], $original_query = "",
-        $save_timestamp_name = "", $limit_feeds = true)
+        $save_timestamp_name = "")
     {
         $iterators = [];
         $total_iterators = 0;
@@ -1694,39 +1687,29 @@ class PhraseModel extends ParallelModel
                                 $distinct_key[1] : 0;
                             $distinct_key_id = L\unbase64Hash(
                                 $distinct_key[0]);
-                            // 13 is somewhat of a magic number right now
-                            if ($index_name == 13) {
-                                $dir_name = C\CRAWL_DIR."/cache/"
-                                    .self::index_data_base_name.$index_name;
-                            } else {
-                                $dir_name = C\CRAWL_DIR."/cache/"
-                                    .self::index_data_base_name.$index_name;
-                            }
                             $index = IndexManager::getIndex($index_name);
-                            $archive_info = $index->getArchiveInfo($dir_name);
-                            $description = unserialize($archive_info['DESCRIPTION']);
-                            if (isset($description['FORWARD_DIRECTION'])) {
-                                $forward_direction = $description['FORWARD_DIRECTION'];
+                            $archive_info = $index->getArchiveInfo(
+                                $index->dir_name);
+                            $description = unserialize(
+                                $archive_info['DESCRIPTION']);
+                            if (isset($description[self::DIRECTION])) {
+                                $direction =
+                                    $description[self::DIRECTION];
                             } else {
-                                $forward_direction = 1;
+                                $direction = self::FORWARD;
                             }
-                            // will have to change index name for checking iterator
-                            if ($forward_direction) {
+                            // have to change index name for checking iterator
+                            if ($direction == self::FORWARD) {
                                 $tmp_word_iterators[$m] =
                                     new I\WordIterator($distinct_key_id, $shift,
-                                    $index_name, true, $filter, $to_retrieve,
-                                    $limit_feeds);
-                            }
-                            else {
-                                $tmp_word_iterators[$m] =
-                                    new I\ReverseIterator($distinct_key_id, $shift,
-                                    $index_name, true, $filter, $to_retrieve,
-                                    $limit_feeds);
+                                    $index_name, true, $filter, $to_retrieve);
+                            } else {
+                                $tmp_word_iterators[$m] = new I\ReverseIterator(
+                                    $distinct_key_id, $shift, $index_name, true,
+                                    $filter, $to_retrieve);
                             }
                             $sum += $tmp_word_iterators[$m]->num_docs;
-                            if ($tmp_word_iterators[$m]->dictionary_info !=
-                                [] ||
-                                $tmp_word_iterators[$m]->feed_count > 0) {
+                            if ($tmp_word_iterators[$m]->dictionary_info !=[]) {
                                 $min_group_override = true;
                                 $m++;
                             } else {
diff --git a/tests/IndexShardTest.php b/tests/IndexShardTest.php
index 124b270da..2d1cf11b3 100644
--- a/tests/IndexShardTest.php
+++ b/tests/IndexShardTest.php
@@ -152,8 +152,8 @@ class IndexShardTest extends UnitTest
     }
     /**
      * Check if can store documents into a reverse index shard and retrieve them
-     * Shard is just a normal regular IndexShard, while Shard4 sets the additional
-     * flag which makes everything go in reverse
+     * Shard is just a normal regular IndexShard, while Shard4 sets the
+     * additional flag which makes everything go in reverse
      */
     public function addDocumentsGetPostingsSliceReverseTestCase()
     {
@@ -304,13 +304,13 @@ class IndexShardTest extends UnitTest
             "First offset set correctly");
         $this->assertEqual($last_offset, 40,
             "Second offset set correctly");
-        $forward = $this->test_objects['shard']->nextPostingOffsetDocOffset($first_offset, $last_offset, 5);
-        //print_r($forward);
-        $backward = $this->test_objects['shard4']->nextPostingOffsetDocOffset($first_offset, $last_offset, 5);
-        //print_r($backward);
+        $forward = $this->test_objects['shard']->nextPostingOffsetDocOffset(
+            $first_offset, $last_offset, 5);
+        $backward = $this->test_objects['shard4']->nextPostingOffsetDocOffset(
+            $first_offset, $last_offset, 5);
         $forward = $this->test_objects['shard']->getPostingsSlice($first_offset,
-                $first_offset, $last_offset, 5);
-        # have to reset offset values, since getPostingsSlice modifies by ref
+            $first_offset, $last_offset, 5);
+        // have to reset offset values, since getPostingsSlice modifies by ref
         $info = $this->test_objects['shard4']->getWordInfo(
             L\crawlHashWord('CCCCCCCC', true), true);
         list($first_offset, $last_offset,
@@ -325,17 +325,18 @@ class IndexShardTest extends UnitTest
         $index_name = 1573453725;
         $index_name = 1575422839;
         $index_archive_name = "IndexData" . $index_name;
-        $index_archive_name = "IndexDataNewsFeed";
-        $index_name = "NewsFeed";
+        $index_archive_name = "IndexDataFeed";
+        $index_name = "feed";
         $results_limit = 200;
         $total_results = 0;
         if (file_exists(C\CRAWL_DIR.'/cache/' . $index_archive_name)) {
-            $info = IndexManager::getWordInfo($index_name, $hash_key, $shift, -1, 0, -1);
+            $info = IndexManager::getWordInfo($index_name, $hash_key, $shift,
+                -1, 0, -1);
             $this->assertTrue(isset($info[0][4]));
             $forward = [];
             if (isset($info[0][4])) {
-                $word_iterator = new WordIterator($info[0][4], 0, $index_name, true, null, $results_limit);
-                // $norm_docs = $word_iterator->findDocsWithWord();
+                $word_iterator = new WordIterator($info[0][4], 0, $index_name,
+                    true, null, $results_limit);
                 $forward_offsets = [];
                 $offset = $word_iterator->currentGenDocOffsetWithWord();
                 array_push($forward_offsets, $offset);
@@ -352,11 +353,12 @@ class IndexShardTest extends UnitTest
                 $for_results = count($forward_offsets);
             }
             $backward = [];
-            $info = IndexManager::getWordInfo($index_name, $hash_key, $shift, -1, 0, -1);
+            $info = IndexManager::getWordInfo($index_name, $hash_key, $shift,
+                -1, 0, -1);
             $this->assertTrue(isset($info[0][4]));
             if (isset($info[0][4])) {
-                $word_rev_iterator = new ReverseIterator($info[0][4], 0, $index_name, true, null, $results_limit);
-                // $rev_docs = $word_rev_iterator->findDocsWithWord();
+                $word_rev_iterator = new ReverseIterator($info[0][4], 0,
+                    $index_name, true, null, $results_limit);
                 $backward_offsets = [];
                 $offset = $word_rev_iterator->currentGenDocOffsetWithWord();
                 array_push($backward_offsets, $offset);
@@ -800,4 +802,4 @@ class IndexShardTest extends UnitTest
         $this->assertTrue(isset($c_data["AAAAAAAABBBBBBBBCCCCCCCC"]),
             "Save without dictionary test works");
     }
-}
\ No newline at end of file
+}
ViewGit