viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]
diff --git a/src/library/IndexShard.php b/src/library/IndexShard.php index 405b027f8..eca590ee5 100644 --- a/src/library/IndexShard.php +++ b/src/library/IndexShard.php @@ -688,7 +688,7 @@ class IndexShard extends PersistentStructure implements CrawlConstants $next = $posting_end + 1; $num_docs_or_links = self::numDocsOrLinks($start_offset, $last_offset, - $total_posting_len/$num_postings_so_far); + $total_posting_len / $num_postings_so_far); list($doc_id, , $item) = $this->makeItem($posting, $num_docs_or_links); $results[$doc_id] = $item; diff --git a/src/library/index_bundle_iterators/IndexBundleIterator.php b/src/library/index_bundle_iterators/IndexBundleIterator.php index bfb445b40..185ad5ec4 100644 --- a/src/library/index_bundle_iterators/IndexBundleIterator.php +++ b/src/library/index_bundle_iterators/IndexBundleIterator.php @@ -151,19 +151,20 @@ abstract class IndexBundleIterator implements CrawlConstants */ public function genDocOffsetCmp($gen_doc1, $gen_doc2) { - //less generation + //less generation or greater if ($gen_doc1[0] < $gen_doc2[0]) { return -1; + } else if ($gen_doc1[0] > $gen_doc2[0]) { + return 1; } - //equal generation - if ($gen_doc1[0] == $gen_doc2[0]) { - if ($gen_doc1[1] == $gen_doc2[1]) { - return 0; //equal offset - } else if ($gen_doc1[1] < $gen_doc2[1]) { - return -1; // less offset - } + //less offset or greater + if ($gen_doc1[1] < $gen_doc2[1]) { + return -1; + } else if ($gen_doc1[1] > $gen_doc2[1]) { + return 1; } - return 1; + //equal + return 0; } /** * Gets the current block of doc ids and score associated with the diff --git a/src/library/index_bundle_iterators/IntersectIterator.php b/src/library/index_bundle_iterators/IntersectIterator.php index a39dcf1a0..31a5e3736 100644 --- a/src/library/index_bundle_iterators/IntersectIterator.php +++ b/src/library/index_bundle_iterators/IntersectIterator.php @@ -107,7 +107,7 @@ class IntersectIterator extends IndexBundleIterator * Number of seconds before timeout and stop * syncGenDocOffsetsAmongstIterators if slow */ - const SYNC_TIMEOUT = 4; + const SYNC_TIMEOUT = 3; /** * Creates an intersect iterator with the given parameters. * @@ -400,13 +400,13 @@ class IntersectIterator extends IndexBundleIterator public function syncGenDocOffsetsAmongstIterators() { if ($this->sync_timer_on) { - $timer_on = true; if ($this->sync_time === 0) { $this->sync_time = time(); } $time_out = self::SYNC_TIMEOUT + $this->sync_time; } else { - $timer_on = false; + //will probably never timeout this way so like no timer + $time_out = 2 * (self::SYNC_TIMEOUT + $this->sync_time); } if (($biggest_gen_offset = $this->index_bundle_iterators[ 0]->currentGenDocOffsetWithWord()) == -1) { @@ -415,16 +415,12 @@ class IntersectIterator extends IndexBundleIterator $gen_doc_offset[0] = $biggest_gen_offset; $all_same = true; for ($i = 1; $i < $this->num_iterators; $i++) { - $cur_gen_doc_offset = - $this->index_bundle_iterators[ - $i]->currentGenDocOffsetWithWord(); - $gen_doc_offset[$i] = $cur_gen_doc_offset; - if ($timer_on && time() > $time_out) { - return -1; - } - if ($cur_gen_doc_offset == -1) { + if ((($cur_gen_doc_offset = $this->index_bundle_iterators[ + $i]->currentGenDocOffsetWithWord()) == -1) || + time() > $time_out) { return -1; } + $gen_doc_offset[$i] = $cur_gen_doc_offset; $gen_doc_cmp = $this->genDocOffsetCmp($cur_gen_doc_offset, $biggest_gen_offset); if ($gen_doc_cmp > 0) { @@ -440,20 +436,18 @@ class IntersectIterator extends IndexBundleIterator $last_changed = -1; $i = 0; while($i != $last_changed) { - if ($timer_on && time() > $time_out) { + if (time() > $time_out) { return -1; } - if ($last_changed == -1) $last_changed = 0; if ($this->genDocOffsetCmp($gen_doc_offset[$i], $biggest_gen_offset) < 0) { $iterator = $this->index_bundle_iterators[$i]; $iterator->advance($biggest_gen_offset); - $cur_gen_doc_offset = - $iterator->currentGenDocOffsetWithWord(); - $gen_doc_offset[$i] = $cur_gen_doc_offset; - if ($cur_gen_doc_offset == -1) { + if( ($cur_gen_doc_offset = + $iterator->currentGenDocOffsetWithWord()) == -1) { return -1; } + $gen_doc_offset[$i] = $cur_gen_doc_offset; if ($this->genDocOffsetCmp($cur_gen_doc_offset, $biggest_gen_offset) > 0) { $last_changed = $i; @@ -463,6 +457,7 @@ class IntersectIterator extends IndexBundleIterator $i++; if ($i == $this->num_iterators) { $i = 0; + $last_changed = max($last_changed, 0); } } return 1; diff --git a/src/library/index_bundle_iterators/WordIterator.php b/src/library/index_bundle_iterators/WordIterator.php index a169ef11a..42c6d6052 100644 --- a/src/library/index_bundle_iterators/WordIterator.php +++ b/src/library/index_bundle_iterators/WordIterator.php @@ -409,14 +409,57 @@ class WordIterator extends IndexBundleIterator */ public function advance($gen_doc_offset = null) { - if ($gen_doc_offset != null) { //only advance if $gen_doc_offset bigger - $cur_gen_doc_offset = $this->currentGenDocOffsetWithWord(); - if ($cur_gen_doc_offset == -1 || - $this->genDocOffsetCmp($cur_gen_doc_offset, - $gen_doc_offset) >= 0) { - return; + if ($gen_doc_offset == null) { + $this->plainAdvance(); + return; + } + $cur_gen_doc_offset = $this->currentGenDocOffsetWithWord(); + if ($cur_gen_doc_offset == -1 || + $this->genDocOffsetCmp($cur_gen_doc_offset, + $gen_doc_offset) >= 0) { + return; + } + $this->plainAdvance(); + if ($this->current_generation < $gen_doc_offset[0]) { + $this->advanceGeneration($gen_doc_offset[0]); + $this->next_offset = $this->current_offset; + } + $using_feeds = $this->using_feeds && $this->use_feeds; + if ($using_feeds) { + $shard = IndexManager::getIndex("feed"); + $last = $this->feed_end; + } else { + $index = IndexManager::getIndex($this->index_name); + $index->setCurrentShard($this->current_generation, true); + $shard = $index->getCurrentShard(); + $last = $this->last_offset; + } + if ($this->current_generation == $gen_doc_offset[0]) { + $offset_pair = $shard->nextPostingOffsetDocOffset( + $this->next_offset, $last, $gen_doc_offset[1]); + if ($offset_pair === false) { + $this->advanceGeneration(); + $this->next_offset = $this->current_offset; + } else { + list($this->current_offset, $this->current_doc_offset) = + $offset_pair; } } + if ($this->current_generation == -1) { + $this->seen_docs = ($this->current_offset - $this->feed_start) / + IndexShard::POSTING_LEN; + } else { + $this->seen_docs = ($using_feeds) ? $this->feed_count : 0; + $this->seen_docs += ($this->current_offset - $this->start_offset) / + IndexShard::POSTING_LEN; + } + } + /** + * Forwards the iterator one group of docs. This is what's called + * by @see advance($gen_doc_offset) if $gen_doc_offset is null + */ + public function plainAdvance() + { $this->advanceSeenDocs(); $this->current_doc_offset = null; if ($this->current_offset < $this->next_offset) { @@ -432,45 +475,6 @@ class WordIterator extends IndexBundleIterator $this->advanceGeneration(); $this->next_offset = $this->current_offset; } - if ($gen_doc_offset !== null) { - if ($this->current_generation < $gen_doc_offset[0]) { - $this->advanceGeneration($gen_doc_offset[0]); - $this->next_offset = $this->current_offset; - } - $using_feeds = $this->using_feeds && $this->use_feeds; - if ($using_feeds) { - $shard = IndexManager::getIndex("feed"); - $last = $this->feed_end; - } else { - $index = IndexManager::getIndex($this->index_name); - $index->setCurrentShard($this->current_generation, true); - $shard = $index->getCurrentShard(); - $last = $this->last_offset; - } - - if ($this->current_generation == $gen_doc_offset[0]) { - $offset_pair = - $shard->nextPostingOffsetDocOffset($this->next_offset, - $last, $gen_doc_offset[1]); - if ($offset_pair === false) { - $this->advanceGeneration(); - $this->next_offset = $this->current_offset; - } else { - list($this->current_offset, - $this->current_doc_offset) = $offset_pair; - } - } - if ($this->current_generation == -1) { - $this->seen_docs = - ($this->current_offset - $this->feed_start)/ - IndexShard::POSTING_LEN; - } else { - $this->seen_docs = ($using_feeds) ? $this->feed_count : 0; - $this->seen_docs += - ($this->current_offset - $this->start_offset)/ - IndexShard::POSTING_LEN; - } - } } /** * Switches which index shard is being used to return occurrences of