viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]
Filename | |
---|---|
src/executables/ArcTool.php | |
src/library/IndexDocumentBundle.php | |
src/library/IndexManager.php | |
src/library/index_bundle_iterators/WordIterator.php |
diff --git a/src/executables/ArcTool.php b/src/executables/ArcTool.php index 2c8345806..da30b797f 100755 --- a/src/executables/ArcTool.php +++ b/src/executables/ArcTool.php @@ -1476,6 +1476,8 @@ EOD; $recent_log_times[$i] = time(); } $rebuild_dones = []; + $save_partition = 10; + echo "$next_partition < $save_partition\n"; while ($next_partition < $save_partition) { if ($old_next_partition != $next_partition) { $old_next_partition = $next_partition; @@ -1548,7 +1550,6 @@ EOD; */ $this->rebuildIndexBundle($archive_path, $start_generation, false); } - $index_archive->forceSave(); echo "\nIndex $rebuild complete!\n"; } /** diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php index d565177ae..ed65f7a05 100644 --- a/src/library/IndexDocumentBundle.php +++ b/src/library/IndexDocumentBundle.php @@ -1539,7 +1539,7 @@ class IndexDocumentBundle implements CrawlConstants static $memory_limit = 0; if (!$memory_limit) { $memory_limit = - C\INDEX_FILE_MEMORY_LIMIT * C\MEMORY_FILL_FACTOR; + metricToInt(C\INDEX_FILE_MEMORY_LIMIT) * C\MEMORY_FILL_FACTOR; } if (memory_get_usage() > $memory_limit || count($file_handles) > self::MAX_POSTING_CACHE_ITEMS) { diff --git a/src/library/IndexManager.php b/src/library/IndexManager.php index 56a23aecb..ab867c8e9 100644 --- a/src/library/IndexManager.php +++ b/src/library/IndexManager.php @@ -286,15 +286,15 @@ class IndexManager implements CrawlConstants return $tmp; } $index = self::getIndex($index_name); - $pre_info = []; $start_generation = ($start_generation < 0) ? 0 : $start_generation; - $info_cache[$lookup_hash] = $index->getWordInfo($term_id, + $word_info = $index->getWordInfo($term_id, $threshold, $start_generation, $num_distinct_generations, $with_remaining_total); + $info_cache[$lookup_hash] = $word_info; if (count($info_cache) >= self::INFO_CACHE_SIZE) { array_shift($info_cache); } - return $info_cache[$lookup_hash]; + return $word_info; } /** * Finds posting info related to the most recent version diff --git a/src/library/index_bundle_iterators/WordIterator.php b/src/library/index_bundle_iterators/WordIterator.php index da752e575..11d6bbd22 100644 --- a/src/library/index_bundle_iterators/WordIterator.php +++ b/src/library/index_bundle_iterators/WordIterator.php @@ -128,12 +128,6 @@ class WordIterator extends IndexBundleIterator * @var int */ public $next_offset; - /** - * Used to keep track of whether getWordInfo might still get more - * data on the search terms as advance generations - * @var bool - */ - public $no_more_generations; /** * The total number of shards that have data for this word * @var int @@ -329,41 +323,31 @@ class WordIterator extends IndexBundleIterator $this->index_version = IndexManager::getVersion($index_name); $word_info = IndexManager::getWordInfo($index_name, $word_key, -1, -1, C\NUM_DISTINCT_GENERATIONS, true); - if ($this->index_version < 3) { - list($this->num_docs, $this->dictionary_info) = $word_info; - } else { - $this->total_num_docs = $word_info['TOTAL_NUM_DOCS'] ?? 0; - $this->total_num_docs_and_links = - $word_info['TOTAL_NUM_LINKS_AND_DOCS'] ?? 0; - $this->max_items_per_partition = - $word_info['MAX_ITEMS_PER_PARTITION'] ?? - PartitionDocumentBundle::MAX_ITEMS_PER_FILE; - $this->avg_items_per_partition = - $word_info['AVG_ITEMS_PER_PARTITION'] ?? - PartitionDocumentBundle::MAX_ITEMS_PER_FILE; - $this->total_number_of_partitions = - $word_info['TOTAL_NUMBER_OF_PARTITIONS'] ?? 0; - $this->num_docs = $word_info['TOTAL_COUNT'] ?? 0; - $this->num_occurrences = $word_info['TOTAL_OCCURRENCES'] ?? 0; - $this->dictionary_info = $word_info['ROWS'] ?? []; - $this->threshold_exceeded = $word_info['THESHOLD_EXCEEDED'] ?? - false; - $this->archive_file = $word_info['ARCHIVE_FILE'] ?? ""; - } + $this->total_num_docs = $word_info['TOTAL_NUM_DOCS'] ?? 0; + $this->total_num_docs_and_links = + $word_info['TOTAL_NUM_LINKS_AND_DOCS'] ?? 0; + $this->max_items_per_partition = + $word_info['MAX_ITEMS_PER_PARTITION'] ?? + PartitionDocumentBundle::MAX_ITEMS_PER_FILE; + $this->avg_items_per_partition = + $word_info['AVG_ITEMS_PER_PARTITION'] ?? + PartitionDocumentBundle::MAX_ITEMS_PER_FILE; + $this->total_number_of_partitions = + $word_info['TOTAL_NUMBER_OF_PARTITIONS'] ?? 0; + $this->num_docs = $word_info['TOTAL_COUNT'] ?? 0; + $this->num_occurrences = $word_info['TOTAL_OCCURRENCES'] ?? 0; + $this->dictionary_info = $word_info['ROWS'] ?? []; + $this->threshold_exceeded = $word_info['THESHOLD_EXCEEDED'] ?? + false; + $this->archive_file = $word_info['ARCHIVE_FILE'] ?? ""; if (empty($this->dictionary_info)) { $this->empty = true; $this->num_generations = 0; } else { - if ($this->index_version < 3) { - ksort($this->dictionary_info); - $this->dictionary_info = array_values($this->dictionary_info); - } $this->num_generations = count($this->dictionary_info); $this->empty = ($this->num_generations == 0); } $this->term_info_computed = true; - $this->no_more_generations = $this->index_version >= 3 - || count($info) < C\NUM_DISTINCT_GENERATIONS; } /** * Hook function used by currentDocsWithWord to return the current block @@ -1059,35 +1043,6 @@ class WordIterator extends IndexBundleIterator $this->current_offset = ($is_ascending) ? $this->start_offset: $this->last_offset; } - if (!$this->no_more_generations) { - $gen_check = ($is_ascending) ? - ($this->current_generation < $generation && - $this->generation_pointer >= $this->num_generations) : - ($this->current_generation > $generation && - $this->generation_pointer <= 0); - if ($gen_check) { - $index_info = IndexManager::getWordInfo($this->index_name, - $this->word_key, 0, $this->num_generations, - C\NUM_DISTINCT_GENERATIONS, true); - list($estimated_remaining_total, $info) = $index_info; - if (count($info) > 0) { - $this->num_docs = $this->seen_docs + - $estimated_remaining_total; - ksort($info); - $this->dictionary_info = array_merge( - $this->dictionary_info, array_values($info)); - $this->num_generations = count($this->dictionary_info); - $this->no_more_generations = $this->index_version >= 3 - || count($info) < C\NUM_DISTINCT_GENERATIONS; - //will increment back to where were next loop - if ($is_ascending) { - $this->generation_pointer--; - } else { - $this->generation_pointer++; - } - } - } - } $gen_check = ($is_ascending) ? ($this->current_generation < $generation && $this->generation_pointer < $this->num_generations) :