viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php
index 2faed10e5..a40ece3e9 100644
--- a/src/library/IndexDocumentBundle.php
+++ b/src/library/IndexDocumentBundle.php
@@ -1614,28 +1614,39 @@ class IndexDocumentBundle implements CrawlConstants
         $len_posting_strings = strlen($postings_string);
         for ($i = 0; $i < $num_items; $i++) {
             if (!isset($postings_string[$current_pos])) {
-                // crawlLog("Posting decode error");
-                // crawlLog("..Number to decode items: " . $num_items);
-                // crawlLog("..Number decoded: " . $i);
-                // crawlLog("..Length posting string: " .
-                //     strlen($postings_string));
-                // crawlLog("..Current position: " . $current_pos);
-                return []; // sanity check 1
+                crawlLog("Posting decode error");
+                crawlLog("..Number to decode items: " . $num_items);
+                crawlLog("..Number decoded: " . $i);
+                crawlLog("..Length posting string: " .
+                    strlen($postings_string));
+                crawlLog("..Current position: " . $current_pos);
+                return [$items, $sum_frequencies]; // sanity check 1
             }
             $int_info = ord($postings_string[$current_pos]);
             $current_pos++;
             $len_unpack_info = $unpack_len_map[$int_info];
             if ($current_pos + $len_unpack_info > $len_posting_strings) {
-                // crawlLog("Posting decode error");
-                // crawlLog("..Number to decode items: " . $num_items);
-                // crawlLog("..Number decoded: " . $i);
-                // crawlLog("..Length posting string: " .
-                //     strlen($postings_string));
-                // crawlLog("..Current position: " . $current_pos);
-                return []; // sanity check 2
+                crawlLog("Posting decode error");
+                crawlLog("..Number to decode items: " . $num_items);
+                crawlLog("..Number decoded: " . $i);
+                crawlLog("..Length posting string: " .
+                    strlen($postings_string));
+                crawlLog("..Current position: " . $current_pos);
+                return [$items, $sum_frequencies]; // sanity check 2
             }
             $pre_item = unpack($unpack_map[$int_info], $postings_string,
                 $current_pos);
+            if ($pre_item["FREQUENCY"] > C\MAX_DESCRIPTION_LEN) {
+                crawlLog("Posting decode error! Frequency too large");
+                crawlLog("..Number to decode items: " . $num_items);
+                crawlLog("..Number decoded: " . $i);
+                crawlLog("..Length posting string: " .
+                    strlen($postings_string));
+                crawlLog("..Current position: " . $current_pos);
+                crawlLog("..Large Frequency Observed: ".
+                    $pre_item["FREQUENCY"] . " ". C\MAX_DESCRIPTION_LEN);
+                return [$items, $sum_frequencies]; // sanity check 3
+            }
             $item = $pre_item;
             $item["DOC_MAP_INDEX"] += $doc_map_index;
             $item["POSITIONS_OFFSET"] += $positions_offset;
diff --git a/src/library/index_bundle_iterators/WordIterator.php b/src/library/index_bundle_iterators/WordIterator.php
index 0b35c4a89..65bb718e6 100644
--- a/src/library/index_bundle_iterators/WordIterator.php
+++ b/src/library/index_bundle_iterators/WordIterator.php
@@ -510,7 +510,6 @@ class WordIterator extends IndexBundleIterator
         } else {
             $position_list = [];
         }
-        return $position_list;
     }
 
     /**
@@ -801,12 +800,12 @@ class WordIterator extends IndexBundleIterator
                     $first_index = $mid_index;
                 }
             }
-            $weight = $descriptions_scores[$first_index]['SCORE'];;
+            $weight = $descriptions_scores[$first_index]['SCORE'];
             $start_description_pos = $descriptions_scores[$first_index]['POS'];
-            $len_description = ($first_index == $num_scores - 1) ?
+            $len_description = max(abs(($first_index == $num_scores - 1) ?
                 $pseudo_doc_length - $start_description_pos :
                 $descriptions_scores[$first_index + 1]['POS'] -
-                $start_description_pos;
+                $start_description_pos), $len_term, 1);
             $frequency_term = $weight * $len_term / $len_description;
             if ($position <= 0) {
                 $bonuses += $weight; //$frequency_term;