viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php index 1235b4d14..81835fab3 100644 --- a/src/library/IndexDocumentBundle.php +++ b/src/library/IndexDocumentBundle.php @@ -1533,12 +1533,14 @@ class IndexDocumentBundle implements CrawlConstants { static $file_handles = []; static $memory_limit = 0; + $max_cache_size = 500; if (!$memory_limit) { $memory_limit = metricToInt(ini_get("memory_limit")) * C\MEMORY_FILL_FACTOR; } - if (memory_get_usage() > $memory_limit) { - $file_handles = []; /*just in case file handles causing + if (memory_get_usage() > $memory_limit || + count($file_handles) > $max_cache_size) { + array_shift($file_handles); /*just in case file handles causing memory leak */ } if (empty($file_handles[$partition])) { @@ -1552,6 +1554,8 @@ class IndexDocumentBundle implements CrawlConstants $file_handles[$partition] = $fh; } else { $fh = $file_handles[$partition]; + unset($file_handles[$partition]); // move to front of queue + $file_handles[$partition] = $fh; } if ($fh && fseek($fh, $offset) == 0 && $len > 0) { $out = fread($fh, $len); @@ -1639,7 +1643,7 @@ class IndexDocumentBundle implements CrawlConstants * POSITION_OFFSETS values. It also computes the of the frequencies of items * within the list of postings. This method is current only used for * active partition in an index (the one whose terms haven't yet been added - * to the B+-tree). + * to the LSMtree). * * @param array &$postings a reference to an array of posting lists for a * term (this will be changed by this method) diff --git a/src/library/LSMTree.php b/src/library/LSMTree.php index 639ad42f5..8dc98f979 100644 --- a/src/library/LSMTree.php +++ b/src/library/LSMTree.php @@ -89,11 +89,13 @@ class LSMTree */ public $max_file_size; /** + * * @var Tier */ public $put_slot = null; /** - * + * PackedTableTools used to pack/unpack key/values records + * @var PackedTableTools */ public $table_tools; /** @@ -217,7 +219,9 @@ class LSMTree crawlLog("..End LSMTiers Merging Tiers.."); } /** + * Deletes tier $tier from the LSMTree * + * @param int $tier tier to delete from LSMTree */ public function emptyTier($tier) { @@ -247,7 +251,11 @@ class LSMTree return $encoded_key . $out_values; } /** + * Merges two tier slots of the same tier $tier into a single tier slot + * at tier $tier + 1. If fewer than two slots filled at a given tier + * than does nothing. * + * @param int $tier tier to perform merging for */ public function mergeTier($tier) { @@ -327,7 +335,12 @@ class LSMTree return $rows; } /** + * Returns the values associated with a key in a given Tier of the + * LSMTree * + * @param int $tier tier to get values from + * @param string $key key to look up values for + * @return array values associated with $key (unpacked) */ public function getTier($tier, $key) { @@ -338,37 +351,17 @@ class LSMTree $slot = new Tier($slot_folder, $this->table_tools); return $slot->get($key); } - /** - * Save the operating parameters of this LSMTree - */ - public function saveParameters() - { - $parameter_path = $this->folder . "/" . self::PARAMETERS_FILE; - file_put_contents($parameter_path, serialize($this->parameters), - LOCK_EX); - } - /** - * Returns the parameters (such as its signature, max keys per nodes, etc) - * used to configure the LSMTree stored at $folder - * - * @param string $folder file path to a stored LSMTree - * @return array configuration info about the LSMTree - */ - public static function getParameterInfo($folder) - { - $parameter_path = $folder . "/" . self::PARAMETERS_FILE; - if(file_exists($parameter_path)) { - $parameters = unserialize(file_get_contents($parameter_path)) ?? []; - return $parameters; - } else { - return []; - } - } } /** + * Splits a string containing one row of data for the LSMTree into the + * key and a string for the values. * + * @param string $entry string encoded row to be split + * @param PackedTableTools $table_tools which has the format used to + * encode the entry + * @return array [$key, $values (as a string)] */ -function entryToKeyValues($entry, $table_tools, $decode_key = false) +function entryToKeyValues($entry, $table_tools) { $key_len = $table_tools->key_len; $key = substr($entry, 0, $key_len); @@ -376,68 +369,75 @@ function entryToKeyValues($entry, $table_tools, $decode_key = false) return [$key, $values]; } /** - * + * Auxiliary Class used to manage a single Tier from the Logarithmic Merge Tree + * structure */ class Tier { /** - * + * how many data files should be in a block folder before making a + * new block folder + * @var int */ public $block_factor; /** - * + * For data that has not been flushed to disk, the first key in sorted + * order + * @var string */ public $first_active_key; /** - * + * File path to where data in this tier is to be stored + * @var string */ public $folder; /** - * + * @var int */ public $iterator_folder_index; /** - * + * @var array */ public $iterator_folders; /** - * + * @var int */ public $iterator_file_index; /** - * + * @var array */ public $iterator_files; /** - * + * @var int */ public $iterator_entry_index; /** - * + * @var array */ public $iterator_entries; /** - * + * @var int */ public $max_file_size; /** - * + * Access mode for data in this tier: r - read, w - write + * @var string */ public $mode; /** - * + * @var PackedTableTools */ public $table_tools; /** - * + * @var string */ private $records; /** - * + * @var string */ private $active_filename; /** - * + * @var array */ private static $cache = []; /** @@ -584,7 +584,7 @@ class Tier } if (empty($this->records)) { list($this->first_active_key,) = entryToKeyValues($entry, - $table_tools, true); + $table_tools); } $separator = (strlen($this->records) > 0) ? "\xFF" : ""; $this->records .= $separator . $encoded_entry; @@ -617,7 +617,10 @@ class Tier return $haystack[$low]; } /** + * Returns the first entry as a packed string in the LSMTree tier. Also + * resets iterator of this object. * + * @return string|bool first entry if exists, else false */ public function firstEntry() { @@ -649,7 +652,9 @@ class Tier return decode255($this->iterator_entries[0]); } /** - * + * Returns the next tier entry as a packed string iterated over by this + * Tier object. + * @return string|bool next entry if exists, else false */ public function next() { @@ -692,7 +697,8 @@ class Tier return decode255($this->iterator_entries[0]) ?? false; } /** - * + * Resets to the first entry of the tier, the iterator associated with + * the current Tier object. */ public function reset() { @@ -704,7 +710,14 @@ class Tier $this->iterator_entries = []; } /** + * Write a sequence of string records $lines into the file $filename, + * separating records with delimiter $delimiter. Deletes file from LRU + * cache of read files * + * @param string $filename name of file to write records to + * @param array $lines records to write to $filename + * @param string $delimiter string used to separate one records from the + * next */ function writeRecords($filename, $lines, $delimiter = "\n") { @@ -713,7 +726,13 @@ class Tier unset(self::$cache[$name_hash]); } /** - * + * Returns the contents of a file managed by this LSMTree + * as a sequence of string records. Contents come from either + * a cache or from the filesystem. Has logic for LRU cache + * @param string $filename name of file to get records for + * @param string $delimiter delimeter used to separate individual + * records + * @return array of string records */ function readRecords($filename, $delimiter = "\n") { @@ -726,7 +745,7 @@ class Tier } self::$cache[$name_hash] = explode($delimiter, file_get_contents($filename)); - if (count(self::$cache[$name_hash]) >= LSMTRee::RECORD_CACHE_SIZE) { + if (count(self::$cache[$name_hash]) >= LSMTree::RECORD_CACHE_SIZE) { array_shift(self::$cache); } return self::$cache[$name_hash]; diff --git a/tests/LSMTreeTest.php b/tests/LSMTreeTest.php index befaf32b1..d8fd5c194 100644 --- a/tests/LSMTreeTest.php +++ b/tests/LSMTreeTest.php @@ -157,8 +157,8 @@ class LSMTreeTest extends UnitTest $lsm_tree->put($entry); } $this->assertTrue(file_exists($lsm_tree->folder . - "/Tier0/A/F00000000000key12") && !file_exists($lsm_tree->folder . - "/Tier0/A/F00000000000key14"), + "/Tier0000/A/F00000000000key12") && !file_exists($lsm_tree->folder . + "/Tier0000/A/F00000000000key14"), "Correct number of block folders created"); } /** @@ -181,7 +181,7 @@ class LSMTreeTest extends UnitTest $entry = ["KEY" => $key, "VALUE" => "value$i"]; $lsm_tree->put($entry); } - $block_folder = $lsm_tree->folder . "/Tier0/A/F000000000000key0"; + $block_folder = $lsm_tree->folder . "/Tier0000/A/F000000000000key0"; $this->assertTrue(file_exists("$block_folder/D00000000000key13") && !file_exists("$block_folder/D00000000000key14"), "Correct number of data files created");