viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]
<?php /** * SeekQuarry/Yioop -- * Open Source Pure PHP Search Engine, Crawler, and Indexer * * Copyright (C) 2009 - 2023 Chris Pollett chris@pollett.org * * LICENSE: * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <https://www.gnu.org/licenses/>. * * END LICENSE * * @author Chris Pollett chris@pollett.org * @license https://www.gnu.org/licenses/ GPL3 * @link https://www.seekquarry.com/ * @copyright 2009 - 2023 * @filesource */ namespace seekquarry\yioop\library; use seekquarry\yioop\configs as C; /** * This class implements the B+-tree structure over existing file system * * @author Chris Pollett */ class BPlusTree { /** * The default compressor object used to compress tree nodes */ const DEFAULT_COMPRESSOR = C\NS_COMPRESSORS . "NonCompressor"; /** * The MAXIMUM number of keys stored in a tree node (will split if have * more) */ const MAX_KEYS = 501; /** * Maximum size of a cache of any type used by this BPlusTree */ const MAX_CACHE_SIZE = 200; /** * Default parameters to use when constructing a BPlusTree */ const DEFAULT_PARAMETERS = ["NODE_COMPRESSOR" => self::DEFAULT_COMPRESSOR, "BLOB_COMPRESSOR" => self::DEFAULT_COMPRESSOR, "FORMAT" => ["PRIMARY KEY" => ["KEY", -1], "VALUE" => "BLOB"], "MAX_KEYS" => self::MAX_KEYS ]; /** * Internal nodes of BPlusTree are folders, subfolders/subfiles are * names according to their least key except for the first subdfolder/ * subfile of the node which is given the name of the LEAST_NODE_NAME * constant */ const LEAST_NODE_NAME = "start"; /** * Internal nodes of BPlusTree are folders. For nodes of the same * height in the tree NEXT_NODE_NAME is used as the name of the file * with the serialized name of the next folder of the same height in * the tree. */ const NEXT_NODE_NAME = "next"; /** * Name of temporary file used when splitting a BPlusTree node. */ const TEMP_NODE_NAME = "tmp_node"; /** * Leaf node archive file (used for blob and serial record columns) name * prefix */ const ARCHIVE_PREFIX = "a"; /** * Leaf node index record file name prefix */ const NODE_PREFIX = "i"; /** * File name of file used to store the parameters of this * BPlusTree */ const PARAMETERS_FILE = "bpt_parameters.txt"; /** * Field variable used as a cache for the file handle, file name, and * time of the last archive file for a B+-tree node added to * @var array */ public $add_archive_cache = [null, "", -1]; /** * Field variable used as a cache for the file handle, file name, and * time of the last archive file for a B+-tree node accessed for a value * @var array */ public $get_archive_cache = [null, "", -1]; /** * Array of column names for the columns in a BPlusTree which * are of type BLOB or SERIAL * @var array */ public $blob_columns; /** * The seekquarry\yioop\library\compressors\Compressor object used to * compress node files. * @var object */ public $node_compressor; /** * The seekquarry\yioop\library\compressors\Compressor object used to * compress blob columns. * @var object */ public $blob_compressor; /** * Folder for storing the B-Tree files * @var string */ public $folder; /** * A cache for contents of nodes that have been recently added to. * consists of path_to_node => node_content pairs * @var array */ public $insert_node_cache = []; /** * Used to keep track of when this instance was created, as part of managing * file handles expiration (could be set/updated externally to reflect * some other instance using the BPlusTree) * @var int */ public $instance_time; /** * Name of primary key column for records * @var string */ public $key_field; /** * Last folder path of a find operation, provided this was cacheable * @var string */ public $last_find_folder = null; /** * Last encoded key used for a find operation, provided this was cacheable * Used to avoid recomputing path down tree if will be the same. * @var string */ public $last_find_key = null; /** * First key of next node after returned node for the last find operation, * provided this was cacheable * @var string */ public $last_find_next_key = null; /** * File path of last inserted node * @var string */ public $last_insert_node_path; /** * Storage for root node of the B-Tree * @var object */ public $parameters; /** * Array of column names for the columns in a PartitionDocumentBundle which * are of type SERIAL * @var array */ public $serial_columns; /** * The PackedTableTools object used to pack and unpack records in * BPlusTree * @var object */ public $table_tools; /** * Cache of key-values (internal_node_path => array of subnodes) used to * speed up tree traversal * @var array */ public $tree_path_cache = []; /** * Creates/Loads B+-Tree having specified folder and minimum_degree. * * @param string $folder is the folder for storing the B+-Tree files * @param array $format the column names, keys and types for this * B+-Tree object * @param int $max_keys the maximum number of keys a node is allowed to hold * @param object $node_compressor_type * seekquarry\yioop\library\compressors\Compressor object used to * compress index node files. * @param object $blob_compressor_type * seekquarry\yioop\library\compressors\Compressor object used to * compress blob columns. */ public function __construct($folder, $format = self::DEFAULT_PARAMETERS["FORMAT"], $max_keys = self::MAX_KEYS, $node_compressor_type = self::DEFAULT_COMPRESSOR, $blob_compressor_type = self::DEFAULT_COMPRESSOR) { // ensure $max_keys odd $max_keys = ($max_keys % 2 == 1) ? $max_keys : $max_keys + 1; $initial_parameters = self::DEFAULT_PARAMETERS; $initial_parameters["MAX_KEYS"] = $max_keys; $initial_parameters["NODE_COMPRESSOR"] = $node_compressor_type; $initial_parameters["BLOB_COMPRESSOR"] = $blob_compressor_type; $this->node_compressor = new $node_compressor_type(); $this->blob_compressor = new $blob_compressor_type(); $initial_parameters["FORMAT"] = $format; $this->instance_time = hrtime(true); $this->folder = $folder; $changed_parameters = false; if (!file_exists($folder)) { $changed_parameters = true; if(!mkdir($folder)) { return null; } file_put_contents($folder . "/" . self::NEXT_NODE_NAME, serialize(null)); } $this->parameters = self::getParameterInfo($folder); foreach (self::DEFAULT_PARAMETERS as $field => $value) { if (!isset($this->parameters[$field])) { $this->parameters[$field] = $initial_parameters[$field]; $changed_parameters = true; } } $format = $this->parameters["FORMAT"]; $packed_table_format = []; $this->blob_columns = []; $this->serial_columns = []; $this->key_field = null; foreach ($format as $field_name => $type) { $upper_field_name = strtoupper($field_name); if ($upper_field_name == "PRIMARY KEY") { $packed_table_format["PRIMARY KEY"] = $type; $this->key_field = (is_array($type)) ? $type[0] : $type; continue; } $type = strtoupper($type); if (in_array($type, ["BLOB", "SERIAL"])) { $this->blob_columns[] = $field_name; if ($type == "SERIAL") { $this->serial_columns[] = $field_name; } $packed_table_format[$field_name] = "INT"; } else if (isset(PackedTableTools::TYPE_SYNONYMS[$type])) { $packed_table_format[$field_name] = PackedTableTools::TYPE_SYNONYMS[$type]; } else { return null; } } if (empty($this->key_field)) { return null; } if (!empty($this->blob_columns)) { $packed_table_format["LAST_BLOB_LEN"] = "INT"; } $this->table_tools = new PackedTableTools($packed_table_format, $node_compressor_type); if ($changed_parameters) { $this->saveParameters(); } } /** * Adds an entry to the BPlusTree. Entries added might not be fully * on disk until flushLastPutNode is called, as this memory tries to * avoid disk writes. * * @param array $row associative array of filed_name => values for each * column field field this BPlusTree's signature has * @param bool $is_encoded_key whether in $row the primary key field has * already been encoded (rawurlencode) or not * @param int $mode says what to do in the event that an entry with the * given key already exists in the BPlusTree. Possibilities are either * PackedTableTools::APPEND_MODE or PackedTableTools::REPLACE_MODE * The former appends as a subrecord the nonkey fields of the entry * to the existing list of sub-records associated to the given key, the * later replaces the key value pair. * @return bool success (true) or failure (false) */ public function put($row, $is_encoded_key = true, $mode = PackedTableTools::APPEND_MODE) { $key = $row[$this->key_field] ?? false; if ($key === false) { return false; } $encode_key = ($is_encoded_key) ? $key : rawurlencode($key); $insert_node_path = $this->find($encode_key, true); if(empty($insert_node_path)) { return false; } $table_tools = $this->table_tools; $this->flushLastPutNode($insert_node_path); if (!isset($this->insert_node_cache[$insert_node_path])) { $this->insert_node_cache[$insert_node_path] = $table_tools->load($insert_node_path, $mode) ?? []; } $insert_node = $this->insert_node_cache[$insert_node_path]; $archive_filename = (empty($this->blob_columns)) ? "" : $this->archiveFilenameFromNodeFilename($insert_node_path); $this->putNode($row, $insert_node, $archive_filename, $is_encoded_key, $mode); if (count($insert_node) > $this->parameters["MAX_KEYS"]) { $this->flushLastPutNode(); $this->splitRecordsInLeaf($insert_node_path, $insert_node); $this->insert_node_cache = []; $this->tree_path_cache = []; $parent_path = $this->getParentFolder($insert_node_path); $this->updateNodePath($parent_path); return true; } if (count($this->insert_node_cache) >= self::MAX_CACHE_SIZE) { $this->insert_node_cache = []; } $this->insert_node_cache[$insert_node_path] = $insert_node; $this->last_insert_node_path = $insert_node_path; return true; } /** * Saves the the BPlusTree $insert_node_path (if not empty) node to disk. * If $insert_node_path == "" then the last node put'ted to will be saved. * * @param string $insert_node_path name of node to save to disk. */ public function flushLastPutNode($insert_node_path = "") { $last_insert_node_path = $this->last_insert_node_path ?? ""; if (!empty($last_insert_node_path) && $insert_node_path != $last_insert_node_path && isset($this->insert_node_cache[$last_insert_node_path])) { $this->table_tools->save($last_insert_node_path, $this->insert_node_cache[$last_insert_node_path]); unset($this->insert_node_cache[$last_insert_node_path]); $this->last_insert_node_path = ""; } } /** * Given an entry to add to a BPlusTree node, the index file of the node * (for non blob or serial fields), and the name of the archive file for * node (for blob or serial fields) stores the entry to the node. * * @param array $row associative array of filed_name => values for each * column field field this BPlusTree's signature has * @param bool $is_encoded_key whether in $row the primary key field has * already been encoded (rawurlencode) or not * @param array &$node an in-memory code of a node (will be modified by * this method) * @param string $archive_filename the name of the archive file associated * with this file used for blob or serial fields. * @param bool $is_encoded_key whether in $row the primary key field has * already been encoded (rawurlencode) or not * @param int $mode says what to do in the event that an entry with the * given key already exists in the BPlusTree. Possibilities are either * PackedTableTools::APPEND_MODE or PackedTableTools::REPLACE_MODE * The former appends as a subrecord the nonkey fields of the entry * to the existing list of sub-records associated to the given key, the * later replaces the key value pair. */ public function putNode($row, &$node, $archive_filename, $is_encoded_key = true, $mode = PackedTableTools::APPEND_MODE) { $table_tools = $this->table_tools; $key = $row[$this->key_field] ?? false; $encode_key = ($is_encoded_key) ? $key : rawurlencode($key); $out_value = $this->writeBlobsAndPack($archive_filename, $row); $table_tools->add($node, $encode_key, $out_value, PackedTableTools::ADD_MEM_TABLE, $mode); } /** * For a non-leaf node checks if it needs to be split, and if so splits * and fixes keys up the tree to restore BPlus-Tree properties. * * @param string $folder to check if needs split * @return bool success (true) or failure (false) */ public function updateNodePath($folder) { if (strncmp($folder, $this->folder, strlen($this->folder)) != 0) { return false; } $node_prefix = self::NODE_PREFIX; $nodes = glob("$folder/$node_prefix*"); if (count($nodes) < $this->parameters["MAX_KEYS"]) { return true; } if ($folder == $this->folder) { //root case return $this->splitRootNode(); } $len = strlen($folder); $num_nodes = count($nodes); $next_node_name = self::NEXT_NODE_NAME; $this->add_archive_cache = [null, "", -1]; $this->get_archive_cache = [null, "", -1]; $parent_folder = $this->getParentFolder($folder); $half_max = floor($num_nodes/2); $node_name = substr($nodes[$half_max], $len + 1); $new_node = "$parent_folder/$node_name"; mkdir($new_node); $next_node = "$folder/$next_node_name"; rename($next_node, "$new_node/$next_node_name"); file_put_contents($next_node, serialize($node_name)); $node = $nodes[$half_max]; $node_name = substr($node, $len + 1); $archive_prefix = self::ARCHIVE_PREFIX; $least_node_name = self::LEAST_NODE_NAME; $len_node_prefix = strlen($node_prefix); $archive_name = $archive_prefix . substr($node_name, $len_node_prefix); rename($node, "$new_node/$least_node_name"); $archive_filename = "$folder/$archive_name"; if (file_exists($archive_filename)) { rename($archive_filename, "$new_node/$archive_prefix$least_node_name"); } for ($i = $half_max + 1; $i < $num_nodes; $i++) { $node = $nodes[$i]; $node_name = substr($node, $len + 1); rename($node, "$new_node/$node_name"); $archive_name = $archive_prefix . substr($node_name, $len_node_prefix); $archive_filename = "$folder/$archive_name"; if (file_exists($archive_filename)) { rename($archive_filename, "$new_node/$archive_name"); } } return $this->updateNodePath($parent_folder); } /** * This method handles split the root node of a BPlusTree when it gets too * full. Unlike for other nodes when we can add a key in the parent, for the * root, we make a temporary folder and move all the root node's nodes to it * then rename this the the least node of the root, and call updateNodePath * on this to restore the BPlusTree property. * * @return @return bool success (true) or failure (false) */ public function splitRootNode() { $this->last_find_folder = null; $this->last_find_key = null; $this->last_find_next_key = null; $folder = $this->folder; $this->add_archive_cache = [null, "", -1]; $this->get_archive_cache = [null, "", -1]; $len = strlen($folder); $node_prefix = self::NODE_PREFIX; $temp_node_name = self::TEMP_NODE_NAME; $least_node_name = self::LEAST_NODE_NAME; $archive_prefix = self::ARCHIVE_PREFIX; $next_node_name = self::NEXT_NODE_NAME; $nodes = glob("$folder/$node_prefix*"); $tmp_folder = "$folder/$temp_node_name"; $least_node = "$folder/$least_node_name"; $least_archive = "$folder/$archive_prefix$least_node_name"; mkdir($tmp_folder); $next_node_name_path = "$folder/$next_node_name"; if (!file_exists($next_node_name_path)) { file_put_contents($next_node_name_path, serialize(null)); } $tmp_next_node_name_path = "$tmp_folder/$next_node_name"; rename($next_node_name_path, $tmp_next_node_name_path); if (file_exists($least_node)) { rename($least_node, "$tmp_folder/$least_node_name"); if (file_exists($least_archive)) { rename($least_archive, "$tmp_folder/$archive_prefix$least_node_name"); } } $len_node_prefix = strlen($node_prefix); foreach ($nodes as $node) { $node_name = substr($node, $len + 1); rename($node, "$tmp_folder/$node_name"); $archive_name = $archive_prefix . substr($node_name, $len_node_prefix); $archive_filename = "$folder/$archive_name"; if (file_exists($archive_filename)) { rename($archive_filename, "$tmp_folder/$archive_name"); } } rename($tmp_folder, $least_node); return $this->updateNodePath($least_node); } /** * Splits BPlusTree leaf $node with path $node_path into two nodes each * with an equal number of keys. * * @param string $node_path path to file used to store a BPlusTree node * Used to name files after split. * @param array BPlusTree node (associative array of pairs key => * record for key stored according to BPlusTree signature using * PackedTableTools) */ public function splitRecordsInLeaf($node_path, $node) { $this->last_find_folder = null; $this->last_find_key = null; $this->last_find_next_key = null; $parent_folder = $this->getParentFolder($node_path); $archive_prefix = self::ARCHIVE_PREFIX; $temp_node_name = self::TEMP_NODE_NAME; $node_prefix = self::NODE_PREFIX; $tmp_filename = "$parent_folder/$temp_node_name"; $tmp_archive_filename = "$parent_folder/$archive_prefix$temp_node_name"; $archive_filename = (empty($this->blob_columns)) ? "" : $this->archiveFilenameFromNodeFilename($node_path); $num_keys = count($node); $half_num = ceil($num_keys/2); $keys = array_keys($node); sort($keys); $half_key = $keys[$half_num]; /* the str_replaces are because I was getting hard to trace crashes concerning nulls in file name. */ $new_leaf_filename = str_replace("\x00", "0", "$parent_folder/$node_prefix$half_key"); $new_archive_filename = str_replace("\x00", "0", "$parent_folder/$archive_prefix$half_key"); $table_tools = $this->table_tools; $out_archive_filename = $tmp_archive_filename; $out_node = []; $flag = false; for ($i = 0; $i < $half_num; $i++) { $values = $this->getFromNode($keys[$i], $node, $archive_filename); if ($values) { foreach ($values as $value) { $this->putNode($value, $out_node, $out_archive_filename); } } } $table_tools->save($tmp_filename, $out_node); $out_node = []; $out_archive_filename = $new_archive_filename; for ($i = $half_num; $i < $num_keys; $i++) { $values = $this->getFromNode($keys[$i], $node, $archive_filename); if ($values) { foreach ($values as $value) { $this->putNode($value, $out_node, $out_archive_filename); } } } $table_tools->save($new_leaf_filename, $out_node); $this->add_archive_cache = [null, "", -1]; $this->get_archive_cache = [null, "", -1]; rename($tmp_filename, $node_path); if (!empty($this->blob_columns)) { rename($tmp_archive_filename, $archive_filename); } } /** * Returns the record associated with a $key as stored in the BPlusTree. * If $key is not present in tree then returns null * * @param string $key to look up in current BPlusTree * @param bool $is_encoded_key whether $key has been rawurlencode'd or not * @param bool $use_string_node rather than load in a node and split * into keys for use of all keys, read in node as a string and uncompress * but do not perform further decoding (faster if only doing reads). * @param bool $decode whether or not to unpack record * @param bool $look_up_blobs whether to leave in the record the * offset and length for blob items or to look up the blob item * @param int $offset starting record item associated with this key * to return * @param int $limit maximum number of record items associated with this * key to return * @return array of items associated with this key in the BPlusTree * if $look_up_blobs == false, then returns a pair * ["ARCHIVE_FILE" => [filename where blob columns stored, * array of items associated with this key]] */ public function get($key, $is_encoded_key = true, $use_string_node = false, $decode = true, $look_up_blobs = true, $offset = 0, $limit = -1) { $table_tools = $this->table_tools; $key_node_filename = $this->find($key, $is_encoded_key); if (!$key_node_filename) { return null; } $archive_filename = (empty($this->blob_columns)) ? "" : $this->archiveFilenameFromNodeFilename($key_node_filename); if ($use_string_node) { $key_node = $table_tools->load($key_node_filename, $table_tools::AS_STRING_MODE, true); } else { $key_node = $table_tools->load($key_node_filename); } $rows = $this->getFromNode($key, $key_node, $archive_filename, $is_encoded_key, $use_string_node, $decode, $look_up_blobs, $offset, $limit); if (!$look_up_blobs) { return ["ARCHIVE_FILE" => $archive_filename, "ROWS" => $rows]; } return $rows; } /** * Returns records associated with $key in $key_node and the file * associated with $archive_filename (for blob and serial column values) * if it exists. * * @param string $key the key to look up * @param array $key_node the BPlusTree index node in which to look up the * key * @param string $archive_filename file name of archive data (blob and * serial columns) associated with $key_node * @param bool $is_encoded_key whether the key is rawurlencoded or not * @param bool $decode whether to $decode the records or leave them packed * @param bool $look_up_blobs whether to look up blob and serial columns * @param int $offset index of the first record associated with $key to * return * @param int $limit maximum number of records associated with key to return * @return array of value records associated with $key */ public function getFromNode($key, $key_node, $archive_filename, $is_encoded_key = true, $is_string_node = false, $decode = true, $look_up_blobs = true, $offset = 0, $limit = -1) { $table_tools = $this->table_tools; $encode_key = ($is_encoded_key) ? $key : rawurlencode($key); if ($is_string_node) { $values = $table_tools->findRowFromKeyTableString($key_node, $encode_key); } else { $values = $table_tools->find($key_node, $encode_key); } if (!$values) { return null; } if (!$decode) { return $values; } if (!($values = $table_tools->unpack($values, $offset, $limit))) { crawlLog("Unpack BPlusTree error!!! (Key,offset,limit) was: ". "($key, $offset, $limit) .."); $value_message = (is_string($values)) ? toHexString($values) : serialize($values); crawlLog(".. value was:" . $value_message); return null; } $num_unpacked = count($values); if (!$look_up_blobs) { return $values; } $num_blob_columns = count($this->blob_columns); for ($k = 0; $k < $num_unpacked; $k++) { if ($num_blob_columns > 0) { $blob_offset = intval($values[$k][$this->blob_columns[0]]); for ($i = 0; $i < $num_blob_columns; $i++) { $column_name = $this->blob_columns[$i]; $len = ($i + 1 < $num_blob_columns) ? intval($values[$k][$this->blob_columns[$i + 1]]) : $values[$k]["LAST_BLOB_LEN"]; $values[$k][$column_name] = ($len == 0) ? "" : $this->getArchive($archive_filename, $blob_offset, $len); $blob_offset += $len; } unset($values[$k]["LAST_BLOB_LEN"]); foreach ($this->serial_columns as $field_name) { $values[$k][$field_name] = unserialize( $values[$k][$field_name]); } } $values[$k][$this->key_field] = ($is_encoded_key) ? $key : rawurldecode($encode_key); } return $values; } /** * Returns the file path to the file that may or may not contain $key * in the BPlusTree * * @param string $key to look up in BPlusTree * @param bool $is_encoded_key whether $key has been rawurlencode'd or not * @return string file path to node in BPlusTree that may or may not contain * this key */ public function find($key, $is_encoded_key = false) { $encode_key = ($is_encoded_key) ? $key : rawurlencode($key); if (!empty($this->last_find_folder) && !empty($this->last_find_key) && $this->last_find_key <= $encode_key && !empty($this->last_find_next_key) && $encode_key < $this->last_find_next_key) { return $this->last_find_folder; } $current_folder = $this->folder; $cache = & $this->tree_path_cache; $node_prefix = self::NODE_PREFIX; $least_node_name = self::LEAST_NODE_NAME; $node_prefix_and_key = self::NODE_PREFIX . $encode_key; while (isset($cache[$current_folder]) || is_dir($current_folder)) { $current_prefix = "$current_folder/$node_prefix"; $len_current_prefix = strlen($current_prefix); if (!isset($cache[$current_folder])) { $cache[$current_folder] = glob("$current_prefix*"); } $nodes = $cache[$current_folder]; if (empty($nodes)) { return "$current_folder/$least_node_name"; } if ($nodes == $current_folder) { break; } $exact_node = "$current_folder/$node_prefix_and_key"; $least_node = "$current_folder/$least_node_name"; $first_index = 0; $last_index = count($nodes) - 1; $this->last_find_folder = null; $this->last_find_key = null; $this->last_find_next_key = null; if ($exact_node < $nodes[$first_index]) { $this->last_find_folder = $least_node; $this->last_find_key = $encode_key; $this->last_find_next_key = substr($nodes[$first_index], $len_current_prefix); $current_folder = $least_node; } else if ($exact_node == $nodes[$first_index]) { if (!empty($nodes[$first_index + 1])) { $this->last_find_folder = $nodes[$first_index]; $this->last_find_key = $encode_key; $this->last_find_next_key = substr($nodes[$first_index + 1], $len_current_prefix); } $current_folder = $nodes[$first_index]; } else if ($nodes[$last_index] <= $exact_node) { $current_folder = $nodes[$last_index]; } else { while ($first_index < $last_index) { $mid_index = ceil(($first_index + $last_index)/2); if ($nodes[$mid_index] > $exact_node) { $last_index = $mid_index - 1; } else { $first_index = $mid_index; } } if (!empty($nodes[$first_index + 1])) { $this->last_find_folder = $nodes[$first_index]; $this->last_find_key = $encode_key; $this->last_find_next_key = substr($nodes[$first_index + 1], $len_current_prefix); } $current_folder = $nodes[$first_index]; } } $return_folder = null; if ($nodes == $current_folder) { $return_folder = $current_folder; } else if (file_exists($current_folder)) { if (count($cache) >= self::MAX_CACHE_SIZE) { $this->tree_path_cache = []; } $cache[$current_folder] = $current_folder; $return_folder = $current_folder; } else { $this->last_find_folder = null; $this->last_find_key = null; $this->last_find_next_key = null; } return $return_folder; } /** * Given the path to an index node file in the BPlusTree compute the * path to the corresponding archive node file. * * @param string $node_filename of index node path * @return string associated archive node path */ public function archiveFilenameFromNodeFilename($node_filename) { $parent_folder = $this->getParentFolder($node_filename); $len = strlen($parent_folder); $node_name = substr($node_filename, $len + 1); if ($node_name == self::LEAST_NODE_NAME) { return $parent_folder. "/" . self::ARCHIVE_PREFIX . self::LEAST_NODE_NAME; } return $parent_folder . "/" . self::ARCHIVE_PREFIX . substr($node_name, strlen(self::NODE_PREFIX)); } /** * Return the blob item from $archive_filename at $offset of length $len, * uncompress the result * * @param string $archive_filename path to an archive node file for this * BPlusTree * @param int $offset byte offset into archive node file file * @param int $len length of blob item * @return string uncompressed blob item from $archive_filename */ public function getArchive($archive_filename, $offset, $len) { list($fh, $previous_archive_filename, $previous_instance_time) = $this->get_archive_cache; $blob_compressor = $this->blob_compressor; if (!$fh || $previous_archive_filename != $archive_filename || $previous_instance_time != $this->instance_time) { if ($fh) { fclose($fh); } $fh = fopen($archive_filename , "r"); $previous_archive_filename = $archive_filename; $previous_instance_time = $this->instance_time; } if (empty($blob_compressor)) { $blob_compress_type = $this->parameters["BLOB_COMPRESSOR"]; $blob_compressor = new $blob_compress_type(); } $value = false; if ($fh && fseek($fh, $offset) == 0 && $len > 0) { $compressed_file = fread($fh, $len); $value = $blob_compressor->uncompress($compressed_file); } $this->get_archive_cache = [$fh, $previous_archive_filename, $previous_instance_time]; return $value; } /** * Given a encoded hash_key to be used as the name of in the name of * an archive file for a BPlusTree node and a folder where that * node should be stored, return the path name for an archive file for * a node * @param string $current_folder to store archive file in * @param string $encode_key encode key value to be used in the file * name of a BPlusTree node archive file * @return string archive file path */ protected function getArchiveName($current_folder, $encode_key) { $archive_filename = $current_folder . "/" . self::ARCHIVE_PREFIX . $encode_key; return $archive_filename; } /** * Write the blob and serial columns from a row to insert into BPlusTree * and then pack the rest of the row as a string * * @param string $archive_filename name of archive file for a BPlusTree node * @param array $row BPlusTree row to pack Blob columns for * @return string packed row */ protected function writeBlobsAndPack($archive_filename, $row) { $blob_columns = $this->blob_columns ?? []; $serial_columns = $this->serial_columns ?? []; $format = $this->parameters["FORMAT"]; unset($format['PRIMARY KEY']); $format_keys = array_keys($format); $out_value = []; foreach ($format_keys as $format_key) { $out_value[$format_key] = $row[$format_key] ?? null; } if (!empty($blob_columns)) { $old_offset = 0; foreach ($blob_columns as $blob_column) { $blob = $out_value[$blob_column] ?? ""; if (empty($blob)) { $offset = $old_offset; $len = 0; } else { if (in_array($blob_column, $serial_columns)) { $blob = serialize($blob); } if (($add_info = $this->addArchive($archive_filename, $blob)) === false) { return false; } list($offset, $len) = $add_info; } $out_value[$blob_column] = $offset - $old_offset; $old_offset = $offset; } $out_value["LAST_BLOB_LEN"] = $len; } $out_value = $this->table_tools->pack($out_value); return $out_value; } /** * Add a value to an archive file $archive_filename of a B+-tree node * @param string $archive_filename name of archive file * @param string $value value to add * @return array [offset into archive, length data saved] */ protected function addArchive($archive_filename, $value) { list($fh, $previous_archive_filename, $previous_instance_time) = $this->add_archive_cache; if (!$fh || $previous_archive_filename != $archive_filename || $previous_instance_time != $this->instance_time) { if ($fh) { fclose($fh); } $fh = fopen($archive_filename , "c+"); $previous_archive_filename = $archive_filename; $previous_instance_time = $this->instance_time; } $blob_compress_type = $this->parameters["BLOB_COMPRESSOR"]; $blob_compressor = new $blob_compress_type(); fseek($fh, 0, SEEK_END); $offset = ftell($fh); $compressed_value = $blob_compressor->compress($value); $len = strlen($compressed_value); $success = (fwrite($fh, $compressed_value, $len) !== false) ? true : false; $this->add_archive_cache = [$fh, $previous_archive_filename, $previous_instance_time]; if (!$success) { return false; } return [$offset, $len]; } /** * Returns the file path of the parent node of the current internal node * * @param string $folder path to internal BPlusTree node folder. * (Internal nodes are folders, leaves are files) * @return string file_path to parent or false, if no parent. */ public function getParentFolder($folder) { if (!($last_pos = strrpos($folder, "/"))) { return false; } return substr($folder, 0, $last_pos); } /** * Save the operating parameters of this BPlusTree */ public function saveParameters() { $parameter_path = $this->folder . "/" . self::PARAMETERS_FILE; file_put_contents($parameter_path, serialize($this->parameters), LOCK_EX); } /** * Returns the parameters (such as its signature, max keys per nodes, etc) * used to configure the BPlusTree stored at $folder * * @param string $folder file path to a stored BPlusTree * @return array configuration info about the BPlusTree */ public static function getParameterInfo($folder) { $parameter_path = $folder . "/" . self::PARAMETERS_FILE; if(file_exists($parameter_path)) { $parameters = unserialize(file_get_contents($parameter_path)) ?? []; if (!empty($parameters["COMPRESSOR"])) { //original format didn't distinguish between compressor use $parameters["NODE_COMPRESSOR"] ??= $parameters["COMPRESSOR"]; $parameters["BLOB_COMPRESSOR"] ??= $parameters["COMPRESSOR"]; } return $parameters; } else { return []; } } }