viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]

Reduce memory requirements of save without dictionary take II, a=chris

Chris Pollett [2018-06-19, 03h]
Reduce memory requirements of save without dictionary take II, a=chris
Filename
src/library/IndexShard.php
diff --git a/src/library/IndexShard.php b/src/library/IndexShard.php
index 0bd9c4c24..93f37dd4e 100644
--- a/src/library/IndexShard.php
+++ b/src/library/IndexShard.php
@@ -1322,13 +1322,12 @@ class IndexShard extends PersistentStructure implements
         if($with_logging) {
             crawlLog("..without dictionary version of shard header written");
         }
-        $header = "";
         $remaining = $this->word_docs_len;
         $offset = 0;
         $buffer_size = 16 * self::SHARD_BLOCK_SIZE;
         while ($remaining > 0) {
             $len = min($remaining, $buffer_size);
-            $data = $this->getWordDocsSubstring($offset, $len);
+            $data = $this->getWordDocsSubstring($offset, $len, false);
             fwrite($fh, $data);
             $offset += $len;
             $remaining -= $len;
@@ -1599,16 +1598,17 @@ class IndexShard extends PersistentStructure implements
      * @param $offset byte offset to begin getting data out of disk-based
      *     word_docs
      * @param $len number of bytes to get
+     * @param bool $cache whether to cache disk blocks read from disk
      * @return desired string
      */
-    public function getWordDocsSubstring($offset = 0, $len = 0)
+    public function getWordDocsSubstring($offset = 0, $len = 0, $cache = true)
     {
         if ($len <= 0) {
             $len = $this->word_docs_len;
         }
         if ($this->read_only_from_disk) {
             return $this->getShardSubstring($this->word_doc_offset + $offset,
-                $len);
+                $len, $cache);
         }
         return substr($this->word_docs, $offset, $len);
     }
@@ -1631,16 +1631,17 @@ class IndexShard extends PersistentStructure implements
      * @param $offset byte offset to begin getting data out of disk-based
      *     doc_infos
      * @param $len number of bytes to get
+     * @param bool $cache whether to cache disk blocks read from disk
      * @return desired string
      */
-    public function getDocInfoSubstring($offset = 0, $len = 0)
+    public function getDocInfoSubstring($offset = 0, $len = 0, $cache = false)
     {
         if ($len <= 0) {
             $len = $this->docids_len;
         }
         if ($this->read_only_from_disk) {
             return $this->getShardSubstring(
-                $this->doc_info_offset + $offset, $len, false);
+                $this->doc_info_offset + $offset, $len, $cache);
         }
         return substr($this->doc_infos, $offset, $len);
     }
@@ -1728,6 +1729,10 @@ class IndexShard extends PersistentStructure implements
         if (!$cache) {
             return fread($this->fh, self::SHARD_BLOCK_SIZE);
         }
+        if (count($this->blocks) > self::SHARD_BLOCK_SIZE) {
+            $this->blocks = [];
+            $this->blocks_words = [];
+        }
         $this->blocks[$bytes] = fread($this->fh, self::SHARD_BLOCK_SIZE);
         $tmp = & $this->blocks[$bytes];
         $this->blocks_words += array_combine(
ViewGit