viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]

Minor code formatting tweaks to Gargi's patch

Chris Pollett [2023-09-21 15:Sep:st]
Minor code formatting tweaks to Gargi's patch
Filename
src/library/IndexDocumentBundle.php
src/library/index_bundle_iterators/WordIterator.php
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php
index 633ec137f..21668fa12 100644
--- a/src/library/IndexDocumentBundle.php
+++ b/src/library/IndexDocumentBundle.php
@@ -1011,7 +1011,8 @@ class IndexDocumentBundle implements CrawlConstants
             if (strpos($cld, "wikipedia") !== false) {
                 $letter_code = chr(ord($letter_code) + 4);
             }
-            $num_slashes = substr_count(substr($site[self::URL], strlen($host)), '/');
+            $num_slashes = substr_count(substr($site[self::URL], strlen($host)),
+                '/');
             /*
              * Discount any trailing slashes in the URL.
              */
@@ -1020,12 +1021,13 @@ class IndexDocumentBundle implements CrawlConstants
             }
             /**
              * The first two bits hold the number of / values (for the
-             * NUM_SLASHES_BONUS). This value is mapped into buckets of {0-1, 2-4,
-             * 5-6, 7+}, wherein all the values in a bucket get the same bonus. These
-             * buckets were decided after experimentation; the fundamental idea is
-             * that URLs for root pages/singly-nested pages are usually more
-             * important than those nested doubly or quadruply, which are in turn
-             * more important than those nested quintuply, etc.
+             * NUM_SLASHES_BONUS). This value is mapped into buckets of
+             * {0-1, 2-4, 5-6, 7+}, wherein all the values in a bucket get the
+             * same bonus. These buckets were decided after experimentation;
+             * the fundamental idea is that URLs for root pages/singly-nested
+             * pages are usually more important than those doubly nested or
+             * quadruply nested, which are in turn more important than those
+             * quintuply nested, etc.
              */
             if ($num_slashes >= 2 && $num_slashes < 5) {
                 $letter_code = chr(ord($letter_code) + 1);
@@ -1180,7 +1182,8 @@ class IndexDocumentBundle implements CrawlConstants
         return (ord($key[self::DOCID_PART_LEN << 1] ?? '\0') & 4) > 0;
     }
     /**
-     * Finds number of '/' in the url after the hostname represented by doc_id $key.
+     * Finds number of '/' in the url after the hostname represented by doc_id
+     * $key.
      * @param string $key to find '/' count
      */
     public static function findNumSlashes($key)
diff --git a/src/library/index_bundle_iterators/WordIterator.php b/src/library/index_bundle_iterators/WordIterator.php
index 79eace8a0..0e4014443 100644
--- a/src/library/index_bundle_iterators/WordIterator.php
+++ b/src/library/index_bundle_iterators/WordIterator.php
@@ -554,22 +554,23 @@ class WordIterator extends IndexBundleIterator
                         $this->ranking_factors["HOST_URL_BONUS"];
                 }
                 /**
-                 * For backward compatibility: new bonuses should only be added for
-                 * doc_ids following the new letter_code format.
-                 * Since all old formats use letters (eg. b, t, etc) to denote the
-                 * doc type, the ascii values for these letters are all > 96 (i.e.
+                 * For backward compatibility: new bonuses should only be added
+                 * for doc_ids following the new letter_code format. Since all
+                 * old formats use letters (b, t, etc.) to denote the doc
+                 * type, the ASCII values for these letters are all > 96 (i.e.,
                  * bits 6 and 7 of the doc_id's 9th byte are both true).
-                 * Since all new letter_code formats use bits 4, 5, 6, 7 to represent
-                 * the doc type as int values mapped between 0-8, there is no value
-                 * in a doc_id's 9th byte that can have both bits 6 and 7
-                 * set to true.
-                 * This difference can be used to check whether $doc_key follows the
-                 * old or new letter_code format.
+                 * Since all new letter_code formats use bits 4, 5, 6, 7 to
+                 * represent the doc type as int values mapped between 0-8,
+                 * there is no value in a doc_id's 9th byte that can have both
+                 * bits 6 and 7 set to true.
+                 * This difference can be used to check whether $doc_key follows
+                 * the old or new letter_code format.
                  */
                 $doc_id_format = ord($doc_key[8 << 1] ?? 0) & 96;
                 if ($doc_id_format != 96) {
                     if(L\IndexDocumentBundle::isAWikipediaPage($doc_key)) {
-                        $posting[self::DOC_RANK] += $this->ranking_factors["WIKI_BONUS"];
+                        $posting[self::DOC_RANK] +=
+                            $this->ranking_factors["WIKI_BONUS"];
                     }
                     $posting[self::DOC_RANK] +=
                         $this->ranking_factors["NUM_SLASHES_BONUS"] /
ViewGit