viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php index 633ec137f..21668fa12 100644 --- a/src/library/IndexDocumentBundle.php +++ b/src/library/IndexDocumentBundle.php @@ -1011,7 +1011,8 @@ class IndexDocumentBundle implements CrawlConstants if (strpos($cld, "wikipedia") !== false) { $letter_code = chr(ord($letter_code) + 4); } - $num_slashes = substr_count(substr($site[self::URL], strlen($host)), '/'); + $num_slashes = substr_count(substr($site[self::URL], strlen($host)), + '/'); /* * Discount any trailing slashes in the URL. */ @@ -1020,12 +1021,13 @@ class IndexDocumentBundle implements CrawlConstants } /** * The first two bits hold the number of / values (for the - * NUM_SLASHES_BONUS). This value is mapped into buckets of {0-1, 2-4, - * 5-6, 7+}, wherein all the values in a bucket get the same bonus. These - * buckets were decided after experimentation; the fundamental idea is - * that URLs for root pages/singly-nested pages are usually more - * important than those nested doubly or quadruply, which are in turn - * more important than those nested quintuply, etc. + * NUM_SLASHES_BONUS). This value is mapped into buckets of + * {0-1, 2-4, 5-6, 7+}, wherein all the values in a bucket get the + * same bonus. These buckets were decided after experimentation; + * the fundamental idea is that URLs for root pages/singly-nested + * pages are usually more important than those doubly nested or + * quadruply nested, which are in turn more important than those + * quintuply nested, etc. */ if ($num_slashes >= 2 && $num_slashes < 5) { $letter_code = chr(ord($letter_code) + 1); @@ -1180,7 +1182,8 @@ class IndexDocumentBundle implements CrawlConstants return (ord($key[self::DOCID_PART_LEN << 1] ?? '\0') & 4) > 0; } /** - * Finds number of '/' in the url after the hostname represented by doc_id $key. + * Finds number of '/' in the url after the hostname represented by doc_id + * $key. * @param string $key to find '/' count */ public static function findNumSlashes($key) diff --git a/src/library/index_bundle_iterators/WordIterator.php b/src/library/index_bundle_iterators/WordIterator.php index 79eace8a0..0e4014443 100644 --- a/src/library/index_bundle_iterators/WordIterator.php +++ b/src/library/index_bundle_iterators/WordIterator.php @@ -554,22 +554,23 @@ class WordIterator extends IndexBundleIterator $this->ranking_factors["HOST_URL_BONUS"]; } /** - * For backward compatibility: new bonuses should only be added for - * doc_ids following the new letter_code format. - * Since all old formats use letters (eg. b, t, etc) to denote the - * doc type, the ascii values for these letters are all > 96 (i.e. + * For backward compatibility: new bonuses should only be added + * for doc_ids following the new letter_code format. Since all + * old formats use letters (b, t, etc.) to denote the doc + * type, the ASCII values for these letters are all > 96 (i.e., * bits 6 and 7 of the doc_id's 9th byte are both true). - * Since all new letter_code formats use bits 4, 5, 6, 7 to represent - * the doc type as int values mapped between 0-8, there is no value - * in a doc_id's 9th byte that can have both bits 6 and 7 - * set to true. - * This difference can be used to check whether $doc_key follows the - * old or new letter_code format. + * Since all new letter_code formats use bits 4, 5, 6, 7 to + * represent the doc type as int values mapped between 0-8, + * there is no value in a doc_id's 9th byte that can have both + * bits 6 and 7 set to true. + * This difference can be used to check whether $doc_key follows + * the old or new letter_code format. */ $doc_id_format = ord($doc_key[8 << 1] ?? 0) & 96; if ($doc_id_format != 96) { if(L\IndexDocumentBundle::isAWikipediaPage($doc_key)) { - $posting[self::DOC_RANK] += $this->ranking_factors["WIKI_BONUS"]; + $posting[self::DOC_RANK] += + $this->ranking_factors["WIKI_BONUS"]; } $posting[self::DOC_RANK] += $this->ranking_factors["NUM_SLASHES_BONUS"] /