Code Refactor Part 3: Ran Cleanup Utilities

Salil Shenoy [2016-12-13 19h]

Code Refactor Part 3: Ran Cleanup Utilities

Signed-off-by: Chris Pollett <chris@pollett.org>

Filename
src/library/PhraseParser.php
src/locale/en_US/resources/Tokenizer.php
src/models/PhraseModel.php

diff --git a/src/library/PhraseParser.php b/src/library/PhraseParser.php
index 69ef0f329..f98be05b5 100755
--- a/src/library/PhraseParser.php
+++ b/src/library/PhraseParser.php
@@ -709,7 +709,7 @@ class PhraseParser
         return self::getNGramsTerm($terms, $n);
     }
     /**
-     * Returns the characters n-grams for the given terms where n is the
+     * Returns the characters n-grams for the given terms where n is the
      * length.
      *
      * @param array $terms the terms to make n-grams for
@@ -1361,4 +1361,4 @@ vaffanculo fok hoer kut lul やりまん 打っ掛け
         }
         return $result;
     }
-}
+}
\ No newline at end of file
diff --git a/src/locale/en_US/resources/Tokenizer.php b/src/locale/en_US/resources/Tokenizer.php
index eeee5b3e9..e13594f04 100755
--- a/src/locale/en_US/resources/Tokenizer.php
+++ b/src/locale/en_US/resources/Tokenizer.php
@@ -550,12 +550,12 @@ class Tokenizer
      * Takes a part-of-speech tagged phrase and pre-tree with a
      * parse-from position and builds a parse tree for a determiner if possible
      *
-     * @param array $tagged_phrase
+     * @param array $tagged_phrase
      *      an array of pairs of the form ("token" => token_for_term,
      *     "tag"=> part_of_speech_tag_for_term)
      * @param array $tree that consists of ["curnode" =>
-     *      current parse position in $tagged_phrase]
-     * @return array has fields
+     *      current parse position in $tagged_phrase]
+     * @return array has fields
      *      "cur_node" index of how far we parsed $tagged_phrase
      *      "DT" a subarray with a token node for the determiner that was
      *      parsed
@@ -575,12 +575,12 @@ class Tokenizer
      * Takes a part-of-speech tagged phrase and pre-tree with a
      * parse-from position and builds a parse tree for an adjective if possible
      *
-     * @param array $tagged_phrase
+     * @param array $tagged_phrase
      *      an array of pairs of the form ("token" => token_for_term,
      *     "tag"=> part_of_speech_tag_for_term)
      * @param array $tree that consists of ["cur_node" =>
-     *      current parse position in $tagged_phrase]
-     * @return array has fields
+     *      current parse position in $tagged_phrase]
+     * @return array has fields
      *      "cur_node" index of how far we parsed $tagged_phrase
      *      "JJ" a subarray with a token node for the adjective that was
      *      parsed
@@ -605,12 +605,12 @@ class Tokenizer
      * Takes a part-of-speech tagged phrase and pre-tree with a
      * parse-from position and builds a parse tree for a noun if possible
      *
-     * @param array $tagged_phrase
+     * @param array $tagged_phrase
      *      an array of pairs of the form ("token" => token_for_term,
      *     "tag"=> part_of_speech_tag_for_term)
      * @param array $tree that consists of ["curnode" =>
-     *      current parse position in $tagged_phrase]
-     * @return array has fields
+     *      current parse position in $tagged_phrase]
+     * @return array has fields
      *      "cur_node" index of how far we parsed $tagged_phrase
      *      "NN" a subarray with a token node for the noun string that was
      *      parsed
@@ -637,12 +637,12 @@ class Tokenizer
      * parse-from position and builds a parse tree for a sequence of
      * prepositional phrases if possible
      *
-     * @param array $tagged_phrase
+     * @param array $tagged_phrase
      *      an array of pairs of the form ("token" => token_for_term,
      *     "tag"=> part_of_speech_tag_for_term)
      * @param array $tree that consists of ["cur_node" =>
-     *      current parse position in $tagged_phrase]
-     * @return array has fields
+     *      current parse position in $tagged_phrase]
+     * @return array has fields
      *      "cur_node" index of how far we parsed $tagged_phrase
      *      parsed followed by additional possible fields (here i
      *      represents the ith clause found):
@@ -704,12 +704,12 @@ class Tokenizer
      * Takes a part-of-speech tagged phrase and pre-tree with a
      * parse-from position and builds a parse tree for a noun phrase if possible
      *
-     * @param array $tagged_phrase
+     * @param array $tagged_phrase
      *      an array of pairs of the form ("token" => token_for_term,
      *     "tag"=> part_of_speech_tag_for_term)
      * @param array $tree that consists of ["curnode" =>
-     *      current parse position in $tagged_phrase]
-     * @return array has fields
+     *      current parse position in $tagged_phrase]
+     * @return array has fields
      *      "cur_node" index of how far we parsed $tagged_phrase
      *      "NP" a subarray with possible fields
      *      "DT" with value a determiner subtree
@@ -749,15 +749,15 @@ class Tokenizer
      * Takes a part-of-speech tagged phrase and pre-tree with a
      * parse-from position and builds a parse tree for a verb if possible
      *
-     * @param array $tagged_phrase
+     * @param array $tagged_phrase
      *      an array of pairs of the form ("token" => token_for_term,
      *     "tag"=> part_of_speech_tag_for_term)
      * @param array $tree that consists of ["curnode" =>
-     *      current parse position in $tagged_phrase]
-     * @return array has fields
+     *      current parse position in $tagged_phrase]
+     * @return array has fields
      *      "cur_node" index of how far we parsed $tagged_phrase
      *      "VB" a subarray with a token node for the verb string that was
-     *      parsed
+     *      parsed
      */
     public static function extractVerb($tagged_phrase, $tree)
     {
@@ -785,12 +785,12 @@ class Tokenizer
      * Takes a part-of-speech tagged phrase and pre-tree with a
      * parse-from position and builds a parse tree for a verb phrase if possible
      *
-     * @param array $tagged_phrase
+     * @param array $tagged_phrase
      *      an array of pairs of the form ("token" => token_for_term,
      *     "tag"=> part_of_speech_tag_for_term)
      * @param array $tree that consists of ["curnode" =>
-     *      current parse position in $tagged_phrase]
-     * @return array has fields
+     *      current parse position in $tagged_phrase]
+     * @return array has fields
      *      "cur_node" index of how far we parsed $tagged_phrase
      *      "VP" a subarray with possible fields
      *      "VB" with value a verb subtree
@@ -837,9 +837,9 @@ class Tokenizer
      * Given a part-of-speeech tagged phrase array generates a parse tree
      * for the phrase using a recursive descent parser.
      *
-     * @param array $tagged_phrase
+     * @param array $tagged_phrase
      *      an array of pairs of the form ("token" => token_for_term,
-     *     "tag"=> part_of_speech_tag_for_term)
+     *     "tag"=> part_of_speech_tag_for_term)
      * @return array used to represent a tree. The array has up to three fields
      *      $tree["cur_node"] index of how far we parsed our$tagged_phrase
      *      $tree["NP"] contains a subtree for a noun phrase
@@ -899,23 +899,25 @@ class Tokenizer
         $question_list = [];
         $question_answer_list = [];
         $word_and_phrase_list = array_filter($word_and_phrase_list,
-            function ($key) {
+            function ($key) {
                 return str_word_count($key) >= C\PHRASE_THRESHOLD;
             }, \ARRAY_FILTER_USE_KEY );
         $triplet_types = ['CONCISE', 'RAW'];
         $triplet_parts = ['subject', 'predicate', 'object'];
         $tokenizer = PhraseParser::getTokenizer($lang);
         foreach ($word_and_phrase_list as $word_and_phrase => $position_list) {
-            $tagged_phrase = $tokenizer->tagTokenizePartOfSpeech($word_and_phrase);
+            $tagged_phrase =
+                $tokenizer->tagTokenizePartOfSpeech($word_and_phrase);
             $parse_tree = $tokenizer->generatePhraseParseTree($tagged_phrase);
             $triplets = self::extractTripletsParseTree($parse_tree);
-            $extracted_triplets = self::rearrangeTripletsByType($triplets, $lang);
+            $extracted_triplets =
+                self::rearrangeTripletsByType($triplets, $lang);

             foreach ($triplet_types as $type) {
                 if (!empty($extracted_triplets[$type])) {
                     $triplet = $extracted_triplets[$type];
                     foreach ($triplet_parts as $part) {
-                        if(!empty($triplet[$part])) {
+                        if (!empty($triplet[$part])) {
                             $question_list[$triplet[$part]] = $position_list;
                         }
                     }
@@ -1583,7 +1585,7 @@ class Tokenizer
      *
      * @return the question marker
      */
-    public static function getQuestionMarker()
+    public static function getQuestionMarker()
     {
         return self::$question_marker;
     }
@@ -1655,4 +1657,4 @@ class Tokenizer
             "(,\s?when[^,]*,)|(,\s?where[^,]*,)/i", "", $result);
         return $result;
     }
-}
+}
\ No newline at end of file
diff --git a/src/models/PhraseModel.php b/src/models/PhraseModel.php
index 81df13f2b..16b439959 100755
--- a/src/models/PhraseModel.php
+++ b/src/models/PhraseModel.php
@@ -649,7 +649,7 @@ class PhraseModel extends ParallelModel
                     $tmp_hash = (is_array($tmp_hash)) ? $tmp_hash : [$tmp_hash];
                     $test =  array_merge($tmp_hash, [L\crawlHash($word)]);
                 } else {
-                    if(in_array($word, $found_materialized_metas) &&
+                    if (in_array($word, $found_materialized_metas) &&
                         !$metas_accounted) {
                         $meta_keys[] = $tmp_hash;
                     } else {
@@ -657,7 +657,7 @@ class PhraseModel extends ParallelModel
                     }
                 }
             }
-            if(!$metas_accounted) {
+            if (!$metas_accounted) {
                 $word_keys = array_merge($word_keys, $meta_keys);
             }
             if (count($word_keys) == 0) {
@@ -810,7 +810,7 @@ class PhraseModel extends ParallelModel
         }
         $found_metas = array_unique($found_metas);
         $found_materialized_metas = array_unique($found_materialized_metas);
-        if(empty(trim($phrase_string)) && count($found_metas) == 2
+        if (empty(trim($phrase_string)) && count($found_metas) == 2
             && (in_array("site:doc", $found_metas)
             || in_array("site:any", $found_metas))) {
             /*site:doc and site:any doesn't work with materialized metas by
@@ -1813,4 +1813,4 @@ class PhraseModel extends ParallelModel
         }
         return $group_iterator;
     }
-}
+}
\ No newline at end of file

ViewGit