viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]

Implemented review changes: 1. Wrapped some lines > 80 characters. 2. Code formatting, removed some repeated code

Salil Shenoy [2016-11-03 05:Nov:rd]
Implemented review changes: 1. Wrapped some lines > 80 characters. 2. Code formatting, removed some repeated code

Signed-off-by: Chris Pollett <chris@pollett.org>
Filename
src/library/TripletExtractor.php
diff --git a/src/library/TripletExtractor.php b/src/library/TripletExtractor.php
index 6bb38874f..77ff6a1d6 100644
--- a/src/library/TripletExtractor.php
+++ b/src/library/TripletExtractor.php
@@ -107,17 +107,15 @@ class TripletExtractor
                 $line = trim(substr($lex_string, $token_pos,
                     $cur_pos - $token_pos));
                 $tag_list = explode(' ', $line);
-                $dictionary[strtolower(rtrim($token, "."))] =
-                    array_slice($tag_list, 1);
+                $dictionary[$token] = array_slice($tag_list, 1);
                 $cur_pos++;
             }
         }
         // now using our dictionary we tag
         $i = 0;
-        $tag_list = array();
+        $tag_list = [];
+        $prev_tag_list = [];
         foreach ($matches[0] as $token) {
-            $prev_tag_list = $tag_list;
-            $tag_list = [];
             // default to a common noun
             $current = ['token' => $token, 'tag' => 'NN'];
             // remove trailing full stops
@@ -193,6 +191,8 @@ class TripletExtractor
             $i++;
             $previous = $current;
             $previous_token = $token;
+            $prev_tag_list = $tag_list;
+            $tag_list = [];
         }
         return $result;
     }
@@ -243,7 +243,7 @@ class TripletExtractor
      * @param $text any statement
      * @return array words tagged with POS tags
      */
-    public static function partOfSpeechTagger_Brill($text)
+    public static function partOfSpeechTaggerBrill($text)
     {
         static $dict = null;
         $lexicon = C\LOCALE_DIR . "/en_US/resources/lexicon.txt";
@@ -509,6 +509,10 @@ class TripletExtractor
     }

     /**
+     * Takes current tree and returns
+     * tree by adding auxiliary verb
+     * node to it
+     *
      * @param $tagger_array POS tagged array
      * @param $tree current tree
      * @return mixed VP added tree
@@ -539,6 +543,9 @@ class TripletExtractor
     }

     /**
+     * Takes current tree and returns
+     * tree by adding Verb node to it.
+     *
      * @param $tagger_array POS tagged tree
      * @param $tree current tree
      * @return mixed VB added tree
@@ -572,6 +579,9 @@ class TripletExtractor
         return $tree;
    }
    /**
+     * Takes current tree and returns
+     * a triplet extracted from the tree.
+     *
     * @param $tree fully generated tree
     * @return array triplet array
     */
@@ -586,6 +596,10 @@ class TripletExtractor
     }

     /**
+     * Takes triplet tree  and returns
+     * the processed triplet from the
+     * tree.
+     *
      * @param $triplet_tree any statement
      * @return array processed triplet
      */
@@ -600,6 +614,10 @@ class TripletExtractor
     }

     /**
+     * Takes triplet tree and returns
+     * tree an array of raw
+     * triplets.
+     *
      * @param $triplet_tree triplet array
      * @return array raw triplet array
      */
@@ -610,9 +628,12 @@ class TripletExtractor
         if (isset($triplet_tree['subject']['RAW'])
             && isset($triplet_tree['predicate']['RAW'])
             && isset($triplet_tree['object']['RAW'])
-            && !TripletExtractor::IsNullOrEmptyString($triplet_tree['subject']['RAW'])
-            && !TripletExtractor::IsNullOrEmptyString($triplet_tree['predicate']['RAW'])
-            && !TripletExtractor::IsNullOrEmptyString($triplet_tree['object']['RAW'])
+            && !TripletExtractor::isNullOrEmptyString(
+                                    $triplet_tree['subject']['RAW'])
+            && !TripletExtractor::isNullOrEmptyString(
+                                    $triplet_tree['predicate']['RAW'])
+            && !TripletExtractor::isNullOrEmptyString(
+                                    $triplet_tree['object']['RAW'])
         ) {

             $SUBJECT = trim($triplet_tree['subject']['RAW']);
@@ -620,21 +641,31 @@ class TripletExtractor
             $OBJECT = trim($triplet_tree['object']['RAW']);

             $raw_triplet['SUBJECT'] =
-                TripletExtractor::$question_word . " " . $PREDICATE . " " . $OBJECT;
+                TripletExtractor::$question_word . " " .
+                                    $PREDICATE . " " . $OBJECT;
             $raw_triplet['PREDICATE'] =
-                $SUBJECT . " " . TripletExtractor::$question_word . " " . $OBJECT;
+                $SUBJECT . " " . TripletExtractor::$question_word . " " .
+                                    $OBJECT;
             $raw_triplet['OBJECT'] =
-                $SUBJECT . " " . $PREDICATE . " " . TripletExtractor::$question_word;
+                $SUBJECT . " " . $PREDICATE . " " .
+                                    TripletExtractor::$question_word;

-            $question_answer_triplet[TripletExtractor::$question_word . " " . $PREDICATE . " " . $OBJECT] = $SUBJECT;
-            $question_answer_triplet[$SUBJECT . " " . TripletExtractor::$question_word . " " . $OBJECT] = $PREDICATE;
-            $question_answer_triplet[$SUBJECT . " " . $PREDICATE . " " . TripletExtractor::$question_word] = $OBJECT;
+            $question_answer_triplet[TripletExtractor::$question_word . " " .
+                            $PREDICATE . " " . $OBJECT] = $SUBJECT;
+            $question_answer_triplet[$SUBJECT . " " .
+                TripletExtractor::$question_word . " " . $OBJECT] = $PREDICATE;
+            $question_answer_triplet[$SUBJECT . " " . $PREDICATE . " " .
+                TripletExtractor::$question_word] = $OBJECT;
             $raw_triplet['QUESTION_ANSWER_LIST'] = $question_answer_triplet;
         }
         return $raw_triplet;
     }

     /**
+     * Takes triplet tree and returns
+     * tree an array of featured
+     * triplets.
+     *
      * @param $triplet_tree triplet array
      * @return array featured triplet array
      */
@@ -645,24 +676,34 @@ class TripletExtractor
         if (isset($triplet_tree['subject']['FEATURED'])
             && isset($triplet_tree['predicate']['FEATURED'])
             && isset($triplet_tree['object']['FEATURED'])
-            && !TripletExtractor::IsNullOrEmptyString($triplet_tree['subject']['FEATURED'])
-            && !TripletExtractor::IsNullOrEmptyString($triplet_tree['predicate']['FEATURED'])
-            && !TripletExtractor::IsNullOrEmptyString($triplet_tree['object']['FEATURED'])
+            && !TripletExtractor::isNullOrEmptyString(
+                                    $triplet_tree['subject']['FEATURED'])
+            && !TripletExtractor::isNullOrEmptyString(
+                                    $triplet_tree['predicate']['FEATURED'])
+            && !TripletExtractor::isNullOrEmptyString(
+                                    $triplet_tree['object']['FEATURED'])
         ) {
             $SUBJECT = trim($triplet_tree['subject']['FEATURED']);
             $PREDICATE = trim($triplet_tree['predicate']['FEATURED']);
             $OBJECT = trim($triplet_tree['object']['FEATURED']);

             $featured_triplet['SUBJECT'] =
-                TripletExtractor::$question_word . " " . $PREDICATE . " " . $OBJECT;
+                TripletExtractor::$question_word . " " . $PREDICATE .
+                                    " " . $OBJECT;
             $featured_triplet['PREDICATE'] =
-                $SUBJECT . " " . TripletExtractor::$question_word . " " . $OBJECT;
+                $SUBJECT . " " . TripletExtractor::$question_word .
+                                    " " . $OBJECT;
             $featured_triplet['OBJECT'] =
-                $SUBJECT . " " . $PREDICATE . " " . TripletExtractor::$question_word;
+                $SUBJECT . " " . $PREDICATE . " " .
+                                    TripletExtractor::$question_word;

-            $question_answer_triplet[TripletExtractor::$question_word . " " . $PREDICATE . " " . $OBJECT] = $SUBJECT;
-            $question_answer_triplet[$SUBJECT . " " . TripletExtractor::$question_word . " " . $OBJECT] = $PREDICATE;
-            $question_answer_triplet[$SUBJECT . " " . $PREDICATE . " " . TripletExtractor::$question_word] = $OBJECT;
+            $question_answer_triplet[TripletExtractor::$question_word . " " .
+                                    $PREDICATE . " " . $OBJECT] = $SUBJECT;
+            $question_answer_triplet[$SUBJECT . " " .
+                                    TripletExtractor::$question_word .
+                                                " " . $OBJECT] = $PREDICATE;
+            $question_answer_triplet[$SUBJECT . " " . $PREDICATE . " " .
+                                    TripletExtractor::$question_word] = $OBJECT;

             $featured_triplet['QUESTION_ANSWER_LIST'] = $question_answer_triplet;
         }
@@ -670,15 +711,22 @@ class TripletExtractor
     }

     /**
+     * Takes a string and checks if
+     * it is set or empty.
+     *
      * @param $string any string
      * @return bool true if null of empty string
      */
-    public static function IsNullOrEmptyString($string)
+    public static function isNullOrEmptyString($string)
     {
         return (!isset($string) || trim($string) === '');
     }

     /**
+     * Takes current tree and returns
+     * the array of text tagged as
+     * Subject.
+     *
      * @param $tree generated tree
      * @return array subject array
      */
@@ -690,7 +738,8 @@ class TripletExtractor
             $value = TripletExtractor::extractFirstNounFromNPTree($tree_np);
             $subject['RAW'] = $value;
             $featured_subject = "";
-            $it = new \RecursiveIteratorIterator(new \RecursiveArrayIterator($tree_np));
+            $it = new \RecursiveIteratorIterator(
+                                new \RecursiveArrayIterator($tree_np));
             foreach ($it as $v) {
                 $featured_subject .= $v . " ";
             }
@@ -703,6 +752,10 @@ class TripletExtractor
     }

     /**
+     * Takes current tree and returns
+     * the array of text tagged as
+     * Predicate.
+     *
      * @param $tree generated tree
      * @return array predicate array
      */
@@ -716,7 +769,8 @@ class TripletExtractor
             $featured_predicate = "";
             if (isset($tree_vp['VB']) && $tree_vp['VB'] != null) {
                 $tree_vb = $tree_vp['VB'];
-                $it = new \RecursiveIteratorIterator(new \RecursiveArrayIterator($tree_vb));
+                $it = new \RecursiveIteratorIterator(
+                                    new \RecursiveArrayIterator($tree_vb));
                 foreach ($it as $v) {
                     $featured_predicate .= $v . " ";
                 }
@@ -730,6 +784,10 @@ class TripletExtractor
     }

     /**
+     * Takes current tree and returns
+     * the array of text tagged as
+     * Object.
+     *
      * @param $tree generated tree
      * @return array object array
      */
@@ -743,7 +801,8 @@ class TripletExtractor
                 $value = TripletExtractor::extractFirstNounFromNPTree($nb);
                 $object['RAW'] = $value;
                 $featured_object = "";
-                $it = new \RecursiveIteratorIterator(new \RecursiveArrayIterator($nb));
+                $it = new \RecursiveIteratorIterator(
+                                    new \RecursiveArrayIterator($nb));
                 foreach ($it as $v) {
                     $featured_object .= $v . " ";
                 }
@@ -760,6 +819,9 @@ class TripletExtractor
     }

     /**
+     * Takes noun phrase tree and return
+     * the first noun from the tree.
+     *
      * @param $tree_np noun phrase subtree
      * @return string first noun
      */
@@ -776,6 +838,9 @@ class TripletExtractor
     }

     /**
+     * Takes verb phrase tree and returns
+     * the base form of the verb.
+     *
      * @param $tree_vp verb phrase subtree
      * @return string deepest verb
      */
@@ -792,6 +857,10 @@ class TripletExtractor
     }

     /**
+     * Takes current tree and return
+     * attribute maps for noun, adjectives,
+     * preposition.
+     *
      * @param $tree generated tree
      * @return array attributes array
      */
@@ -822,6 +891,11 @@ class TripletExtractor
     }

     /**
+     * Takes the statement and apply
+     * the rules in the defined in the
+     * lexicon, assign parts of speech
+     * and generate a triplet tree.
+     *
      * @param $statement any statement
      * @return array processed triplet
      */
@@ -829,7 +903,7 @@ class TripletExtractor
     {
         try {
             $tagged_statement =
-                TripletExtractor::partOfSpeechTagger_Brill($statement);
+                TripletExtractor::partOfSpeechTaggerBrill($statement);
             $statement_tree =
                 TripletExtractor::generateParseTreeUsingRDP($tagged_statement);
             $triplet_tree = TripletExtractor::extractTriplet($statement_tree);
@@ -840,6 +914,11 @@ class TripletExtractor
     }

     /**
+     * Process individual statements
+     * from the statement array. Generate
+     * a list of question and answer
+     * pairs.
+     *
      * @param $statement_array array of statements
      * @return array list of triplets
      */
@@ -851,23 +930,32 @@ class TripletExtractor
         foreach ($statement_array as $key => $value) {
             try {
                 if (str_word_count($key) >= 3) {
-                    $extracted_triplet = TripletExtractor::storeStatementAsTriplet($key);
+                    $extracted_triplet =
+                            TripletExtractor::storeStatementAsTriplet($key);

                     if (isset($extracted_triplet['RAW']) &&
                         sizeof($extracted_triplet['RAW']) > 0) {
-                        $question_list[$extracted_triplet['RAW']['SUBJECT']] = $value;
-                        $question_list[$extracted_triplet['RAW']['PREDICATE']] = $value;
-                        $question_list[$extracted_triplet['RAW']['OBJECT']] = $value;
-                        $question_answer_list = array_merge($question_answer_list,
+                        $question_list[$extracted_triplet['RAW']['SUBJECT']]
+                                            = $value;
+                        $question_list[$extracted_triplet['RAW']['PREDICATE']]
+                                            = $value;
+                        $question_list[$extracted_triplet['RAW']['OBJECT']]
+                                            = $value;
+                        $question_answer_list =
+                            array_merge($question_answer_list,
                             $extracted_triplet['RAW']['QUESTION_ANSWER_LIST']);
                     }

                     if (isset($extracted_triplet['FEATURED']) &&
                         sizeof($extracted_triplet['FEATURED']) > 0) {
-                        $question_list[$extracted_triplet['FEATURED']['SUBJECT']] = $value;
-                        $question_list[$extracted_triplet['FEATURED']['PREDICATE']] = $value;
-                        $question_list[$extracted_triplet['FEATURED']['OBJECT']] = $value;
-                        $question_answer_list = array_merge($question_answer_list,
+                        $question_list[$extracted_triplet['FEATURED']['SUBJECT']]
+                                            = $value;
+                        $question_list[$extracted_triplet['FEATURED']['PREDICATE']]
+                                            = $value;
+                        $question_list[$extracted_triplet['FEATURED']['OBJECT']]
+                                            = $value;
+                        $question_answer_list =
+                            array_merge($question_answer_list,
                             $extracted_triplet['FEATURED']['QUESTION_ANSWER_LIST']);
                     }
                 }
@@ -890,33 +978,30 @@ class TripletExtractor
      */
     public static function questionParser($question_string)
     {
-        $question_string_tagged = TripletExtractor::partOfSpeechTagger_Brill(
+        $question_string_tagged = TripletExtractor::partOfSpeechTaggerBrill(
             $question_string);
         $index = 0;
         $generated_question_array = [];
-        if (isset($question_string_tagged[$index]) &&
-            ("WRB" == trim($question_string_tagged[$index]['tag']) ||
-                "WP" == trim($question_string_tagged[$index]['tag']))
-        ) {
-            if ("WHO" == strtoupper(
-                    trim($question_string_tagged[$index]['token']))) {
-                $index = $index + 1;
-                $generated_question_array =
-                    TripletExtractor::parseWHOQuestion(
-                        $question_string_tagged, $index);
-            } else {
-                if ("WHERE" == strtoupper(
-                        trim($question_string_tagged[$index]['token'])) ||
-                    "WHEN" == strtoupper(
-                        trim($question_string_tagged[$index]['token'])) ||
-                    "WHAT" == strtoupper(
-                        trim($question_string_tagged[$index]['token']))
-                ) {
+        if (isset($question_string_tagged[$index])) {
+            $tag = trim($question_string_tagged[$index]['tag']);
+            if ("WRB" ==  $tag || "WP" == $tag) {
+                $token = strtoupper(
+                    trim($question_string_tagged[$index]['token']));
+                if ("WHO" == $token) {
                     $index = $index + 1;
                     $generated_question_array =
+                        TripletExtractor::parseWHOQuestion(
+                            $question_string_tagged, $index);
+                } else {
+                    if ("WHERE" == $token ||
+                        "WHEN" == $token ||
+                        "WHAT" == $token) {
+                        $index = $index + 1;
+                        $generated_question_array =
                         TripletExtractor::parseWHPlusQuestion_New(
                             $question_string_tagged,
                         $index);
+                    }
                 }
             }
         }
@@ -945,9 +1030,9 @@ class TripletExtractor
             $tree_vp);
         if (isset($triplet['object']['RAW'])
             && isset($triplet['predicate']['RAW'])
-            && !TripletExtractor::IsNullOrEmptyString(
+            && !TripletExtractor::sNullOrEmptyString(
                 $triplet['object']['RAW'])
-            && !TripletExtractor::IsNullOrEmptyString(
+            && !TripletExtractor::isNullOrEmptyString(
                 $triplet['predicate']['RAW'])
         ) {
             $generated_question_array['RAW']['1'] =
@@ -961,9 +1046,9 @@ class TripletExtractor
         }
         if (isset($triplet['object']['FEATURED'])
             && isset($triplet['predicate']['FEATURED'])
-            && !TripletExtractor::IsNullOrEmptyString(
+            && !TripletExtractor::isNullOrEmptyString(
                 $triplet['object']['FEATURED'])
-            && !TripletExtractor::IsNullOrEmptyString(
+            && !TripletExtractor::isNullOrEmptyString(
                 $triplet['predicate']['FEATURED'])
         ) {
             $generated_question_array['FEATURED']['1'] =
@@ -990,15 +1075,18 @@ class TripletExtractor
     {
         $generated_question_array = [];
         $aux_verb = "";
-        while (isset($question_string_tagged[$index]) &&
-            ("VB" == trim($question_string_tagged[$index]['tag']) ||
-                "VBD" == trim($question_string_tagged[$index]['tag']) ||
-                "VBG" == trim($question_string_tagged[$index]['tag']) ||
-                "VBN" == trim($question_string_tagged[$index]['tag']) ||
-                "VBP" == trim($question_string_tagged[$index]['tag']) ||
-                "VBZ" == trim($question_string_tagged[$index]['tag']))) {
-            $aux_verb .= " " . trim($question_string_tagged[$index]['token']);
-            $index = $index + 1;
+        while (isset($question_string_tagged[$index])) {
+            $tag = trim($question_string_tagged[$index]['tag']);
+            if ("VB" ==  $tag ||
+                "VBD" == $tag ||
+                "VBG" == $tag ||
+                "VBN" == $tag ||
+                "VBP" == $tag ||
+                "VBZ" == $tag) {
+                $token = trim($question_string_tagged[$index]['token']);
+                $aux_verb .= " " . $token;
+                $index = $index + 1;
+            }
         }
         $tree = ["cur_node" => $index];
         $tree['NP'] = "WHPlus";
@@ -1014,7 +1102,7 @@ class TripletExtractor
         $triplet['object'] = TripletExtractor::extractObjectFromTree(
             $tree_vp);
         if (isset($aux_verb)
-            && !TripletExtractor::IsNullOrEmptyString($aux_verb)
+            && !TripletExtractor::isNullOrEmptyString($aux_verb)
         ) {
             $triplet['predicate']['RAW'] = trim($aux_verb) .
                 " " . $triplet['predicate']['RAW'];
@@ -1026,9 +1114,9 @@ class TripletExtractor
         }
         if (isset($triplet['subject']['RAW'])
             && isset($triplet['predicate']['RAW'])
-            && !TripletExtractor::IsNullOrEmptyString(
+            && !TripletExtractor::isNullOrEmptyString(
                 $triplet['subject']['RAW'])
-            && !TripletExtractor::IsNullOrEmptyString(
+            && !TripletExtractor::isNullOrEmptyString(
                 $triplet['predicate']['RAW'])
         ) {
             $generated_question_array['RAW']['1'] =
@@ -1042,9 +1130,9 @@ class TripletExtractor
         }
         if (isset($triplet['subject']['FEATURED'])
             && isset($triplet['predicate']['FEATURED'])
-            && !TripletExtractor::IsNullOrEmptyString(
+            && !TripletExtractor::isNullOrEmptyString(
                 $triplet['subject']['FEATURED'])
-            && !TripletExtractor::IsNullOrEmptyString(
+            && !TripletExtractor::isNullOrEmptyString(
                 $triplet['predicate']['FEATURED'])
         ) {
             $generated_question_array['FEATURED']['1'] =
ViewGit