diff --git a/src/locale/hi/resources/Tokenizer.php b/src/locale/hi/resources/Tokenizer.php index 08da7bfce..2cc5239cf 100755 --- a/src/locale/hi/resources/Tokenizer.php +++ b/src/locale/hi/resources/Tokenizer.php @@ -144,7 +144,7 @@ class Tokenizer $tag_list = []; $i = 0; $model = new M\Model(); - foreach ($tokens as $token) + foreach ($tokens as $token) { //Tag the tokens as found in the Lexicon $token = trim($token); @@ -152,7 +152,7 @@ class Tokenizer $term = $current["token"]; $sql = "SELECT PART_OF_SPEECH FROM LEXICON WHERE TERM = '{$term}' AND LOCALE = 'hi'"; - $queryResult = @$model->db->execute($sql); + $queryResult = @$model->db->execute($sql); if ($queryResult !== false) { $row = $model->db->fetchArray($queryResult); $current["tag"] = $row["PART_OF_SPEECH"]; @@ -160,7 +160,7 @@ class Tokenizer if (is_numeric($token)) { $current["tag"] = "NN"; - } else if (strcmp($token,"है") == 0 || + } else if (strcmp($token,"है") == 0 || strcmp($token, "हैं") == 0) { $current["tag"] = "VB"; } @@ -177,7 +177,7 @@ class Tokenizer /** * This method tags the remaining words from the text. */ - public static function tagUnknownWords($partiallyTaggedText) + public static function tagUnknownWords($partiallyTaggedText) { $result = $partiallyTaggedText; $verbs = ["VBZ","VBD","VBN"]; @@ -240,16 +240,16 @@ class Tokenizer mb_substr($current["token"], -2, 2) == "तम") { $current["tag"] = "JJ"; $result[$i] = $current; - } + } if ($current["tag"] == "UNKNOWN") { $current["tag"] = "NN"; $result[$i] = $current; - } + } if ($previous["tag"] == "UNKNOWN"){ $previous["tag"] = "NN"; - $result[$i-1] = $previous; + $result[$i-1] = $previous; } } $previous = $current; @@ -276,8 +276,8 @@ class Tokenizer "VBZ" => "VB", "JJ" => "AJ", "JJR" => "AJ", "JJS" => "AJ", "RB" => "AV", "RBR" => "AV", "RBS" => "AV", "WRB" => "AV", - "inj" => "IN", "case" => "IN", "proNN" => "IN", "particle" => "IN", - "PREP" => "IN", "IN" => "IN", "PSP" => "IN", + "inj" => "IN", "case" => "IN", "proNN" => "IN", "particle" => "IN", + "PREP" => "IN", "IN" => "IN", "PSP" => "IN", "direct_DT" => "DT", ]; foreach ($tagged_tokens as $t) { @@ -295,7 +295,7 @@ class Tokenizer * * @param array $tagged_phrase * an array of pairs of the form ("token" => token_for_term, - * "tag"=> part_of_speech_tag_for_term) + * "tag" => part_of_speech_tag_for_term) * @param array $tree that consists of ["curnode" => * current parse position in $tagged_phrase] * @return array has fields @@ -355,7 +355,7 @@ class Tokenizer while (isset($tagged_phrase[$cur_node]["tag"]) && in_array($tagged_phrase[$cur_node]["tag"], self::$adjective_phrases)) { - $adjective_string .= " " . + $adjective_string .= " " . $tagged_phrase[$cur_node]["token"]; $cur_node++; }