diff --git a/src/configs/Createdb.php b/src/configs/Createdb.php
index 3430501fb..6c9637895 100755
--- a/src/configs/Createdb.php
+++ b/src/configs/Createdb.php
@@ -39,6 +39,7 @@ use seekquarry\yioop\library\Cipher;
use seekquarry\yioop\models\Model;
use seekquarry\yioop\models\ProfileModel;
use seekquarry\yioop\models\GroupModel;
+use seekquarry\yioop\configs as C;
if (!empty($_SERVER['DOCUMENT_ROOT'])) {
echo "BAD REQUEST";
diff --git a/src/library/PhraseParser.php b/src/library/PhraseParser.php
index ca8b23b5f..9239badf1 100755
--- a/src/library/PhraseParser.php
+++ b/src/library/PhraseParser.php
@@ -401,6 +401,9 @@ class PhraseParser
} else {
mb_internal_encoding("UTF-8");
$string = mb_strtolower($string);
+ if ($lang == "hi") {
+ $string = preg_replace('/(,:)\p{P}/u', "", $string);
+ }
$string = mb_ereg_replace("\s+|".C\PUNCT, " ", $string);
$terms = self::segmentSegment($string, $lang);
$terms = self::charGramTerms($terms, $lang);
diff --git a/src/locale/hi/resources/Tokenizer.php b/src/locale/hi/resources/Tokenizer.php
index 992573af4..f0df85681 100755
--- a/src/locale/hi/resources/Tokenizer.php
+++ b/src/locale/hi/resources/Tokenizer.php
@@ -43,40 +43,57 @@ use seekquarry\yioop\models as M;
*/
class Tokenizer
{
+ /**
+ * List of verb-like parts of speech that might appear in lexicon
+ * @var array
+ */
public static $verb_phrases = ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"];
-
+ /**
+ * List of noun-like parts of speech that might appear in lexicon
+ * @var array
+ */
public static $noun_phrases = ["NN", "NNS", "NNP", "NNPS", "PRP"];
-
+ /**
+ * List of adjective-like parts of speech that might appear in lexicon
+ * @var array
+ */
public static $adjective_phrases = ["JJ", "JJR", "JJS"];
-
+ /**
+ * List of postpositional-like parts of speech that might appear in lexicon
+ * @var array
+ */
public static $postpositional_phrases = ["inj", "PREP", "proNN", "CONJ",
"INT", "particle", "case", "PSP"];
-
+ /**
+ * Any unique identifier corresponding to the component of a triplet which
+ * can be answered using a question answer list
+ * @var string
+ */
public static $question_marker = "qqq";
/**
- * Words we don't want to be stemmed
- * @var array
- */
+ * Words we don't want to be stemmed
+ * @var array
+ */
public static $no_stem_list = [];
/**
- * Stub function which could be used for a word segmenter.
- * Such a segmenter on input thisisabunchofwords would output
- * this is a bunch of words
- *
- * @param string $pre_segment before segmentation
- * @return string should return string with words separated by space
- * in this case does nothing
- */
+ * Stub function which could be used for a word segmenter.
+ * Such a segmenter on input thisisabunchofwords would output
+ * this is a bunch of words
+ *
+ * @param string $pre_segment before segmentation
+ * @return string should return string with words separated by space
+ * in this case does nothing
+ */
public static function segment($pre_segment)
{
return $pre_segment;
}
/**
- * Removes the stop words from the page (used for Word Cloud generation)
- *
- * @param string $page the page to remove stop words from.
- * @return string $page with no stop words
- */
+ * Removes the stop words from the page (used for Word Cloud generation)
+ *
+ * @param string $page the page to remove stop words from.
+ * @return string $page with no stop words
+ */
public static function stopwordsRemover($page)
{
$stop_words = [
@@ -106,11 +123,11 @@ class Tokenizer
return $page;
}
/**
- * Computes the stem of an Hindi word
- *
- * @param string $word the string to stem
- * @return string the stem of $word
- */
+ * Computes the stem of a Hindi word
+ *
+ * @param string $word the string to stem
+ * @return string the stem of $word
+ */
public static function stem($word)
{
if (in_array($word, self::$no_stem_list)) {
@@ -120,11 +137,11 @@ class Tokenizer
return $word;
}
/**
- * Removes common Hindi suffixes
- *
- * @param string $word to remove suffixes from
- * @return string result of suffix removal
- */
+ * Removes common Hindi suffixes
+ *
+ * @param string $word to remove suffixes from
+ * @return string result of suffix removal
+ */
private static function removeSuffix($word)
{
$length = mb_strlen($word);
@@ -154,14 +171,14 @@ class Tokenizer
return $word;
}
/**
- * The method takes as input a phrase and returns a string with each
- * term tagged with a part of speech.
- *
- * @param string $phrase text to add parts speech tags to
- * @param bool $with_tokens whether to include the terms and the tags
- * in the output string or just the part of speech tags
- * @return string $tagged_phrase which is a string of format term~pos
- */
+ * The method takes as input a phrase and returns a string with each
+ * term tagged with a part of speech.
+ *
+ * @param string $phrase text to add parts speech tags to
+ * @param bool $with_tokens whether to include the terms and the tags
+ * in the output string or just the part of speech tags
+ * @return string $tagged_phrase which is a string of format term~pos
+ */
public static function tagPartsOfSpeechPhrase($phrase, $with_tokens = true)
{
$tagged_tokens = self::tagTokenizePartOfSpeech($phrase);
@@ -170,12 +187,12 @@ class Tokenizer
return $tagged_phrase;
}
/**
- * Uses the lexicon to assign a tag to each token and then uses a rule
- * based approach to assign the most likely of tags to each token
- *
- * @param string $text input phrase which is to be tagged
- * @return string $result which is an array of token => tag
- */
+ * Uses the lexicon to assign a tag to each token and then uses a rule
+ * based approach to assign the most likely of tags to each token
+ *
+ * @param string $text input phrase which is to be tagged
+ * @return array $result which is an array of token => tag
+ */
public static function tagTokenizePartofSpeech($text)
{
$tokens = preg_split("/[\s]+/", $text);
@@ -214,8 +231,8 @@ class Tokenizer
return self::tagUnknownWords($result);
}
/**
- * This method tags the remaining words from the text.
- */
+ * This method tags the remaining words from the text.
+ */
public static function tagUnknownWords($partiallyTaggedText)
{
$result = $partiallyTaggedText;
@@ -286,14 +303,14 @@ class Tokenizer
return $result;
}
/**
- * This method is used to simplify the different tags of speech to a
- * common form
- *
- * @param array $tagged_tokens which is an array of tokens assigned tags.
- * @param bool $with_tokens whether to include the terms and the tags
- * in the output string or just the part of speech tags
- * @return string $tagged_phrase which is a string fo form token~pos
- */
+ * This method is used to simplify the different tags of speech to a
+ * common form
+ *
+ * @param array $tagged_tokens which is an array of tokens assigned tags.
+ * @param bool $with_tokens whether to include the terms and the tags
+ * in the output string or just the part of speech tags
+ * @return string $tagged_phrase which is a string of form token~pos
+ */
public static function taggedPartOfSpeechTokensToString($tagged_tokens,
$with_tokens = true)
{
@@ -319,19 +336,19 @@ class Tokenizer
return $tagged_phrase;
}
/**
- * Takes a part-of-speech tagged phrase and pre-tree with a
- * parse-from position and builds a parse tree for a noun if possible
- *
- * @param array $tagged_phrase
- * an array of pairs of the form ("token" => token_for_term,
- * "tag"=> part_of_speech_tag_for_term)
- * @param array $tree that consists of ["curnode" =>
- * current parse position in $tagged_phrase]
- * @return array has fields
- * "cur_node" index of how far we parsed $tagged_phrase
- * "NN" a subarray with a token node for the noun string that was
- * parsed
- */
+ * Takes a part-of-speech tagged phrase and pre-tree with a
+ * parse-from position and builds a parse tree for a noun if possible
+ *
+ * @param array $tagged_phrase
+ * an array of pairs of the form ("token" => token_for_term,
+ * "tag"=> part_of_speech_tag_for_term)
+ * @param array $tree that consists of ["cur_node" =>
+ * current parse position in $tagged_phrase]
+ * @return array has fields
+ * "cur_node" index of how far we parsed $tagged_phrase
+ * "NN" a subarray with a token node for the noun string that was
+ * parsed
+ */
public static function extractNoun($tagged_phrase, $tree)
{
//Combining multiple noun into one
@@ -350,18 +367,18 @@ class Tokenizer
return $tree;
}
/**
- * Takes a part-of-speech tagged phrase and pre-tree with a
- * parse-from position and builds a parse tree for a sequence of
- * postpositional phrases if possible
- *
- * @param array $tagged_phrase
- * an array of pairs of the form ("token" => token_for_term,
- * "tag"=> part_of_speech_tag_for_term)
- * @param array $tree that consists of ["cur_node" =>
- * current parse position in $tagged_phrase]
- * @return array has fields
- * "cur_node" index of how far we parsed $tagged_phrase
- */
+ * Takes a part-of-speech tagged phrase and pre-tree with a
+ * parse-from position and builds a parse tree for a sequence of
+ * postpositional phrases if possible
+ *
+ * @param array $tagged_phrase
+ * an array of pairs of the form ("token" => token_for_term,
+ * "tag"=> part_of_speech_tag_for_term)
+ * @param array $tree that consists of ["cur_node" =>
+ * current parse position in $tagged_phrase]
+ * @return array has fields
+ * "cur_node" index of how far we parsed $tagged_phrase
+ */
public static function extractPostposition($tagged_phrase, $tree,
$index = 1)
{
@@ -411,19 +428,19 @@ class Tokenizer
return $tree;
}
/**
- * Takes a part-of-speech tagged phrase and pre-tree with a
- * parse-from position and builds a parse tree for a noun phrase if possible
- *
- * @param array $tagged_phrase
- * an array of pairs of the form ("token" => token_for_term,
- * "tag"=> part_of_speech_tag_for_term)
- * @param array $tree that consists of ["curnode" =>
- * current parse position in $tagged_phrase]
- * @return array has fields
- * "cur_node" index of how far we parsed $tagged_phrase
- * "JJ" with value an adjective subtree
- * "POST" with value a post position subtree
- */
+ * Takes a part-of-speech tagged phrase and pre-tree with a
+ * parse-from position and builds a parse tree for a noun phrase if possible
+ *
+ * @param array $tagged_phrase
+ * an array of pairs of the form ("token" => token_for_term,
+ * "tag"=> part_of_speech_tag_for_term)
+ * @param array $tree that consists of ["cur_node" =>
+ * current parse position in $tagged_phrase]
+ * @return array has fields
+ * "cur_node" index of how far we parsed $tagged_phrase
+ * "JJ" with value an adjective subtree
+ * "POST" with value a post position subtree
+ */
public static function extractNounPhrase($tagged_phrase, $tree)
{
$cur_node = $tree['cur_node'];
@@ -450,19 +467,19 @@ class Tokenizer
return $tree;
}
/**
- * Takes a part-of-speech tagged phrase and pre-tree with a
- * parse-from position and builds a parse tree for a verb if possible
- *
- * @param array $tagged_phrase
- * an array of pairs of the form ("token" => token_for_term,
- * "tag"=> part_of_speech_tag_for_term)
- * @param array $tree that consists of ["curnode" =>
- * current parse position in $tagged_phrase]
- * @return array has fields
- * "cur_node" index of how far we parsed $tagged_phrase
- * "VB" a subarray with a token node for the verb string that was
- * parsed
- */
+ * Takes a part-of-speech tagged phrase and pre-tree with a
+ * parse-from position and builds a parse tree for a verb if possible
+ *
+ * @param array $tagged_phrase
+ * an array of pairs of the form ("token" => token_for_term,
+ * "tag"=> part_of_speech_tag_for_term)
+ * @param array $tree that consists of ["cur_node" =>
+ * current parse position in $tagged_phrase]
+ * @return array has fields
+ * "cur_node" index of how far we parsed $tagged_phrase
+ * "VB" a subarray with a token node for the verb string that was
+ * parsed
+ */
public static function extractVerb($tagged_phrase, $tree)
{
$cur_node = $tree['cur_node'];
@@ -486,19 +503,19 @@ class Tokenizer
return $tree;
}
/**
- * Takes a part-of-speech tagged phrase and pre-tree with a
- * parse-from position and builds a parse tree for a verb phrase if possible
- *
- * @param array $tagged_phrase
- * an array of pairs of the form ("token" => token_for_term,
- * "tag"=> part_of_speech_tag_for_term)
- * @param array $tree that consists of ["curnode" =>
- * current parse position in $tagged_phrase]
- * @return array has fields
- * "cur_node" index of how far we parsed $tagged_phrase
- * "VP" a subarray with possible fields
- * "VB" with value a verb subtree
- */
+ * Takes a part-of-speech tagged phrase and pre-tree with a
+ * parse-from position and builds a parse tree for a verb phrase if possible
+ *
+ * @param array $tagged_phrase
+ * an array of pairs of the form ("token" => token_for_term,
+ * "tag"=> part_of_speech_tag_for_term)
+ * @param array $tree that consists of ["cur_node" =>
+ * current parse position in $tagged_phrase]
+ * @return array has fields
+ * "cur_node" index of how far we parsed $tagged_phrase
+ * "VP" a subarray with possible fields
+ * "VB" with value a verb subtree
+ */
public static function extractVerbPhrase($tagged_phrase, $tree)
{
$cur_node = $tree['cur_node'];
@@ -537,19 +554,19 @@ class Tokenizer
return $tree_new;
}
/**
- * Takes a part-of-speech tagged phrase and pre-tree with a
- * parse-from position and builds a parse tree for an adjective if possible
- *
- * @param array $tagged_phrase
- * an array of pairs of the form ("token" => token_for_term,
- * "tag"=> part_of_speech_tag_for_term)
- * @param array $tree that consists of ["cur_node" =>
- * current parse position in $tagged_phrase]
- * @return array has fields
- * "cur_node" index of how far we parsed $tagged_phrase
- * "JJ" a subarray with a token node for the adjective that was
- * parsed
- */
+ * Takes a part-of-speech tagged phrase and pre-tree with a
+ * parse-from position and builds a parse tree for an adjective if possible
+ *
+ * @param array $tagged_phrase
+ * an array of pairs of the form ("token" => token_for_term,
+ * "tag"=> part_of_speech_tag_for_term)
+ * @param array $tree that consists of ["cur_node" =>
+ * current parse position in $tagged_phrase]
+ * @return array has fields
+ * "cur_node" index of how far we parsed $tagged_phrase
+ * "JJ" a subarray with a token node for the adjective that was
+ * parsed
+ */
public static function extractAdjective($tagged_phrase, $tree)
{
$adjective_string = "";
@@ -567,17 +584,17 @@ class Tokenizer
return $tree;
}
/**
- * Given a part-of-speeech tagged phrase array generates a parse tree
- * for the phrase using a recursive descent parser.
- *
- * @param array $tagged_phrase
- * an array of pairs of the form ("token" => token_for_term,
- * "tag"=> part_of_speech_tag_for_term)
- * @return array used to represent a tree. The array has up to three fields
- * $tree["cur_node"] index of how far we parsed our$tagged_phrase
- * $tree["NP"] contains a subtree for a noun phrase
- * $tree["VP"] contains a subtree for a verb phrase
- */
+ * Given a part-of-speech tagged phrase array generates a parse tree
+ * for the phrase using a recursive descent parser.
+ *
+ * @param array $tagged_phrase
+ * an array of pairs of the form ("token" => token_for_term,
+ * "tag"=> part_of_speech_tag_for_term)
+ * @return array used to represent a tree. The array has up to three fields
+ * $tree["cur_node"] index of how far we parsed $tagged_phrase
+ * $tree["NP"] contains a subtree for a noun phrase
+ * $tree["VP"] contains a subtree for a verb phrase
+ */
public static function generatePhraseParseTree($tagged_phrase)
{
$tree = [];
@@ -596,16 +613,16 @@ class Tokenizer
return $tree;
}
/**
- * Scans a word list for phrases. For phrases found generate
- * a list of question and answer pairs at two levels of granularity:
- * CONCISE (using all terms in orginal phrase) and RAW (removing
- * (adjectives, etc).
- *
- * @param array $word_and_phrase_list of statements
- * @return array with two fields: QUESTION_LIST consisting of
- * (SUBJECT, COMPLEMENT) where one of the components has been
- * replaced with a question marker.
- */
+ * Scans a word list for phrases. For phrases found generate
+ * a list of question and answer pairs at two levels of granularity:
+ * CONCISE (using all terms in original phrase) and RAW (removing
+ * adjectives, etc).
+ *
+ * @param array $word_and_phrase_list of statements
+ * @return array with two fields: QUESTION_LIST consisting of
+ * (SUBJECT, COMPLEMENT) where one of the components has been
+ * replaced with a question marker.
+ */
public static function extractTripletsPhrases($word_and_phrase_list)
{
$triplets_list = [];
@@ -633,6 +650,14 @@ class Tokenizer
$out_triplets['QUESTION_ANSWER_LIST'] = $question_answer_list;
return $out_triplets;
}
+ /**
+ * Takes phrase tree $tree and a part-of-speech $pos returns
+ * the deepest $pos only path in tree.
+ *
+ * @param array $tree phrase to extract type from
+ * @param string $pos the part of speech to extract
+ * @return string the label of deepest $pos only path in $tree
+ */
public static function extractDeepestSpeechPartPhrase($tree, $pos)
{
$extract = "";
@@ -644,6 +669,15 @@ class Tokenizer
}
return $extract;
}
+ /**
+ * Takes a parse tree of a phrase or statement and returns an array
+ * with two fields CONCISE and RAW the former having the subject of
+ * the original phrase (as a string) the latter having the important
+ * parts of the subject
+ *
+ * @param array $tree representation of a parse tree of a phrase
+ * @return array with two fields CONCISE and RAW as described above
+ */
public static function extractSubjectParseTree($tree)
{
$subject = [];
@@ -663,13 +697,22 @@ class Tokenizer
}
return $subject;
}
+ /**
+ * Takes a parse tree of a phrase or statement and returns an array
+ * with two fields CONCISE and RAW the former having the predicate of
+ * the original phrase (as a string) the latter having the important
+ * parts of the predicate
+ *
+ * @param array $tree representation of a parse tree of a phrase
+ * @return array with two fields CONCISE and RAW as described above
+ */
public static function extractPredicateParseTree($tree)
{
$predicate = [];
if (!empty($tree['VP'])) {
$tree_vp = $tree['VP'];
$predicate['CONCISE'] = self::extractDeepestSpeechPartPhrase(
- $tree_vp, "VB")
+ $tree_vp, "VB");
$raw_predicate = "";
if (!empty($tree_vp['VB'])) {
$tree_vb = $tree_vp['VB'];
@@ -686,6 +729,15 @@ class Tokenizer
}
return $predicate;
}
+ /**
+ * Takes a parse tree of a phrase or statement and returns an array
+ * with two fields CONCISE and RAW the former having the object of
+ * the original phrase (as a string) the latter having the important
+ * parts of the object
+ *
+ * @param array $tree representation of a parse tree of a phrase
+ * @return array with two fields CONCISE and RAW as described above
+ */
public static function extractObjectParseTree($tree)
{
$object = [];
@@ -713,15 +765,15 @@ class Tokenizer
return $object;
}
/**
- * Takes a parse tree of a phrase and computes subject, predicate, and
- * object arrays. Each of these array consists of two components CONCISE and
- * RAW, CONCISE corresponding to something more similar to the words in the
- * original phrase and RAW to the case where extraneous words have been
- * removed
- *
- * @param are $tree a parse tree for a sentence
- * @return array triplet array
- */
+ * Takes a parse tree of a phrase and computes subject, predicate, and
+ * object arrays. Each of these array consists of two components CONCISE and
+ * RAW, CONCISE corresponding to something more similar to the words in the
+ * original phrase and RAW to the case where extraneous words have been
+ * removed
+ *
+ * @param array $parse_tree a parse tree for a sentence
+ * @return array triplet array
+ */
public static function extractTripletsParseTree($parse_tree)
{
$triplets = [];
@@ -730,6 +782,15 @@ class Tokenizer
$triplets['predicate'] = self::extractPredicateParseTree($parse_tree);
return $triplets;
}
+ /**
+ * Takes a triplets array with subject, predicate, object fields with
+ * CONCISE and RAW subfields and rearranges it to have two fields CONCISE
+ * and RAW with subject, predicate, object, and QUESTION_ANSWER_LIST
+ * subfields
+ *
+ * @param array $sub_pred_obj_triplets in format described above
+ * @return array $processed_triplets in format described above
+ */
public static function rearrangeTripletsByType($sub_pred_obj_triplets)
{
$processed_triplet = [];
@@ -739,12 +800,22 @@ class Tokenizer
self::extractTripletByType($sub_pred_obj_triplets, 'RAW');
return $processed_triplets;
}
+ /**
+ * Takes a triplets array with subject, predicate, object fields with
+ * CONCISE, RAW subfields and produces a triplets with $type subfield (where
+ * $type is one of CONCISE and RAW) and with subject, predicate, object,
+ * and QUESTION_ANSWER_LIST subfields
+ *
+ * @param array $sub_pred_obj_triplets in format described above
+ * @param string $type either CONCISE or RAW
+ * @return array $triplets in format described above
+ */
public static function extractTripletByType($sub_pred_obj_triplets, $type)
{
$triplets = [];
- if (!empty($sub_pred_obj_triplets['subject'])
- && !empty($sub_pred_obj_triplets['predicate'])
- && !empty($sub_pred_obj_triplets['object'])) {
+ if (!empty($sub_pred_obj_triplets['subject'][$type])
+ && !empty($sub_pred_obj_triplets['predicate'][$type])
+ && !empty($sub_pred_obj_triplets['object'][$type])) {
$question_answer_triplets = [];
$question_marker = self::$question_marker;
$sentence = [$sub_pred_obj_triplets['subject'][$type],
@@ -767,33 +838,38 @@ class Tokenizer
return $triplets;
}
/**
- * Takes tagged question string starts with Who
- * and returns question triplet from the question string
- *
- * @param string $tagged_question part-of-speech tagged question
- * @param int $index current index in statement
- * @return array parsed triplet
- */
+ * Takes a tagged question that starts with Who
+ * and returns question triplet from the question string
+ *
+ * @param array $tagged_question part-of-speech tagged question
+ * @param int $index current index in statement
+ * @return array parsed triplet
+ */
public static function parseWhoQuestion($tagged_question, $index)
{
+ $start_pos = 0;
+ if ($index == 0)
+ $start_pos = $index + 1;
$generated_questions = [];
$question_marker = self::getQuestionMarker();
$triplets = [];
- $tree_np = self::extractNounPhrase($tagged_question, ["cur_node" => 0]);
+ $tree_np = self::extractNounPhrase($tagged_question, ["cur_node" =>
+ $start_pos]);
$triplets['subject'] = self::extractSubjectParseTree($tree_np);
$tree = ["cur_node" => $index];
- $tree['NP'] = "कौन";
+ $tree['NP'] = $tagged_question[$index]['token'];
$tree_vp = self::extractVerbPhrase($tagged_question, $tree);
$triplets['predicate'] = self::extractPredicateParseTree($tree_vp);
- $triplets['object'] = self::extractObjectParseTree($tree_vp);
$triplet_types = ['CONCISE', 'RAW'];
foreach ($triplet_types as $type) {
- if (!empty($triplets['object'][$type])
+ if (!empty($triplets['subject'][$type])
&& !empty($triplets['predicate'][$type])) {
- $generated_questions[] = trim($triplets['object'][$type]) .
+ $generated_questions[$type][] =
+ trim($triplets['subject'][$type]) .
" " . trim($triplets['predicate'][$type]) . " " .
$question_marker;
- $generated_questions[] = trim($triplets['object'][$type]) .
+ $generated_questions[$type][] =
+ trim($triplets['subject'][$type]) .
" " . $question_marker .
" " . trim($triplets['predicate'][$type]);
}
@@ -801,12 +877,12 @@ class Tokenizer
return $generated_questions;
}
/**
- * Takes a phrase query entered by user and return true if it is question
- * and false if not
- *
- * @param $phrase any statement
- * @return bool returns true if statement is question
- */
+ * Takes a phrase query entered by user and return true if it is question
+ * and false if not
+ *
+ * @param $phrase any statement
+ * @return bool returns true if statement is question
+ */
public function isQuestion($phrase)
{
$who_question = "कौन";
@@ -816,22 +892,37 @@ class Tokenizer
}
return false;
}
+ /**
+ * Returns the question marker used by this locale's tokenizer
+ *
+ * @return string the value of self::$question_marker
+ */
public static function getQuestionMarker()
{
return self::$question_marker;
}
+ /**
+ * Finds the first WH question word in a question and parses around it
+ *
+ * @param string $question question to parse
+ * @return array question triplets; empty if no question word is found
+ */
public static function questionParser($question)
{
+ // wh words: What, When, Where, Why, Who, Which, Whom, Whose
+ $wh_questions = ["क्या", "कब", "कहा", "क्यों", "कौन", "जिसे",
+ "जिसका", "कहाँ"];
$tagged_question = self::tagTokenizePartOfSpeech($question);
- $generated_questions = "";
$index = -1;
- foreach ($tagged_question as $key => $value) {
- if (strcmp("कौन", $value['token']) == 0) {
- $index = $key;
+ foreach ($tagged_question as $i => $term_pos) {
+ if (in_array($term_pos['token'], $wh_questions, true)) {
+ $index = $i;
break;
}
}
- $generated_questions = self::parseWhoQuestion($tagged_question, $index);
- return $generated_questions;
+ if ($index < 0) {
+ return [];
+ }
+ return self::parseWhoQuestion($tagged_question, $index);
}
}