viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]

Remove recursive call in last, a=chris

Chris Pollett [2020-07-13 01:--:--]
Remove recursive call in last, a=chris
Filename
src/library/NamedEntityContextTagger.php
src/locale/zh_CN/resources/nect_weights.txt.gz
diff --git a/src/library/NamedEntityContextTagger.php b/src/library/NamedEntityContextTagger.php
index 62963016d..3a9768d28 100644
--- a/src/library/NamedEntityContextTagger.php
+++ b/src/library/NamedEntityContextTagger.php
@@ -312,81 +312,75 @@ class NamedEntityContextTagger extends ContextTagger
             return [];
         }
         if (is_array($sentence)) {
-            $terms = $sentence;
+            $sentence_vector = $sentence;
         } else {
-            $terms = preg_split("/[\s]+/u", $sentence);
-        }
-        if (count($terms) > 1) {
-            $results = [];
-            foreach ($terms as $term) {
-                $entities = $this->predict($term);
-                if (!empty($entities)) {
-                    $results = array_merge($results, $entities);
-                }
-            }
-            return $results;
-        }
-        $terms = preg_split('//u', $terms[0], null,
-            PREG_SPLIT_NO_EMPTY);
-        if (!count($terms)) {
-            return [];
+            $sentence_vector = preg_split("/[\s]+/u", $sentence);
         }
         if (!$this->word_feature) {
             $this->loadWeights();
         }
-        $results = [];
-        for($i = 0; $i < count($terms); $i++) {
-            $term = $terms[$i];
-            $score = [];
-            foreach($this->tag_set as $possible_tag => $tag_index) {
-                $score[$possible_tag] = 0;
-                for ($j = -2; $j <= 2; $j++) {
-                    $k = $this->getIndex($i + $j, $terms);
-                    if (isset($this->word_feature[$k])) {
-                        $score[$possible_tag] +=
-                            $this->getW($k, $j, $tag_index);
+        $found_entities = [];
+        foreach ($sentence_vector as $term) {
+            $characters = preg_split('//u', $term, null,
+                PREG_SPLIT_NO_EMPTY);
+            if (empty($characters)) {
+                continue;
+            }
+            $tags = [];
+            for($i = 0; $i < count($characters); $i++) {
+                $character = $characters[$i];
+                $score = [];
+                foreach($this->tag_set as $possible_tag => $tag_index) {
+                    $score[$possible_tag] = 0;
+                    for ($j = -2; $j <= 2; $j++) {
+                        $k = $this->getIndex($i + $j, $characters);
+                        if (isset($this->word_feature[$k])) {
+                            $score[$possible_tag] +=
+                                $this->getW($k, $j, $tag_index);
+                        }
                     }
+                    if ($i == 0) {
+                        $tf1 = "start";
+                        $tf2 = "start-start";
+                    } else if ($i == 1) {
+                        $tf1 = $tags[$i - 1];
+                        $tf2 = "start-" . $tags[$i - 1];
+                    } else {
+                        $tf1 = $tags[$i - 1];
+                        $tf2 = $tags[$i - 2] . "-" . $tags[$i - 1];
+                    }
+                    $score[$possible_tag] += $this->getT($tf1, $tag_index);
+                    $score[$possible_tag] += $this->getT($tf2, $tag_index);
+                    $score[$possible_tag] += $this->getB($tag_index);
                 }
-                if ($i == 0) {
-                    $tf1 = "start";
-                    $tf2 = "start-start";
-                } else if ($i == 1) {
-                    $tf1 = $results[$i - 1];
-                    $tf2 = "start-" . $results[$i - 1];
-                } else {
-                    $tf1 = $results[$i - 1];
-                    $tf2 = $results[$i - 2] . "-" . $results[$i - 1];
-                }
-                $score[$possible_tag] += $this->getT($tf1, $tag_index);
-                $score[$possible_tag] += $this->getT($tf2, $tag_index);
-                $score[$possible_tag] += $this->getB($tag_index);
+                $tags[] = array_keys($score, max($score))[0];
             }
-            $results[] = array_keys($score, max($score))[0];
-        }
-        $pre_tag = 'o';
-        $current_entity = "";
-        $ret = [];
-        for ($i = 0; $i < count($terms); $i++) {
-            if ($pre_tag != $results[$i] && $pre_tag != "o") {
-                if (mb_strlen($current_entity) < self::MAX_ENTITY_LENGTH) {
-                    $ret[] = [$current_entity, $pre_tag];
+            $pre_tag = 'o';
+            $current_entity = "";
+            $entities = [];
+            for ($i = 0; $i < count($characters); $i++) {
+                if ($pre_tag != $tags[$i] && $pre_tag != "o") {
+                    if (mb_strlen($current_entity) < self::MAX_ENTITY_LENGTH) {
+                        $entities[] = [$current_entity, $pre_tag];
+                    }
+                    $current_entity = "";
                 }
-                $current_entity = "";
-            }
-            if ($results[$i] != "o") {
-                if ($current_entity) {
-                    $current_entity .= $terms[$i];
-                } else {
-                    $current_entity = $terms[$i];
+                if ($tags[$i] != "o") {
+                    if ($current_entity) {
+                        $current_entity .= $characters[$i];
+                    } else {
+                        $current_entity = $characters[$i];
+                    }
                 }
+                $pre_tag = $tags[$i];
             }
-            $pre_tag = $results[$i];
-        }
-        if ($pre_tag != "o") {
-            if (mb_strlen($current_entity) < self::MAX_ENTITY_LENGTH) {
-                $ret[] = [$current_entity, $pre_tag];
+            if ($pre_tag != "o") {
+                if (mb_strlen($current_entity) < self::MAX_ENTITY_LENGTH) {
+                    $entities[] = [$current_entity, $pre_tag];
+                }
             }
+            $found_entities = array_merge($found_entities, $entities);
         }
-        return $ret;
+        return $found_entities;
     }
 }
diff --git a/src/locale/zh_CN/resources/nect_weights.txt.gz b/src/locale/zh_CN/resources/nect_weights.txt.gz
index 8e6fea731..46cdc9d3d 100755
Binary files a/src/locale/zh_CN/resources/nect_weights.txt.gz and b/src/locale/zh_CN/resources/nect_weights.txt.gz differ
ViewGit