Fixes bug introduced in named entity predict, fixes miscellaneous notices, a=chris
Fixes bug introduced in named entity predict, fixes miscellaneous notices, a=chris
diff --git a/src/controllers/SearchController.php b/src/controllers/SearchController.php
index 97b3f23cb..798fccecc 100755
--- a/src/controllers/SearchController.php
+++ b/src/controllers/SearchController.php
@@ -614,6 +614,7 @@ class SearchController extends Controller implements CrawlConstants
$data['CATEGORY_TYPE'] = $media_categories[$key]['TYPE'];
} else {
$fail_category = $this->clean($_REQUEST['category'], 'string');
+ $data['SCRIPT'] ??= "";
$data['SCRIPT'] .=
"doMessage('<h1 class=\"red\" >".
tl('search_controller_no_trend_category',
diff --git a/src/library/NamedEntityContextTagger.php b/src/library/NamedEntityContextTagger.php
index c402393fd..62963016d 100644
--- a/src/library/NamedEntityContextTagger.php
+++ b/src/library/NamedEntityContextTagger.php
@@ -308,15 +308,26 @@ class NamedEntityContextTagger extends ContextTagger
*/
public function predict($sentence)
{
- if (!is_array($sentence)) {
- if ($sentence == "") {
- $terms = [];
- } else {
- $terms = preg_split("/[\s]+/u", $sentence);
- }
- } else {
+ if (empty($sentence)) {
+ return [];
+ }
+ if (is_array($sentence)) {
$terms = $sentence;
+ } else {
+ $terms = preg_split("/[\s]+/u", $sentence);
+ }
+ if (count($terms) > 1) {
+ $results = [];
+ foreach ($terms as $term) {
+ $entities = $this->predict($term);
+ if (!empty($entities)) {
+ $results = array_merge($results, $entities);
+ }
+ }
+ return $results;
}
+ $terms = preg_split('//u', $terms[0], -1,
+ PREG_SPLIT_NO_EMPTY);
if (!count($terms)) {
return [];
}
diff --git a/src/library/StochasticTermSegmenter.php b/src/library/StochasticTermSegmenter.php
index 766c4eca9..98395e9df 100644
--- a/src/library/StochasticTermSegmenter.php
+++ b/src/library/StochasticTermSegmenter.php
@@ -439,7 +439,8 @@ class StochasticTermSegmenter
}
$subdic = $subdic[$characters[$j]];
if (isset($subdic['$']) && (!isset($score[$j]) ||
- (isset($score[$index - 1]) &&
+ (isset($score[$index - 1]) && is_numeric($subdic['$'])
+ && is_numeric($score[$index - 1]) &&
$score[$index - 1] + $subdic['$'] < $score[$j]))) {
$score[$j] = $score[$index - 1] +
$this->getScore($subdic['$']);
diff --git a/src/locale/zh_CN/resources/nect_weights.txt.gz b/src/locale/zh_CN/resources/nect_weights.txt.gz
index a54e319dd..8e6fea731 100755
Binary files a/src/locale/zh_CN/resources/nect_weights.txt.gz and b/src/locale/zh_CN/resources/nect_weights.txt.gz differ
diff --git a/tests/ZhTokenizerTest.php b/tests/ZhTokenizerTest.php
index 1e6698341..aaf6e4cb6 100644
--- a/tests/ZhTokenizerTest.php
+++ b/tests/ZhTokenizerTest.php
@@ -58,8 +58,8 @@ class ZhTokenizerTest extends UnitTest
*/
public function namedEntityTestCase()
{
- $source = "郑振铎 国民党 國家元首 行政權 日本";
- $expected_tagging = "郑振铎_nr 国民党_nt 日本_ns";
+ $source = "孙向宏喜欢去洛杉矶旅游";
+ $expected_tagging = "孙向宏_nr 洛杉矶_ns";
$ne_tagger = new L\NamedEntityContextTagger('zh-CN');
$output_tagging = $ne_tagger->tag($source);
$this->assertEqual($output_tagging, $expected_tagging,