Fix issues with entity extraction in PhraseParser, fix PHP Other Error for plugins
Fix issues with entity extraction in PhraseParser, fix PHP Other Error for plugins
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 7853a5988..4b8bd7b04 100644
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -1213,18 +1213,16 @@ class QueueServer implements CrawlConstants
$plugin_instance_name =
lcfirst($plugin)."Plugin";
$plugin_name = C\NS_PLUGINS . $plugin . "Plugin";
- $this->$plugin_instance_name =
- new $plugin_name();
+ $plugin_instance = new $plugin_name();
if (method_exists($plugin_name, "setConfiguration") &&
isset($this->indexing_plugins_data[$plugin])) {
- $this->$plugin_instance_name->setConfiguration(
+ $plugin_instance->setConfiguration(
$this->indexing_plugins_data[$plugin]);
}
- if ($this->$plugin_instance_name) {
+ if ($plugin_instance) {
L\crawlLog(
"... executing $plugin_instance_name");
- $this->$plugin_instance_name->
- postProcessing($this->crawl_time);
+ $plugin_instance->postProcessing($this->crawl_time);
}
}
}
diff --git a/src/library/PhraseParser.php b/src/library/PhraseParser.php
index ff9444def..156049c32 100755
--- a/src/library/PhraseParser.php
+++ b/src/library/PhraseParser.php
@@ -424,25 +424,36 @@ class PhraseParser
if (empty($terms)) {
return [];
}
- $t = 1; /*first position in doc is 1 as will encode with modified9
- which requires positive numbers
+ $t = 1; /*first position in doc is 1 as some encoding schemes used
+ require positive numbers
*/
if (strpos($string ?? "", "-") === false) {
foreach ($terms as $term) {
- $pos_lists[$term][] = $t++;
+ if (!empty($term)) {
+ $pos_lists[$term][] = $t++;
+ }
}
} else {
// add all single terms in entity
foreach ($terms as $term) {
+ if (empty($term) || $term == "-") {
+ continue;
+ }
$pos_lists[$term][] = $t;
/* this is to allow for searching by entities and parts
of entities
*/
-
- $term_parts = explode("-", $term ?? "");
- array_shift($term_parts);
- foreach ($term_parts as $part) {
- $pos_lists[$part][] = $t;
+ $entity = strtr($term, "-", " ");
+ if ($entity != $term) {
+ $entity_terms = self::stemCharGramSegment($entity, $lang);
+ $old_t = $t;
+ foreach ($entity_terms as $entity_term) {
+ if (!empty($entity_term)) {
+ $pos_lists[$entity_term][] = $t;
+ $t++;
+ }
+ }
+ $t--;
}
$t++;
}
diff --git a/tests/PhraseParserTest.php b/tests/PhraseParserTest.php
index 7c1e4ca08..84acf4f19 100644
--- a/tests/PhraseParserTest.php
+++ b/tests/PhraseParserTest.php
@@ -83,6 +83,10 @@ EOD;
$words = array_keys($word_lists);
$this->assertTrue(in_array("prime-minist", $words),
"Extract Entity 1");
+ $this->assertTrue(in_array("prime", $words),
+ "Extract Entity PART 1");
+ $this->assertTrue(in_array("minist", $words),
+ "Extract Entity PART 2");
$this->assertTrue(in_array("deep", $words), "Unigrams still present 1");
$this->assertTrue(in_array("space", $words),
"Unigrams still present 2");