diff --git a/executables/Fetcher.php b/executables/Fetcher.php
index 436081af2..a1da4c76e 100755
--- a/executables/Fetcher.php
+++ b/executables/Fetcher.php
@@ -1378,13 +1378,15 @@ class Fetcher implements CrawlConstants
$info[self::INDEXING_PLUGINS_DATA][$plugin]);
}
foreach ($processors as $processor) {
- $this->plugin_processors[$processor][$plugin_name] =
- $plugin_object;
+ $this->plugin_processors[NS_PROCESSORS .
+ $processor][$plugin_name] = $plugin_object;
}
}
foreach ($this->indexed_file_types as $file_type) {
$processor = NS_PROCESSORS . ucfirst($file_type)."Processor";
- if (!class_exists($processor)) {continue; }
+ $processor_path = BASE_DIR . "/library/processors/".
+ ucfirst($file_type)."Processor.php";
+ if (!class_exists($processor)) { continue; }
if (!isset($this->plugin_processors[$processor])) {
$this->plugin_processors[$processor] = [];
}
diff --git a/library/indexing_plugins/RecipePlugin.php b/library/indexing_plugins/RecipePlugin.php
index 0a938a9d1..b78140084 100644
--- a/library/indexing_plugins/RecipePlugin.php
+++ b/library/indexing_plugins/RecipePlugin.php
@@ -105,7 +105,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
*/
public function pageProcessing($page, $url)
{
- crawlLog("...Using recipe plugin to check for recipes!");
+ L\crawlLog("...Using recipe plugin to check for recipes!");
$page = preg_replace('@<script[^>]*?>.*?</script>@si', ' ', $page);
$page = preg_replace('/>/', '> ', $page);
$dom = HtmlProcessor::dom($page);
@@ -114,9 +114,9 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
$xpath = new \DOMXPath($dom);
$recipes_per_page = $xpath->evaluate(
/*allr, f.com, brec, fnet*/
- "/html//ul[@class = 'ingredient-wrap'] |
+ "/html//ul[@class = 'ingredient-wrap']|
/html//*[@class = 'pod ingredients'] |
- /html//*[@id='recipe_title'] |
+ /html//*[@itemtype='http://data-vocabulary.org/Recipe']|
/html//div[@class = 'rcp-head clrfix']|
/html//h1[@class = 'fn recipeDetailHeading']");
$recipe = [];
@@ -126,8 +126,8 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
$titles = $xpath->evaluate(
/* allr, f.com, brec, fnet */
"/html//*[@id = 'itemTitle']|
- /html//h1[@class = 'fn'] |
- /html//*[@id='recipe_title'] |
+ /html//h1[@class = 'fn']|
+ /html//*[@itemprop='name']|
/html//div[@class ='rcp-head clrfix']/h1 |
/html//h1[@class = 'fn recipeDetailHeading']");
for ($i=0; $i < $recipes_count; $i++) {
@@ -136,7 +136,8 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
"/html//ul[@class = 'ingredient-wrap']/li |
/html//li[@class = 'ingredient']|
/html//*[@class = 'ingredients']/*|
- /html//*[@itemprop='ingredients']
+ /html//*[@itemtype=". /* '@' makes this an attribute test */
+ "'http://data-vocabulary.org/RecipeIngredient']
");
$ingredients_result = "";
if (is_object($ingredients) && $ingredients->length != 0){
@@ -159,7 +160,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
}
}
$num_recipes = count($subdocs_description);
- crawlLog("...$num_recipes found.");
+ L\crawlLog("...$num_recipes found.");
return $subdocs_description;
}
/**
@@ -173,8 +174,8 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
{
global $INDEXING_PLUGINS;
if (!class_exists("\SplHeap")) {
- crawlLog("...Recipe Plugin Requires SPLHeap for clustering!");
- crawlLog("...Aborting plugin");
+ L\crawlLog("...Recipe Plugin Requires SPLHeap for clustering!");
+ L\crawlLog("...Aborting plugin");
return;
}
$locale_tag = L\guessLocale();
@@ -194,7 +195,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
($num_results = count($results["PAGES"])) > 0 ) {
$raw_recipes = array_merge($raw_recipes, $results["PAGES"]);
}
- crawlLog("Scanning recipes $limit through ".
+ L\crawlLog("Scanning recipes $limit through ".
($limit + $num_results).".");
$limit += $num_results;
if (isset($results["SAVE_POINT"]) ){
@@ -211,7 +212,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
$more_docs = false;
}
}
- crawlLog("...Clustering.");
+ L\crawlLog("...Clustering.");
// only cluster if would make more than one cluster
if (count($raw_recipes) * CLUSTER_RATIO > 1 ) {
$recipes = [];
@@ -305,7 +306,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
$matches = 1;
foreach ($vector_array as $vector) {
$diff = $recipe1_vector[$vector] -
- $recipe2_vector[$vector];
+ $recipe2_vector[$vector];
$vector_diff[$vector] = (pow($diff, 2));
if (abs($diff) == 1)
$matches += 1;
@@ -314,13 +315,13 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
$main_ingredient_match = 1;
if ($recipe1_main_ingredient != $recipe2_main_ingredient)
$main_ingredient_match = 1000;
- $edge_weight = sqrt($edge_weight)*
- $matches * $main_ingredient_match;
+ $edge_weight = sqrt($edge_weight) *
+ $matches * $main_ingredient_match;
$weights[$k][2] = $edge_weight;
$k++;
}
}
- crawlLog("...Making new shard with clustered recipes as docs.");
+ L\crawlLog("...Making new shard with clustered recipes as docs.");
$clusters = kruskalClustering($weights,
$count, $distinct_ingredients);
$index_shard = new IndexShard("cluster_shard");
@@ -355,11 +356,11 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
$recipes_summary[$recipe][self::HTTP_CODE];
$recipe_sites[] = $summary;
$meta_ids[] = "ingredient:".trim($cluster["ingredient"]);
- crawlLog("ingredient:".$cluster["ingredient"]);
+ L\crawlLog("ingredient:".$cluster["ingredient"]);
if (!$index_shard->addDocumentWords($doc_keys[$recipe],
self::NEEDS_OFFSET_FLAG,
$word_lists, $meta_ids, true, false)) {
- crawlLog("Problem inserting recipe: ".
+ L\crawlLog("Problem inserting recipe: ".
$summary[self::TITLE]);
}
}
@@ -368,7 +369,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
$index_shard = IndexShard::load("cluster_shard",
$shard_string);
unset($shard_string);
- crawlLog("...Adding recipe shard to index archive bundle");
+ L\crawlLog("...Adding recipe shard to index archive bundle");
$dir = CRAWL_DIR."/cache/".self::index_data_base_name.$index_name;
$index_archive = new IndexArchiveBundle($dir, false);
if ($index_shard->word_docs_packed) {
@@ -376,7 +377,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
}
$generation = $index_archive->initGenerationToAdd($index_shard);
if (isset($recipe_sites)) {
- crawlLog("... Adding ".count($recipe_sites)." recipe docs.");
+ L\crawlLog("... Adding ".count($recipe_sites)." recipe docs.");
$index_archive->addPages($generation,
self::SUMMARY_OFFSET, $recipe_sites, 0);
}
@@ -396,7 +397,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
$this->db->setWorldPermissionsRecursive(
CRAWL_DIR.'/cache/'.
self::index_data_base_name.$index_name);
- crawlLog("...Recipe plugin finished.");
+ L\crawlLog("...Recipe plugin finished.");
}
}
/**
diff --git a/library/indexing_plugins/WordfilterPlugin.php b/library/indexing_plugins/WordfilterPlugin.php
index 6d366444f..f88a6311d 100644
--- a/library/indexing_plugins/WordfilterPlugin.php
+++ b/library/indexing_plugins/WordfilterPlugin.php
@@ -297,7 +297,7 @@ EOD;
*/
public function saveConfiguration()
{
- $config_file = WORK_DIRECTORY."/data/word_filter_plugin.txt";
+ $config_file = WORK_DIRECTORY."/data/WordFilterPlugin.txt";
file_put_contents($config_file, $this->rules_string);
}
/**
@@ -310,7 +310,7 @@ EOD;
*/
public function loadConfiguration()
{
- $config_file = WORK_DIRECTORY."/data/word_filter_plugin.txt";
+ $config_file = WORK_DIRECTORY."/data/WordFilterPlugin.txt";
if (file_exists($config_file)) {
$this->rules_string = file_get_contents($config_file);
}
diff --git a/library/processors/PageProcessor.php b/library/processors/PageProcessor.php
index 98d35ee9a..617e5bc42 100644
--- a/library/processors/PageProcessor.php
+++ b/library/processors/PageProcessor.php
@@ -112,7 +112,8 @@ abstract class PageProcessor implements CrawlConstants
foreach ($this->plugin_instances as $plugin_instance) {
$subdoc = null;
$class_name = get_class($plugin_instance);
- $subtype = lcfirst(substr($class_name, 0, -strlen("Plugin")));
+ $subtype = lcfirst(substr($class_name,
+ strlen(NS_PLUGINS), -strlen("Plugin")));
$subdocs_description = $plugin_instance->pageProcessing(
$page, $url);
if (is_array($subdocs_description)