Fixes some issues connected with recipe plugin, a=chris

Chris Pollett [2015-07-03 17:Jul:rd]
Fixes some issues connected with recipe plugin, a=chris
Filename
executables/Fetcher.php
library/indexing_plugins/RecipePlugin.php
library/indexing_plugins/WordfilterPlugin.php
library/processors/PageProcessor.php
diff --git a/executables/Fetcher.php b/executables/Fetcher.php
index 436081af2..a1da4c76e 100755
--- a/executables/Fetcher.php
+++ b/executables/Fetcher.php
@@ -1378,13 +1378,15 @@ class Fetcher implements CrawlConstants
                         $info[self::INDEXING_PLUGINS_DATA][$plugin]);
                 }
                 foreach ($processors as $processor) {
-                    $this->plugin_processors[$processor][$plugin_name] =
-                        $plugin_object;
+                    $this->plugin_processors[NS_PROCESSORS .
+                        $processor][$plugin_name] = $plugin_object;
                 }
             }
             foreach ($this->indexed_file_types as $file_type) {
                 $processor = NS_PROCESSORS . ucfirst($file_type)."Processor";
-                if (!class_exists($processor)) {continue; }
+                $processor_path = BASE_DIR . "/library/processors/".
+                    ucfirst($file_type)."Processor.php";
+                if (!class_exists($processor)) { continue; }
                 if (!isset($this->plugin_processors[$processor])) {
                     $this->plugin_processors[$processor] = [];
                 }
diff --git a/library/indexing_plugins/RecipePlugin.php b/library/indexing_plugins/RecipePlugin.php
index 0a938a9d1..b78140084 100644
--- a/library/indexing_plugins/RecipePlugin.php
+++ b/library/indexing_plugins/RecipePlugin.php
@@ -105,7 +105,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
      */
     public function pageProcessing($page, $url)
     {
-        crawlLog("...Using recipe plugin to check for recipes!");
+        L\crawlLog("...Using recipe plugin to check for recipes!");
         $page = preg_replace('@<script[^>]*?>.*?</script>@si', ' ', $page);
         $page = preg_replace('/>/', '> ', $page);
         $dom = HtmlProcessor::dom($page);
@@ -114,9 +114,9 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
         $xpath = new \DOMXPath($dom);
         $recipes_per_page = $xpath->evaluate(
             /*allr, f.com, brec, fnet*/
-            "/html//ul[@class = 'ingredient-wrap'] |
+            "/html//ul[@class = 'ingredient-wrap']|
             /html//*[@class = 'pod ingredients'] |
-            /html//*[@id='recipe_title'] |
+            /html//*[@itemtype='http://data-vocabulary.org/Recipe']|
             /html//div[@class = 'rcp-head clrfix']|
             /html//h1[@class = 'fn recipeDetailHeading']");
         $recipe = [];
@@ -126,8 +126,8 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
             $titles = $xpath->evaluate(
                /* allr, f.com, brec, fnet   */
                "/html//*[@id = 'itemTitle']|
-               /html//h1[@class = 'fn'] |
-               /html//*[@id='recipe_title'] |
+               /html//h1[@class = 'fn']|
+               /html//*[@itemprop='name']|
                /html//div[@class ='rcp-head clrfix']/h1 |
                /html//h1[@class = 'fn recipeDetailHeading']");
             for ($i=0; $i < $recipes_count; $i++) {
@@ -136,7 +136,8 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
                     "/html//ul[@class = 'ingredient-wrap']/li |
                     /html//li[@class = 'ingredient']|
                     /html//*[@class = 'ingredients']/*|
-                    /html//*[@itemprop='ingredients']
+                    /html//*[@itemtype=".
+                    "'http://data-vocabulary.org/RecipeIngredient']
                     ");
                 $ingredients_result = "";
                 if (is_object($ingredients) && $ingredients->length != 0){
@@ -159,7 +160,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
             }
         }
         $num_recipes = count($subdocs_description);
-        crawlLog("...$num_recipes found.");
+        L\crawlLog("...$num_recipes found.");
         return $subdocs_description;
     }
     /**
@@ -173,8 +174,8 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
     {
         global $INDEXING_PLUGINS;
         if (!class_exists("\SplHeap")) {
-            crawlLog("...Recipe Plugin Requires SPLHeap for clustering!");
-            crawlLog("...Aborting plugin");
+            L\crawlLog("...Recipe Plugin Requires SPLHeap for clustering!");
+            L\crawlLog("...Aborting plugin");
             return;
         }
         $locale_tag = L\guessLocale();
@@ -194,7 +195,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
                 ($num_results = count($results["PAGES"])) > 0 ) {
                 $raw_recipes = array_merge($raw_recipes, $results["PAGES"]);
             }
-            crawlLog("Scanning recipes $limit through ".
+            L\crawlLog("Scanning recipes $limit through ".
                 ($limit + $num_results).".");
             $limit += $num_results;
             if (isset($results["SAVE_POINT"]) ){
@@ -211,7 +212,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
                 $more_docs = false;
             }
         }
-        crawlLog("...Clustering.");
+        L\crawlLog("...Clustering.");
         // only cluster if would make more than one cluster
         if (count($raw_recipes) * CLUSTER_RATIO > 1 ) {
             $recipes = [];
@@ -305,7 +306,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
                     $matches = 1;
                     foreach ($vector_array as $vector) {
                         $diff = $recipe1_vector[$vector] -
-                                    $recipe2_vector[$vector];
+                            $recipe2_vector[$vector];
                         $vector_diff[$vector] = (pow($diff, 2));
                         if (abs($diff) == 1)
                             $matches += 1;
@@ -314,13 +315,13 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
                     $main_ingredient_match = 1;
                     if ($recipe1_main_ingredient != $recipe2_main_ingredient)
                         $main_ingredient_match = 1000;
-                    $edge_weight = sqrt($edge_weight)*
-                                    $matches * $main_ingredient_match;
+                    $edge_weight = sqrt($edge_weight) *
+                        $matches * $main_ingredient_match;
                     $weights[$k][2] = $edge_weight;
                     $k++;
                 }
             }
-            crawlLog("...Making new shard with clustered recipes as docs.");
+            L\crawlLog("...Making new shard with clustered recipes as docs.");
             $clusters = kruskalClustering($weights,
                 $count, $distinct_ingredients);
             $index_shard = new IndexShard("cluster_shard");
@@ -355,11 +356,11 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
                         $recipes_summary[$recipe][self::HTTP_CODE];
                     $recipe_sites[] = $summary;
                     $meta_ids[] = "ingredient:".trim($cluster["ingredient"]);
-                    crawlLog("ingredient:".$cluster["ingredient"]);
+                    L\crawlLog("ingredient:".$cluster["ingredient"]);
                     if (!$index_shard->addDocumentWords($doc_keys[$recipe],
                         self::NEEDS_OFFSET_FLAG,
                         $word_lists, $meta_ids, true, false)) {
-                        crawlLog("Problem inserting recipe: ".
+                        L\crawlLog("Problem inserting recipe: ".
                             $summary[self::TITLE]);
                     }
                 }
@@ -368,7 +369,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
             $index_shard = IndexShard::load("cluster_shard",
                 $shard_string);
             unset($shard_string);
-            crawlLog("...Adding recipe shard to index archive bundle");
+            L\crawlLog("...Adding recipe shard to index archive bundle");
             $dir = CRAWL_DIR."/cache/".self::index_data_base_name.$index_name;
             $index_archive = new IndexArchiveBundle($dir, false);
             if ($index_shard->word_docs_packed) {
@@ -376,7 +377,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
             }
             $generation = $index_archive->initGenerationToAdd($index_shard);
             if (isset($recipe_sites)) {
-                crawlLog("... Adding ".count($recipe_sites)." recipe docs.");
+                L\crawlLog("... Adding ".count($recipe_sites)." recipe docs.");
                 $index_archive->addPages($generation,
                     self::SUMMARY_OFFSET, $recipe_sites, 0);
             }
@@ -396,7 +397,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
             $this->db->setWorldPermissionsRecursive(
                 CRAWL_DIR.'/cache/'.
                 self::index_data_base_name.$index_name);
-            crawlLog("...Recipe plugin finished.");
+            L\crawlLog("...Recipe plugin finished.");
         }
     }
     /**
diff --git a/library/indexing_plugins/WordfilterPlugin.php b/library/indexing_plugins/WordfilterPlugin.php
index 6d366444f..f88a6311d 100644
--- a/library/indexing_plugins/WordfilterPlugin.php
+++ b/library/indexing_plugins/WordfilterPlugin.php
@@ -297,7 +297,7 @@ EOD;
      */
     public function saveConfiguration()
     {
-        $config_file = WORK_DIRECTORY."/data/word_filter_plugin.txt";
+        $config_file = WORK_DIRECTORY."/data/WordFilterPlugin.txt";
         file_put_contents($config_file, $this->rules_string);
     }
     /**
@@ -310,7 +310,7 @@ EOD;
      */
     public function loadConfiguration()
     {
-        $config_file = WORK_DIRECTORY."/data/word_filter_plugin.txt";
+        $config_file = WORK_DIRECTORY."/data/WordFilterPlugin.txt";
         if (file_exists($config_file)) {
             $this->rules_string =  file_get_contents($config_file);
         }
diff --git a/library/processors/PageProcessor.php b/library/processors/PageProcessor.php
index 98d35ee9a..617e5bc42 100644
--- a/library/processors/PageProcessor.php
+++ b/library/processors/PageProcessor.php
@@ -112,7 +112,8 @@ abstract class PageProcessor implements CrawlConstants
             foreach ($this->plugin_instances as $plugin_instance) {
                 $subdoc = null;
                 $class_name = get_class($plugin_instance);
-                $subtype = lcfirst(substr($class_name, 0, -strlen("Plugin")));
+                $subtype = lcfirst(substr($class_name,
+                    strlen(NS_PLUGINS), -strlen("Plugin")));
                 $subdocs_description = $plugin_instance->pageProcessing(
                     $page, $url);
                 if (is_array($subdocs_description)
ViewGit