viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]

Added centroid summarizer in default_crawl

Mangesh Dahale [2014-05-08 05h]
Added centroid summarizer in default_crawl

Signed-off-by: Chris Pollett <chris@pollett.org>
Filename
bin/fetcher.php
bin/queue_server.php
configs/config.php
configs/default_crawl.ini
diff --git a/bin/fetcher.php b/bin/fetcher.php
index d71e8e429..8f2cfe264 100755
--- a/bin/fetcher.php
+++ b/bin/fetcher.php
@@ -314,7 +314,7 @@ class Fetcher implements CrawlConstants
      */
     var $crawl_order;
     /**
-     * Stores the name of the crawler used for crawling.
+     * Stores the name of the summarizer used for crawling.
      * Possible values are Basic and Centroid
      * @var string
      */
diff --git a/bin/queue_server.php b/bin/queue_server.php
index 49a4b0389..e9666172b 100755
--- a/bin/queue_server.php
+++ b/bin/queue_server.php
@@ -169,6 +169,12 @@ class QueueServer implements CrawlConstants, Join
      * @var string
      */
     var $crawl_order;
+    /**
+     * Stores the name of the summarizer used for crawling.
+     * Possible values are Basic and Centroid
+     * @var string
+     */
+    var $summarizer_option;
     /**
      * Maximum number of bytes to download of a webpage
      * @var int
@@ -334,6 +340,7 @@ class QueueServer implements CrawlConstants, Join

         //the next values will be set for real in startCrawl
         $this->crawl_order = self::PAGE_IMPORTANCE;
+        $this->summarizer_option = self::CENTROID_SUMMARIZER;
         $this->restrict_sites_by_url = true;
         $this->allowed_sites = array();
         $this->disallowed_sites = array();
diff --git a/configs/config.php b/configs/config.php
index 307879a4f..222c0d5d0 100755
--- a/configs/config.php
+++ b/configs/config.php
@@ -106,7 +106,7 @@ if(MAINTENANCE_MODE && $_SERVER["SERVER_ADDR"] != $_SERVER["REMOTE_ADDR"]) {
 if(!defined('WORK_DIRECTORY')) {
 /*+++ The next block of code is machine edited, change at
 your own risk, please use configure web page instead +++*/
-define('WORK_DIRECTORY', 'C:/xampp/htdocs/yioop_data');
+define('WORK_DIRECTORY', '');
 /*++++++*/
 // end machine edited code
 }
diff --git a/configs/default_crawl.ini b/configs/default_crawl.ini
index d939681fd..620949c87 100644
--- a/configs/default_crawl.ini
+++ b/configs/default_crawl.ini
@@ -28,6 +28,7 @@ crawl_type = 'ax';
 page_range_request = '50000';
 page_recrawl_frequency = '-1';
 restrict_sites_by_url = false;
+summarizer_option = 'dl';

 [indexed_file_types]
 extensions[] = 'unknown';
ViewGit