viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]
Filename | |
---|---|
bin/queue_server.php | |
configs/config.php | |
controllers/admin_controller.php | |
lib/index_bundle_iterators/group_iterator.php | |
diff --git a/bin/queue_server.php b/bin/queue_server.php index 8434dfee1..2224ccede 100755 --- a/bin/queue_server.php +++ b/bin/queue_server.php @@ -1147,23 +1147,15 @@ class QueueServer implements CrawlConstants $now = time(); if(count($this->hourly_crawl_data) > 0 ) { $last_recent_hourly_pair = array_pop($this->hourly_crawl_data); - $change_in_time_hours = - floatval(($now - $last_recent_hourly_pair[0])/3600.0); - $change_in_urls = floatval($info_bundle['COUNT'] - - $last_recent_hourly_pair[1]); - $crawl_status['VISITED_URLS_COUNT_PER_HOUR'] = - $change_in_urls/$change_in_time_hours; - if($change_in_time_hours <= 1) { + $change_in_time = + ($now - $last_recent_hourly_pair[0]); + if($change_in_time <= 3600) { $this->hourly_crawl_data[] = $last_recent_hourly_pair; } - } else { - $change_in_time_hours = - floatval(($now - $this->crawltime)/3600.0); - $crawl_status['VISITED_URLS_COUNT_PER_HOUR'] = - $info_bundle['COUNT']/$change_in_time_hours; - } + } array_unshift($this->hourly_crawl_data, array($now, $info_bundle['COUNT'])); + $crawl_status['VISITED_COUNT_HISTORY'] = $this->hourly_crawl_data; $crawl_status['VISITED_URLS_COUNT'] =$info_bundle['VISITED_URLS_COUNT']; $crawl_status['DESCRIPTION'] = $index_archive_info['DESCRIPTION']; $crawl_status['QUEUE_PEAK_MEMORY'] = memory_get_peak_usage(); diff --git a/configs/config.php b/configs/config.php index 5067e1c15..b395fe1dd 100755 --- a/configs/config.php +++ b/configs/config.php @@ -320,13 +320,13 @@ define ('EN_RATIO', 0.9); define ('AD_HOC_TITLE_LENGTH', 10); /** BM25F weight for title text */ -define ('TITLE_WEIGHT', 4); +define ('TITLE_WEIGHT', 2); /** BM25F weight for other text within doc*/ define ('DESCRIPTION_WEIGHT', 1); /** BM25F weight for other text within links to a doc*/ -define ('LINK_WEIGHT', 1); +define ('LINK_WEIGHT', 0.5); /** diff --git a/controllers/admin_controller.php b/controllers/admin_controller.php index 4225a28dc..5a0af7b3d 100755 --- a/controllers/admin_controller.php +++ 
b/controllers/admin_controller.php @@ -251,6 +251,18 @@ class AdminController extends Controller implements CrawlConstants $data = array_merge($data, $crawl_status); } } + if(isset($data['VISITED_COUNT_HISTORY']) && + count($data['VISITED_COUNT_HISTORY']) > 1) { + $recent = array_shift($data['VISITED_COUNT_HISTORY']); + $oldest = array_pop($data['VISITED_COUNT_HISTORY']); + $change_in_time_hours = floatval($recent[0] - $oldest[0])/3600.; + $change_in_urls = $recent[1] - $oldest[1]; + $data['VISITED_URLS_COUNT_PER_HOUR'] = + number_format($change_in_urls/$change_in_time_hours, 2, + ".", ""); + } else { + $data['VISITED_URLS_COUNT_PER_HOUR'] = 0; + } $data['RECENT_CRAWLS'] = $this->crawlModel->getCrawlList(false, true); if(isset($data['CRAWL_TIME'])) { //erase from previous crawl list any active crawl diff --git a/lib/index_bundle_iterators/group_iterator.php b/lib/index_bundle_iterators/group_iterator.php index 1161c505e..78471326e 100644 --- a/lib/index_bundle_iterators/group_iterator.php +++ b/lib/index_bundle_iterators/group_iterator.php @@ -458,7 +458,8 @@ class GroupIterator extends IndexBundleIterator $boost = 0; } $out_pages[$hash_url][self::SCORE] = - $out_pages[$hash_url][self::HASH_SUM_SCORE] + $boost; + $out_pages[$hash_url][self::HASH_SUM_SCORE] + + 0*$boost; } else { $out_pages[$hash_url][self::SCORE] = $out_pages[$hash_url][self::HASH_SUM_SCORE]; @@ -499,7 +500,7 @@ class GroupIterator extends IndexBundleIterator $min = ($current_rank < $min ) ? $current_rank : $min; $max = ($max < $current_rank ) ? $current_rank : $max; $sum_score += $hash_page[self::DOC_RANK] - * $relevance_boost * pow(1.2,$hash_page[self::RELEVANCE]) * + * $relevance_boost * pow(1.1,$hash_page[self::RELEVANCE]) * $hash_page[self::PROXIMITY] * $domain_weights[$hash_host]; $sum_rank += $hash_page[self::DOC_RANK] * $domain_weights[$hash_host];