viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]
diff --git a/src/configs/Config.php b/src/configs/Config.php index 25a6ca070..233bf16b9 100755 --- a/src/configs/Config.php +++ b/src/configs/Config.php @@ -261,10 +261,10 @@ ini_set('pcre.backtrack_limit', 1000000); nsconddefine("BASE_DIR", str_replace("\\", "/", realpath(__DIR__ ."/../"))); nsconddefine("PARENT_DIR", substr(BASE_DIR, 0, -strlen("/src"))); nsconddefine("TEST_DIR", PARENT_DIR . '/tests'); -if (file_exists(BASE_DIR."/configs/LocalConfig.php")) { +if (file_exists(BASE_DIR . "/configs/LocalConfig.php")) { /** Include any locally specified defines (could use as an alternative way to set work directory) */ - require_once(BASE_DIR."/configs/LocalConfig.php"); + require_once(BASE_DIR . "/configs/LocalConfig.php"); } initializeBaseUrlAndCurrentWorkingDirectory(); /** Yioop Namespace*/ @@ -496,15 +496,15 @@ if (file_exists(WORK_DIRECTORY . PROFILE_FILE_NAME)) { /** BM25F weight for other text within links to a doc*/ nsdefine('LINK_WEIGHT', 2); /** - If that many exist, the minimum number of results to get - and group before trying to compute the top x (say 10) results + * If that many exist, the minimum number of results to get + * and group before trying to compute the top x (say 10) results */ nsdefine('MIN_RESULTS_TO_GROUP', 200); /** - For a given number of search results total to return (total_num) - server_alpha*total_num/num_servers will be returned any a given - queue server machine - */ + * For a given number of search results total to return (total_num) + * server_alpha*total_num/num_servers will be returned any a given + * queue server machine + */ nsdefine('SERVER_ALPHA', 1.6); nsdefine('BACKGROUND_COLOR', "#FFFFFF"); nsdefine('FOREGROUND_COLOR', "#FFFFFF"); @@ -871,7 +871,7 @@ nsconddefine('QUEUE_SLEEP_TIME', 5); nsconddefine('MIRROR_SYNC_FREQUENCY', ONE_HOUR); /** How often mirror script tries to notify machine it is mirroring that it is still alive*/ -nsconddefine('MIRROR_NOTIFY_FREQUENCY', 5 * ONE_MINUTE); 
+nsconddefine('MIRROR_NOTIFY_FREQUENCY', ONE_MINUTE); /** Max time before dirty index (queue_server) and filters (fetcher) will be force saved in seconds*/ nsconddefine('FORCE_SAVE_TIME', ONE_HOUR); @@ -884,8 +884,8 @@ nsconddefine('MAX_QUERY_LEN', 4096); /** whether to use question answering system */ nsconddefine('ENABLE_QUESTION_ANSWERING', true); /** If true, when processing query see if subsets of terms in query form a - known phrase and if so do lookup with that rather than do a conjunctive - query over those terms + * known phrase and if so do lookup with that rather than do a conjunctive + * query over those terms */ nsconddefine("SUFFIX_PHRASES", false); /** Number of words until to switch from bag of words to phrase lookup diff --git a/src/controllers/SearchController.php b/src/controllers/SearchController.php index 69616e7f0..3798f8562 100755 --- a/src/controllers/SearchController.php +++ b/src/controllers/SearchController.php @@ -163,6 +163,11 @@ class SearchController extends Controller implements CrawlConstants $data['TOTAL_TIME'] = L\changeInMicrotime( $_SERVER["REQUEST_TIME_FLOAT"]); if ($view == "serial") { + if (isset($_REQUEST['mirror']) && + $_REQUEST['mirror'] == "true") { + // mark if we are a mirror -- not making use of yet + $data['MIRROR'] = true; + } $data = serialize($data); if (empty(ini_get('zlib.output_compression')) && !$this->web_site->isCli()) { @@ -677,6 +682,7 @@ class SearchController extends Controller implements CrawlConstants } } /** + * Only used for serial network queries * Used to check if there are any mirrors of the current server. * If so, it tries to distribute the query requests randomly amongst * the mirrors and itself. To determine if there are mirrors of the @@ -687,6 +693,9 @@ class SearchController extends Controller implements CrawlConstants */ public function mirrorHandle() { + if (empty($_REQUEST['f']) || $_REQUEST['f'] != 'serial') { + return false; + } $mirror_table_name = C\CRAWL_DIR . "/" . 
self::mirror_table_name; $handled = false; if (file_exists($mirror_table_name)) { @@ -699,7 +708,11 @@ class SearchController extends Controller implements CrawlConstants if ($entry[0] == "::1") { $entry[0] = "[::1]"; } - $request = "http://" . $entry[0] . $entry[1]; + /* assume mirror uses same scheme as machine mirroring + * i.e., http or https + */ + $request = UrlParser::getScheme(C\BASE_URL) . '://'. + $entry[0] . $entry[1]; $mirrors[] = $request; } } @@ -710,8 +723,25 @@ class SearchController extends Controller implements CrawlConstants // if ==$count, we'll let the current machine handle it if ($rand < $count) { $request = $mirrors[$rand] . "?" . - $_SERVER["QUERY_STRING"] . "&network=false"; - echo FetchUrl::getPage($request); + $_SERVER["QUERY_STRING"] . "&mirror=true"; + if (strpos($_SERVER["QUERY_STRING"], "network=") === false){ + $request .= "&network=false"; + } + // fetch mirror reply once so Content-Length matches echoed body + $page = FetchUrl::getPage($request); + if (empty(ini_get('zlib.output_compression')) && + !$this->web_site->isCli()) { + ob_start("ob_gzhandler"); + $this->web_site->header("Content-Type: text/plain"); + e($page); + ob_end_flush(); + } else { + $this->web_site->header("Content-Type: text/plain"); + $this->web_site->header("Content-Length: " . 
+ strlen($page)); + e($page); + flush(); + } $handled = true; } } diff --git a/src/controllers/components/SystemComponent.php b/src/controllers/components/SystemComponent.php index 3e7f09b16..c91085dcb 100755 --- a/src/controllers/components/SystemComponent.php +++ b/src/controllers/components/SystemComponent.php @@ -88,6 +88,7 @@ class SystemComponent extends Component $machine_names = $machine_model->getQueueServerNames(); $data['PARENT_MACHINES'] = array_combine($machine_names, $machine_names); + $data['PARENT'] = $machine_names[0]; $tmp = tl('system_component_select_machine'); if (isset($_REQUEST['channel']) && $_REQUEST['channel'] == -1) { $_REQUEST['num_fetchers'] = 0; diff --git a/src/executables/Mirror.php b/src/executables/Mirror.php index f8d5ad582..167f1bb3e 100644 --- a/src/executables/Mirror.php +++ b/src/executables/Mirror.php @@ -223,7 +223,7 @@ class Mirror implements CrawlConstants $session = md5($time . C\AUTH_KEY); $write_sync_time = true; $request = - $server. + $server . "?c=resource&time=$time&session=$session" . "&robot_instance=" . C\ROBOT_INSTANCE . "&machine_uri=" . C\WEB_URI . "&last_sync=" . $this->last_sync; diff --git a/src/models/Model.php b/src/models/Model.php index 24226bc24..a2f00bfaf 100755 --- a/src/models/Model.php +++ b/src/models/Model.php @@ -439,6 +439,15 @@ class Model implements CrawlConstants return false; } } + if (is_array($machine_urls) && count($machine_urls) == 1 && + C\NAME_SERVER == $machine_urls[0]) { + $mirror_table_name = C\CRAWL_DIR . "/" . 
self::mirror_table_name; + if (file_exists($mirror_table_name) && + time() - filemtime($mirror_table_name) < + 2 * C\MIRROR_NOTIFY_FREQUENCY) { + return false; + } + } return true; } /** diff --git a/src/models/PhraseModel.php b/src/models/PhraseModel.php index 3d65200b5..ec3db4911 100755 --- a/src/models/PhraseModel.php +++ b/src/models/PhraseModel.php @@ -1415,6 +1415,12 @@ class PhraseModel extends ParallelModel $lookup_queue_servers[] = C\NAME_SERVER; //name server might still have news } + if (count($lookup_queue_servers) == 1 && + $lookup_queue_servers[0] == C\NAME_SERVER && + C\BASE_URL == C\NAME_SERVER) { + // for now only do mirror non-lookup offsets + $lookup_queue_servers = []; + } /* look up items (items we have a link summary for, but not doc summary)*/ $summaries = $this->getCrawlItems($lookups, $lookup_queue_servers, @@ -1585,7 +1591,7 @@ class PhraseModel extends ParallelModel substr(L\crawlHash("site:doc"), 0, 9)]; if ($save_timestamp_name != "") { // used for archive crawls of crawl mixes - $save_file = C\CRAWL_DIR.'/schedules/' . self::save_point . + $save_file = C\CRAWL_DIR . '/schedules/' . self::save_point . $save_timestamp_name . ".txt"; if (file_exists($save_file)) { $save_point = diff --git a/src/views/elements/ManagemachinesElement.php b/src/views/elements/ManagemachinesElement.php index 2715927d0..0ece30e6a 100644 --- a/src/views/elements/ManagemachinesElement.php +++ b/src/views/elements/ManagemachinesElement.php @@ -83,7 +83,7 @@ class ManagemachinesElement extends Element tl('managemachines_element_parent_name')?></label></th> <td><?php $this->view->helper("options")->render( "parent-machine-name", "parent", - $data['PARENT_MACHINES'], 0); ?></td></tr> + $data['PARENT_MACHINES'], $data['PARENT']); ?></td></tr> <tr id="m2"><th><label for="fetcher-number"><?= tl('managemachines_element_num_fetchers')?></label></th><td> <?php $this->view->helper("options")->render("fetcher-number",