diff --git a/src/configs/Config.php b/src/configs/Config.php index b04b9bdfe..600966cfc 100755 --- a/src/configs/Config.php +++ b/src/configs/Config.php @@ -484,7 +484,6 @@ if (file_exists(WORK_DIRECTORY . PROFILE_FILE_NAME)) { nsdefine('RESULT_SCORE', true); nsdefine('SIGNIN_LINK', true); nsdefine('SUBSEARCH_LINK', true); - nsdefine('SUFFIX_PHRASES', false); /** BM25F weight for title text */ nsdefine('TITLE_WEIGHT', 4); /** BM25F weight for other text within doc*/ @@ -896,9 +895,8 @@ nsconddefine('ENABLE_QUESTION_ANSWERING', true); * known phrase and if so do lookup with that rather than do a conjunctive * query over those terms */ -nsconddefine("SUFFIX_PHRASES", false); -/** Number of words until to switch from bag of words to phrase lookup - * if SUFFIX_PHRASES is true +/** Number of words until a string of words might be parsed as a sentence + * for question answering */ nsconddefine('PHRASE_THRESHOLD', 3); /** default number of search results to display per page */ @@ -906,11 +904,13 @@ nsconddefine('NUM_RESULTS_PER_PAGE', 10); /** Number of recently crawled urls to display on admin screen */ nsconddefine('NUM_RECENT_URLS_TO_DISPLAY', 10); /** Maximum time a set of results can stay in query cache before it is - invalidated. If negative, then never use time to kick something out of - cache. */ + * invalidated. If negative, then never use time to kick something out of + * cache. 
+ */ nsconddefine('MAX_QUERY_CACHE_TIME', 2 * ONE_DAY); //two days /** Minimum time a set of results can stay in query cache before it is - invalidated (used for active crawl or feed results) */ + * invalidated (used for active crawl or feed results) + */ nsconddefine('MIN_QUERY_CACHE_TIME', ONE_HOUR); //one hour /** * Default number of items to page through for users,roles, mixes, etc diff --git a/src/controllers/CrawlController.php b/src/controllers/CrawlController.php index ba9cc7d3b..dc431bf7b 100644 --- a/src/controllers/CrawlController.php +++ b/src/controllers/CrawlController.php @@ -53,7 +53,7 @@ class CrawlController extends Controller implements CrawlConstants * These are the activities supported by this controller * @var array */ - public $activities = ["countWords","clearQuerySavePoint", + public $activities = ["countWords", "clearFeedData", "clearQuerySavePoint", "crawlStalled", "crawlStatus", "deleteCrawl", "injectUrlsCurrentCrawl", "combinedCrawlInfo", "getInfoTimestamp", "getCrawlItems", "getCrawlList", "getCrawlSeedInfo", "sendStartCrawlMessage", @@ -323,6 +323,13 @@ class CrawlController extends Controller implements CrawlConstants $this->model("crawl")->sendStartCrawlMessage($crawl_params, $seed_info, null, $num_fetchers); } + /** + * + */ + public function clearFeedData() + { + $this->model("source")->clearFeedData(); + } /** * A save point is used to store to disk a sequence generation-doc-offset * pairs of a particular mix query when doing an archive crawl of a crawl diff --git a/src/controllers/JobsController.php b/src/controllers/JobsController.php index f3f1bfb92..f79d1431d 100644 --- a/src/controllers/JobsController.php +++ b/src/controllers/JobsController.php @@ -102,6 +102,7 @@ class JobsController extends Controller implements CrawlConstants, public function getUpdateProperties() { $profile_model = $this->model("profile"); + $machine_model = $this->model("machine"); $profile = $profile_model->getProfile(C\WORK_DIRECTORY); $response = 
[]; $response['MEDIA_MODE'] = (isset($profile['MEDIA_MODE'])) ? @@ -109,6 +110,7 @@ class JobsController extends Controller implements CrawlConstants, $response['SEND_MAIL_MEDIA_UPDATER'] = (isset($profile['SEND_MAIL_MEDIA_UPDATER'])) ? $profile['SEND_MAIL_MEDIA_UPDATER'] : false; + $response["JOBS_LIST"] = $machine_model->getJobsList(); echo L\webencode(serialize($response)); } -} \ No newline at end of file +} diff --git a/src/controllers/components/CrawlComponent.php b/src/controllers/components/CrawlComponent.php index 7e86c1b1c..92e531846 100644 --- a/src/controllers/components/CrawlComponent.php +++ b/src/controllers/components/CrawlComponent.php @@ -1643,7 +1643,7 @@ class CrawlComponent extends Component implements CrawlConstants $parent->updateProfileFields($data, $profile, ['IP_LINK','CACHE_LINK', 'SIMILAR_LINK', 'IN_LINK', 'RESULT_SCORE', 'SIGNIN_LINK', 'SUBSEARCH_LINK', - 'SUFFIX_PHRASES', 'WORD_SUGGEST']); + 'WORD_SUGGEST']); } $weights = ['TITLE_WEIGHT' => 4, 'DESCRIPTION_WEIGHT' => 1, 'LINK_WEIGHT' => 2, @@ -2370,8 +2370,8 @@ class CrawlComponent extends Component implements CrawlConstants $parent = $this->parent; $crawl_model = $parent->model("crawl"); $source_model = $parent->model("source"); - $source_arguments = ["addsource", "deletesource", "editsource", - "sourcesearch", "testsource"]; + $source_arguments = ["addsource", "cleardata", "deletesource", + "editsource", "sourcesearch", "testsource"]; $subsearch_arguments = ["addsubsearch", "deletesubsearch", "editsubsearch", "showsubsearch", "subsearchsearch"]; $possible_arguments = array_merge($source_arguments, @@ -2540,7 +2540,7 @@ class CrawlComponent extends Component implements CrawlConstants return $parent->redirectWithMessage( tl('crawl_component_media_source_added'), $request_fields); - break; + break; case "addsubsearch": $to_clean = ["folder_name", 'index_identifier']; $must_have = $to_clean; @@ -2564,7 +2564,20 @@ class CrawlComponent extends Component implements CrawlConstants return 
$parent->redirectWithMessage( tl('crawl_component_subsearch_added'), $request_fields); - break; + break; + case "cleardata": + $profile = $parent->model("profile")->getProfile( + C\WORK_DIRECTORY); + $machines = null; + if (!empty($profile['MEDIA_MODE']) && + $profile['MEDIA_MODE'] == "distributed") { + $machines = $parent->model("machine")->getMachineList(); + } + $source_model->clearFeedData($machines); + return $parent->redirectWithMessage( + tl('crawl_component_clearing_data'), + $request_fields); + break; case "deletesource": if (!isset($_REQUEST['ts'])) { return $parent->redirectWithMessage( @@ -2576,7 +2589,7 @@ class CrawlComponent extends Component implements CrawlConstants return $parent->redirectWithMessage( tl('crawl_component_media_source_deleted'), $request_fields); - break; + break; case "deletesubsearch": $_REQUEST['arg'] = "showsubsearch"; $request_fields[] = 'arg'; @@ -2591,7 +2604,7 @@ class CrawlComponent extends Component implements CrawlConstants return $parent->redirectWithMessage( tl('crawl_component_subsearch_deleted'), $request_fields); - break; + break; case "editsubsearch": $data['SEARCH_FORM_TYPE'] = "editsubsearch"; $subsearch = false; @@ -2628,7 +2641,7 @@ class CrawlComponent extends Component implements CrawlConstants tl('crawl_component_subsearch_updated'), $fields); } - break; + break; case "editsource": $data['SOURCE_FORM_TYPE'] = "editsource"; $source = false; @@ -2713,7 +2726,7 @@ class CrawlComponent extends Component implements CrawlConstants tl('crawl_component_media_source_updated'), $fields); } - break; + break; case "sourcesearch": $data['SOURCE_FORM_TYPE'] = "search"; $media_search_array = @@ -2781,7 +2794,7 @@ class CrawlComponent extends Component implements CrawlConstants $wiki_update_job->updatePodcastsOneGo([$source], C\ONE_WEEK, true); } - break; + break; } } $data['CAN_LOCALIZE'] = $parent->model("user")->isAllowedUserActivity( diff --git a/src/controllers/components/SystemComponent.php 
b/src/controllers/components/SystemComponent.php index 569cc6e89..1b7c93437 100755 --- a/src/controllers/components/SystemComponent.php +++ b/src/controllers/components/SystemComponent.php @@ -64,9 +64,11 @@ class SystemComponent extends Component $profile_model = $parent->model("profile"); $data = []; $data["ELEMENT"] = "managemachines"; - $possible_arguments = ["addmachine", "deletemachine", "log", "update", - "updatemode"]; - $data['SCRIPT'] = "doUpdate();"; + $possible_arguments = ["addmachine", "deletemachine", "disablejob", + "enablejob", "log", "mediajobs", "update", "updatemode"]; + $data['SCRIPT'] = (!in_array($_REQUEST['arg'] ?? "", + ["disablejob", "enablejob", "mediajobs", "updatemode"])) ? + "doUpdate();" : ""; $data["leftorright"]=(L\getLocaleDirection() == 'ltr') ? "right": "left"; $data['MACHINE_NAMES'] = []; @@ -200,6 +202,36 @@ class SystemComponent extends Component ["start_row", "end_row", "num_show"]); } break; + case "disablejob": + $data["ELEMENT"] = "mediajobs"; + $_REQUEST['arg'] = 'mediajobs'; + $jobs_list = $machine_model->getJobsList(); + $job_name = $_REQUEST["job_name"] ?? 
""; + if (!isset($jobs_list[$job_name])) { + return $parent->redirectWithMessage( + tl('system_component_job_doesnt_exist'), + ["arg", "start_row", "end_row", "num_show"]); + } + $machine_model->setJobStatus($job_name, true); + return $parent->redirectWithMessage( + tl('system_component_job_enabled'), + ["arg", "start_row", "end_row", "num_show"]); + break; case "log": $log_fields = ["id" => "int", "name"=>"string", "channel" => "int", "f" => "string", @@ -252,6 +284,13 @@ class SystemComponent extends Component explode("\n", $data["LOG_FILE_DATA"])); $data["LOG_FILE_DATA"] = implode("\n", $lines); break; + case "mediajobs": + $data["ELEMENT"] = "mediajobs"; + $profile = $profile_model->getProfile(C\WORK_DIRECTORY); + $data['MEDIA_MODE'] = $profile['MEDIA_MODE'] ?? + "name_server"; + $data['JOBS_LIST'] = $machine_model->getJobsList(); + break; case "update": if (isset($_REQUEST["id"])) { $r["id"] = @@ -279,6 +318,7 @@ class SystemComponent extends Component } break; case "updatemode": + $data["ELEMENT"] = "mediajobs"; $profile = $profile_model->getProfile(C\WORK_DIRECTORY); if (isset($profile['MEDIA_MODE']) && $profile['MEDIA_MODE'] == "name_server") { @@ -288,9 +328,10 @@ class SystemComponent extends Component } $profile_model->updateProfile(C\WORK_DIRECTORY, [], $profile); + $_REQUEST['arg'] = 'mediajobs'; return $parent->redirectWithMessage( tl('system_component_updatemode_toggled'), - ["start_row", "end_row", "num_show"]); + ["arg", "start_row", "end_row", "num_show"]); break; } } diff --git a/src/executables/ArcTool.php b/src/executables/ArcTool.php index 73846ca5e..7b436a9d1 100755 --- a/src/executables/ArcTool.php +++ b/src/executables/ArcTool.php @@ -285,53 +285,36 @@ class ArcTool implements CrawlConstants } else if ($bundle_name == "IndexDataFeed") { $index_timestamp = "feed"; } - $hash_paths = L\allCrawlHashPaths($word, true); - $found = false; - if (count($hash_paths) > 1) { - echo "!!Performing Looking up for phrase " . 
- "at each possible shift position. Outputting results for each ". - "possibility!!\n"; + $hash_key = L\crawlHashWord($word, true); + $start_time = microtime(true); + echo "Looking up in dictionary:\n"; + echo " Key: ". L\toHexString($hash_key) . "\n"; + $info = IndexManager::getWordInfo($index_timestamp, $hash_key, + -1, $start_generation, $num_generations); + echo "Dictionary Lookup Time:" . L\changeInMicrotime($start_time) + . "\n"; + if (!$info) { + echo " Key not found\n"; + exit(); } - foreach ($hash_paths as $hash_shift) { - if (is_array($hash_shift)) { - list($hash_key, $shift) = $hash_shift; - } else { - $hash_key = $hash_shift; - $shift = 0; - } - $start_time = microtime(true); - echo "Looking up in dictionary:\n"; - echo " Key: ". L\toHexString($hash_key) . "\n"; - if (is_array($hash_shift)) { - echo " Shift: ". $shift . "\n"; - } - $info = IndexManager::getWordInfo($index_timestamp, $hash_key, - $shift, -1, $start_generation, $num_generations); - echo "Dictionary Lookup Time:" . L\changeInMicrotime($start_time) - . "\n"; - if (!$info) { - echo " Key not found\n"; - continue; - } - $found = true; - echo "Dictionary Tiers: "; - $index = IndexManager::getIndex($index_timestamp); - $tiers = $index->dictionary->active_tiers; - foreach ($tiers as $tier) { - echo " $tier"; - } - echo "\nBundle Dictionary Entries for '$word':\n"; - echo "====================================\n"; - $i = 1; - foreach ($info as $record) { - echo "RECORD: $i\n"; - echo "Hex ID: " . 
L\toHexString($record[4])."\n"; - echo "GENERATION: {$record[0]}\n"; - echo "FIRST WORD OFFSET: {$record[1]}\n"; - echo "LAST WORD OFFSET: {$record[2]}\n"; - echo "NUMBER OF POSTINGS: {$record[3]}\n\n"; - $i++; - } + $found = true; + echo "Dictionary Tiers: "; + $index = IndexManager::getIndex($index_timestamp); + $tiers = $index->dictionary->active_tiers; + foreach ($tiers as $tier) { + echo " $tier"; + } + echo "\nBundle Dictionary Entries for '$word':\n"; + echo "====================================\n"; + $i = 1; + foreach ($info as $record) { + echo "RECORD: $i\n"; + echo "Hex ID: " . L\toHexString($record[4])."\n"; + echo "GENERATION: {$record[0]}\n"; + echo "FIRST WORD OFFSET: {$record[1]}\n"; + echo "LAST WORD OFFSET: {$record[2]}\n"; + echo "NUMBER OF POSTINGS: {$record[3]}\n\n"; + $i++; } if (!$found) { //fallback to old word hashes @@ -371,7 +354,7 @@ class ArcTool implements CrawlConstants } preg_match("/\d+$/", $archive_path, $matches); $index_timestamp = (isset($matches[0])) ? 
$matches[0] : 0; - if ($bundle_num >= 0) { + if (isset($bundle_num) && $bundle_num >= 0) { $index_timestamp .= "-$bundle_num"; } else if ($bundle_name == "IndexDataFeed") { $index_timestamp = "feed"; diff --git a/src/executables/MediaUpdater.php b/src/executables/MediaUpdater.php index a474a1eea..84861d0fc 100644 --- a/src/executables/MediaUpdater.php +++ b/src/executables/MediaUpdater.php @@ -109,6 +109,7 @@ class MediaUpdater implements CrawlConstants $this->update_time = 0; $this->media_mode = "name_server"; $this->media_mode = false; + $this->jobs = []; } /** * This is the function that should be called to get the MediaUpdater to @@ -120,40 +121,6 @@ class MediaUpdater implements CrawlConstants global $argv; CrawlDaemon::init($argv, "MediaUpdater"); L\crawlLog("\n\nInitialize logger..", "MediaUpdater", true); - L\crawlLog("Acquiring list of jobs..."); - $job_path = C\BASE_DIR ."/library/media_jobs/"; - $app_job_path = C\APP_DIR ."/library/media_jobs/"; - $len_path = strlen($job_path); - if (empty($argv[2])) { - $base_job_files = glob("$job_path*Job.php"); - $job_files = glob("$app_job_path*Job.php"); - foreach ($base_job_files as $job_file) { - $app_equiv_job = $app_job_path . substr($job_file, $len_path); - if (!in_array($app_equiv_job, $job_files)) { - $job_files[] = $job_file; - } - } - } else { - $job_files = []; - $pre_jobs = array_slice($argv, 2); - foreach ($pre_jobs as $pre_job) { - if (file_exists($app_job_path . "{$pre_job}Job.php")) { - $job_files[] = $app_job_path . "{$pre_job}Job.php"; - } else if (file_exists($job_path . "{$pre_job}Job.php")) { - $job_files[] = $job_path . "{$pre_job}Job.php"; - } - } - } - foreach ($job_files as $job_file) { - require_once $job_file; - $job_file_parts = pathinfo($job_file); - $job_name = C\NS_JOBS . $job_file_parts['filename']; - if ($job_name != C\NS_JOBS . "MediaJob") { - $job = new $job_name($this); - $this->jobs[] = $job; - L\crawlLog("... 
loading $job_name"); - } - } $this->loop(); } /** @@ -169,7 +136,7 @@ class MediaUpdater implements CrawlConstants $start_time = microtime(true); $this->getUpdateProperties(); if (!empty($this->jobs)) { - foreach ($this->jobs as $job) { + foreach ($this->jobs as $job_name => $job) { $job->run(); } } @@ -207,9 +174,52 @@ class MediaUpdater implements CrawlConstants L\crawlLog("...Setting mail mode to: " . (($this->mail_mode) ? "true" : "false")); } + if (isset($properties['JOBS_LIST'])) { + $this->loadJobs($properties['JOBS_LIST']); + } } L\crawlLog("Done checking Name Server for Media Updater properties"); } + /** + * + */ + public function loadJobs($jobs_list) + { + L\crawlLog("Updating list of jobs..."); + $job_path = C\BASE_DIR . "/library/media_jobs/"; + $app_job_path = C\APP_DIR . "/library/media_jobs/"; + $len_path = strlen($job_path); + $job_files = []; + foreach ($jobs_list as $job => $enabled) { + if (!$enabled) { + continue; + } + if (file_exists($app_job_path . "{$job}Job.php")) { + $job_files[$job] = $app_job_path . "{$job}Job.php"; + } else if (file_exists($job_path . "{$job}Job.php")) { + $job_files[$job] = $job_path . "{$job}Job.php"; + } + } + foreach ($this->jobs as $job_name => $job) { + if (empty($job_files[$job_name])) { + L\crawlLog("...Removing $job_name from list of jobs"); + unset($this->jobs[$job_name]); + } + } + foreach ($job_files as $job_name => $job_file) { + if (!empty($this->jobs[$job_name])) { + continue; + } + L\crawlLog("...Adding $job_name to list of jobs"); + require_once $job_file; + $job_class_name = C\NS_JOBS . $job_name . "Job"; + if ($job_class_name != C\NS_JOBS . "MediaJob") { + $job = new $job_class_name($this); + $this->jobs[$job_name] = $job; + L\crawlLog("... 
done."); + } + } + } } /* * Instantiate and run the MediaUpdater program diff --git a/src/index.php b/src/index.php index 3a2eb8ab0..d7213e236 100644 --- a/src/index.php +++ b/src/index.php @@ -78,7 +78,7 @@ function bootstrap($web_site = null, $start_new_session = true) * Load global functions related to localization */ require_once __DIR__ . "/library/LocaleFunctions.php"; - ini_set("memory_limit","500M"); + ini_set("memory_limit","1000M"); if (!empty($web_site)) { if ((empty($_REQUEST['c']) || $_REQUEST['c'] != 'resource')) { $web_site->header("X-FRAME-OPTIONS: DENY"); //prevent click-jacking diff --git a/src/library/IndexDictionary.php b/src/library/IndexDictionary.php index 2b35585c7..2567c3cbc 100644 --- a/src/library/IndexDictionary.php +++ b/src/library/IndexDictionary.php @@ -727,8 +727,6 @@ class IndexDictionary implements CrawlConstants * * @param string $word_id id of the word or phrase one wants to look up * @param bool $raw whether the id is our version of base64 encoded or not - * @param int $shift how many low order bits to drop from $word_id's - * when checking for a match * @param int $threshold if greater than zero how many posting list * results in dictionary info returned before stopping looking for * more matches @@ -743,7 +741,7 @@ class IndexDictionary implements CrawlConstants * element as above and first element the estimated total number of * of docs */ - public function getWordInfo($word_id, $raw = false, $shift = 0, + public function getWordInfo($word_id, $raw = false, $threshold = -1, $start_generation = -1, $num_distinct_generations = -1, $with_remaining_total = false) { @@ -752,7 +750,7 @@ class IndexDictionary implements CrawlConstants $found_count = 0; $current_max_generation = -2; foreach ($this->active_tiers as $tier) { - $tier_info = $this->getWordInfoTier($word_id, $raw, $tier, $shift, + $tier_info = $this->getWordInfoTier($word_id, $raw, $tier, $threshold, $start_generation, $num_distinct_generations); if (is_array($tier_info) 
&& isset($tier_info[2]) && is_array($tier_info[2])) { @@ -802,8 +800,6 @@ class IndexDictionary implements CrawlConstants * @param bool $raw whether the id is our version of base64 encoded or * not * @param int $tier which tier to get word info from - * @param int $shift how many low order bits to drop from $word_id's - * when checking for a match * @param int $threshold if greater than zero how many posting list * results in dictionary info returned before stopping looking for * more matches @@ -817,7 +813,7 @@ class IndexDictionary implements CrawlConstants * generation, first offset, last offset, count, matched_key) or * false if no data */ - public function getWordInfoTier($word_id, $raw, $tier, $shift = 0, + public function getWordInfoTier($word_id, $raw, $tier, $threshold = -1, $start_generation = -1, $num_distinct_generations = -1) { $num_generations = 0; @@ -902,7 +898,7 @@ class IndexDictionary implements CrawlConstants } } $id = substr($word_string, 0, $word_key_len); - $cmp = compareWordHashes($word_id, $id, $shift); + $cmp = compareWordHashes($word_id, $id); if ($cmp === 0) { $found = true; break; @@ -952,11 +948,9 @@ class IndexDictionary implements CrawlConstants } } } - // if shift is 0 then at most one record in dictionary matches - if ($shift == 0) { - $this->formatWordInfo($total_count, $max_retained_generation, - $info); - } + // at most one record in dictionary matches + $this->formatWordInfo($total_count, $max_retained_generation, + $info); //up to first record with word id $test_loc = $check_loc - 1; $start_loc = $check_loc; @@ -979,7 +973,7 @@ class IndexDictionary implements CrawlConstants continue; } $id = substr($word_string, 0, $word_key_len); - if (compareWordHashes($word_id, $id, $shift) != 0 ) { + if (compareWordHashes($word_id, $id) != 0 ) { $break_count++; if ($break_count > 1) { break; @@ -1048,7 +1042,7 @@ class IndexDictionary implements CrawlConstants $test_loc * $word_item_len, $word_item_len); if ($word_string == "" ) break; 
$id = substr($word_string, 0, $word_key_len); - if (compareWordHashes($word_id, $id, $shift) != 0 ) { + if (compareWordHashes($word_id, $id) != 0 ) { $break_count++; if ($break_count > 1) { break; diff --git a/src/library/IndexManager.php b/src/library/IndexManager.php index fd6d3474d..4bec3eaaf 100644 --- a/src/library/IndexManager.php +++ b/src/library/IndexManager.php @@ -176,8 +176,6 @@ class IndexManager implements CrawlConstants * @param string $index_name bundle to look $hash in * @param string $hash hash of phrase or word to look up in bundle * dictionary - * @param int $shift if $hash is for a phrase, how many low order - * bits of word id to discard * @param int $threshold after the number of results exceeds this amount * stop looking for more dictionary entries. * @param int $start_generation what generation in the index to start @@ -191,8 +189,8 @@ class IndexManager implements CrawlConstants * (index_shard generation, posting_list_offset, length, exact id * that match $hash) */ - public static function getWordInfo($index_name, $hash, $shift = 0, - $threshold = -1, $start_generation = -1, $num_distinct_generations = -1, + public static function getWordInfo($index_name, $hash, $threshold = -1, + $start_generation = -1, $num_distinct_generations = -1, $with_remaining_total = false) { $index = self::getIndex($index_name); @@ -200,9 +198,8 @@ class IndexManager implements CrawlConstants $pre_info = []; if (!empty($index->dictionary)) { $pre_info = - $index->dictionary->getWordInfo($hash, true, $shift, - $threshold, $start_generation, - $num_distinct_generations, true); + $index->dictionary->getWordInfo($hash, true, $threshold, + $start_generation, $num_distinct_generations, true); } $last_desired_generation = $start_generation + $num_distinct_generations; @@ -225,8 +222,7 @@ class IndexManager implements CrawlConstants $index->non_merged_shard = $active_shard; $index->non_merged_generation = $active_generation; } - $active_info = 
$active_shard->getWordInfo($hash, true, - $shift); + $active_info = $active_shard->getWordInfo($hash, true); if (is_array($active_info)) { if (empty($pre_info)) { $pre_info[0] = 0; @@ -272,19 +268,10 @@ class IndexManager implements CrawlConstants if (!$index->dictionary) { return false; } - $pos = -1; - $total_num_docs = 0; - $hashes = allCrawlHashPaths($term_or_phrase, true); - foreach ($hashes as $hash) { - list($num_docs, ) = - self::getWordInfo($index_name, $hash[0], - $hash[1], $threshold, $start_generation, - $num_distinct_generations, true); - $total_num_docs += $num_docs; - if ($threshold > 0 && $total_num_docs > $threshold) { - return $total_num_docs; - } - } + $hash = crawlHashWord($term_or_phrase, true); + list($total_num_docs, ) = self::getWordInfo($index_name, $hash, + $threshold, $start_generation, $num_distinct_generations, + true); return $total_num_docs; } } diff --git a/src/library/IndexShard.php b/src/library/IndexShard.php index fc5851e26..a307cfa39 100644 --- a/src/library/IndexShard.php +++ b/src/library/IndexShard.php @@ -493,21 +493,8 @@ class IndexShard extends PersistentStructure implements CrawlConstants //using $this->docids_len divisible by 16 $doc_offset = $this->docids_len >> 4; foreach ($word_lists as $word => $position_list) { - if (isset($position_list["cond_max"])) { //for now - $word_id = crawlHashPath($word, $position_list["cond_max"], - true); - /* cond_max corresponds to a phrase in the document - as each word will also be stored in addition to - the phrase, and as we are using $occurences to - count the number of words in th document, we don't - want to double count so we set to zero for this case - */ - $occurrences = 0; - unset($position_list["cond_max"]); - } else { - $word_id = crawlHashWord($word, true); - $occurrences = count($position_list); - } + $word_id = crawlHashWord($word, true); + $occurrences = count($position_list); $store = packPosting($doc_offset, $position_list); if 
(!isset($this->words[$word_id])) { $this->words[$word_id] = $store; @@ -553,12 +540,9 @@ class IndexShard extends PersistentStructure implements CrawlConstants * * @param string $word_id id of the word one wants to look up * @param bool $raw whether the id is our version of base64 encoded or not - * @param int $shift how many low order bits to drop from $word_id's - * when checking for a match - * @return array first offset, last offset, count, exact matching id ( - * recall match can ignore low order shift bits) + * @return array first offset, last offset, count, exact matching id */ - public function getWordInfo($word_id, $raw = false, $shift = 0) + public function getWordInfo($word_id, $raw = false) { if ($raw == false) { //get rid of out modified base64 encoding @@ -600,7 +584,7 @@ class IndexShard extends PersistentStructure implements CrawlConstants return false; } $id = substr($word_string, 0, $word_key_len); - $cmp = compareWordHashes($word_id, $id, $shift); + $cmp = compareWordHashes($word_id, $id); if ($cmp === 0) { $tmp_info = $this->getWordInfoFromString( substr($word_string, $word_key_len)); diff --git a/src/library/PhraseParser.php b/src/library/PhraseParser.php index d90f2fd08..d04d96395 100755 --- a/src/library/PhraseParser.php +++ b/src/library/PhraseParser.php @@ -193,15 +193,13 @@ class PhraseParser $threshold, $with_pseudo_count = false) { $num_terms = count($terms); - if (($num_terms <= 1 || C\SUFFIX_PHRASES != 'true') && - !$with_pseudo_count) { + if ($num_terms <= 1 && !$with_pseudo_count) { return $terms; } $whole_phrase = implode(" ", $terms); $num_whole_phrase_results = IndexManager::numDocsTerm($whole_phrase, $index_name, $threshold); - if (($num_terms <= 1 || C\SUFFIX_PHRASES != 'true') && - $with_pseudo_count) { + if ($num_terms <= 1 && $with_pseudo_count) { return [$terms, $num_whole_phrase_results]; } /* @@ -287,8 +285,8 @@ class PhraseParser public static function extractPhrasesInLists($string, $lang = null) { $start_time = 
microtime(true); - $phrase_list = ['TIMES' => [ - 'CANONICALIZE' => 0, 'MAXIMAL_TERMS' => 0, + $phrase_list = ['TIMES' => [ 'CANONICALIZE' => 0, + 'TERM_POSITIONS_SENTENCE_TAGGING' => 0, 'QUESTION_ANSWER_EXTRACT' => 0]]; if (!isset(self::$programming_language_map[$lang])) { self::canonicalizePunctuatedTerms($string, $lang); @@ -297,12 +295,12 @@ class PhraseParser changeInMicrotime($start_time); } $maximal_terms_start_time = microtime(true); - $phrase_and_sentences = self::extractMaximalTermsAndFilterPhrases( + $phrase_and_sentences = self::extractTermSentencePositionsTags( $string, $lang, C\ENABLE_QUESTION_ANSWERING); - if (empty($phrase_and_sentences["TERMS_AND_PHRASES"])) { - $phrase_and_sentences["TERMS_AND_PHRASES"] = []; + if (empty($phrase_and_sentences["TERM_POSITIONS"])) { + $phrase_and_sentences["TERM_POSITIONS"] = []; } - $phrase_list['TIMES']['MAXIMAL_TERMS'] = + $phrase_list['TIMES']['TERM_POSITIONS_SENTENCE_TAGGING'] = changeInMicrotime($maximal_terms_start_time); if (C\ENABLE_QUESTION_ANSWERING && !empty($phrase_and_sentences["SENTENCES"])) { @@ -312,8 +310,8 @@ class PhraseParser method_exists($tokenizer, "extractTripletsPhrases")) { $triplets_list = $tokenizer->extractTripletsPhrases( $phrase_and_sentences["SENTENCES"], $lang); - $phrase_and_sentences["TERMS_AND_PHRASES"] = - $phrase_and_sentences["TERMS_AND_PHRASES"] + + $phrase_and_sentences["TERM_POSITIONS"] = + $phrase_and_sentences["TERM_POSITIONS"] + $triplets_list['QUESTION_LIST']; $phrase_list['QUESTION_ANSWER_LIST'] = $triplets_list['QUESTION_ANSWER_LIST']; @@ -321,7 +319,7 @@ class PhraseParser changeInMicrotime($qa_start_time); } } - $phrase_list['WORD_LIST'] = $phrase_and_sentences["TERMS_AND_PHRASES"]; + $phrase_list['WORD_LIST'] = $phrase_and_sentences["TERM_POSITIONS"]; return $phrase_list; } /** @@ -440,8 +438,9 @@ class PhraseParser } /** * Splits string according to punctuation and white space then - * extracts (stems/char grams) of terms and n word grams from the string - * Uses 
a notion a of maximal n word gram to do the extraction + * extracts (stems/char grams) of terms and makes a position list. Then + * splits string according to sentences and makes a position list for + * sentences * * @param string $string to extract terms from * @param string $lang IANA tag to look up stemmer under @@ -450,7 +449,7 @@ class PhraseParser * @return array of terms and n word grams in the order they appeared in * string */ - public static function extractMaximalTermsAndFilterPhrases($string, + public static function extractTermSentencePositionsTags($string, $lang = null, $extract_sentences = false) { $pos_lists = []; @@ -458,10 +457,6 @@ class PhraseParser if (empty($terms)) { return []; } - if (C\SUFFIX_PHRASES == 'true') { - $suffix_tree = new SuffixTree($terms); - $suffix_tree->outputMaximal(1, "", 0, $pos_lists); - } $t = 1; /*first position in doc is 1 as will encode with modified9 which requires positive numbers */ @@ -473,7 +468,7 @@ class PhraseParser $pos_lists[$term][] = $t; $t++; } - $out["TERMS_AND_PHRASES"] = $pos_lists; + $out["TERM_POSITIONS"] = $pos_lists; $tokenizer = self::getTokenizer($lang); if ($extract_sentences && method_exists($tokenizer, "tagTokenizePartOfSpeech") && diff --git a/src/library/StochasticTermSegmenter.php b/src/library/StochasticTermSegmenter.php index ba6f6bdbc..c807f0bd2 100644 --- a/src/library/StochasticTermSegmenter.php +++ b/src/library/StochasticTermSegmenter.php @@ -300,7 +300,7 @@ class StochasticTermSegmenter */ public function segmentSentence($sentence) { - $t=explode(" ",$sentence); + $t = explode(" ",$sentence); if(count($t) > 1) { $ret = []; foreach($t as $s) { @@ -308,7 +308,6 @@ class StochasticTermSegmenter } return $ret; } - if (!$this->dictionary_file) { $dic_file = C\LOCALE_DIR . 
"/{$this->lang}/resources/term_weight.txt.gz"; @@ -317,7 +316,7 @@ class StochasticTermSegmenter return null; } $this->dictionary_file = - json_decode(gzdecode(file_get_contents($dic_file)),true); + json_decode(gzdecode(file_get_contents($dic_file)), true); $this->unknown_term_score = $this->getScore(1); } preg_match_all('/./u', trim($sentence), $matches); diff --git a/src/library/Utility.php b/src/library/Utility.php index 0585b9095..de28b6223 100755 --- a/src/library/Utility.php +++ b/src/library/Utility.php @@ -1368,39 +1368,15 @@ function crawlHashPath($string, $path_start = 0, $raw = false) } /** * Used to compare to ids for index dictionary lookup. ids - * might be either a crawlHash or a 8 byte crawlHash together - * with 12 byte hash path for suffix tree lookup. In the latter - * case the shift variable can be used to match up to a subtree + * are a 8 byte crawlHash together with 12 byte non-hash suffix. * * @param string $id1 20 byte word id to compare * @param string $id2 20 byte word id to compare - * @param int $shift bit shift to apply before saying paths equal * @return int negative if $id1 smaller, positive if bigger, and 0 if * same */ -function compareWordHashes($id1, $id2, $shift = 0) +function compareWordHashes($id1, $id2) { - if ($shift < 32) { - $cmp = strncmp($id1, $id2, 16); - } else if ($shift < 64) { - $cmp = strncmp($id1, $id2, 12); - } else { - $cmp = strncmp($id1, $id2, 8); - } - if ($cmp != 0) { - return $cmp; - } - if ($shift < 32) { - $pos = 16; - } else if ($shift < 64) { - $shift -= 32; - $pos = 12; - } else { - $shift -= 64; - $pos = 8; - } - $id1 = packInt(unpackInt(substr($id1, $pos, 4)) >> $shift); - $id2 = packInt(unpackInt(substr($id2, $pos, 4)) >> $shift); return strcmp($id1, $id2); } /** diff --git a/src/library/VersionFunctions.php b/src/library/VersionFunctions.php index 0697d7940..ea197a271 100644 --- a/src/library/VersionFunctions.php +++ b/src/library/VersionFunctions.php @@ -330,8 +330,8 @@ function 
upgradeDatabaseVersion13(&$db) $db->execute("CREATE TABLE FEED_ITEM (GUID VARCHAR(11) PRIMARY KEY, TITLE VARCHAR(512), LINK VARCHAR(256), DESCRIPTION VARCHAR(4096), PUBDATE INT, SOURCE_NAME VARCHAR(16))"); - if (!file_exists(C\WORK_DIRECTORY."/feeds")) { - mkdir(C\WORK_DIRECTORY."/feeds"); + if (!file_exists(C\WORK_DIRECTORY . "/feeds")) { + mkdir(C\WORK_DIRECTORY . "/feeds"); } upgradeLocales(); //force locale upgrade } @@ -1861,6 +1861,10 @@ function upgradeDatabaseVersion68(&$db) $db->execute("DELETE FROM MIX_COMPONENTS WHERE TIMESTAMP = 4 AND FRAGMENT_ID = 0"); $db->execute("INSERT INTO MIX_COMPONENTS(TIMESTAMP, - FRAGMENT_ID, CRAWL_TIMESTAMP, WEIGHT, DIRECTION, KEYWORDS) + FRAGMENT_ID, CRAWL_TIMESTAMP, WEIGHT, DIRECTION, KEYWORDS) VALUES (4, 0, 100, 1, -1, 'media:news')"); + $db->execute("DROP TABLE FEED_ITEM"); + if (file_exists(C\WORK_DIRECTORY . "/feeds")) { + $db->unlinkRecursive(C\WORK_DIRECTORY . "/feeds"); + } } diff --git a/src/library/index_bundle_iterators/WordIterator.php b/src/library/index_bundle_iterators/WordIterator.php index c59e83a86..72eee74cb 100644 --- a/src/library/index_bundle_iterators/WordIterator.php +++ b/src/library/index_bundle_iterators/WordIterator.php @@ -53,12 +53,6 @@ class WordIterator extends IndexBundleIterator * @var string */ public $word_key; - /** - * Position from end of key that doesn't have to be an exact match - * (for phrases as using suffix tree) - * @var int - */ - public $shift; /** * The timestamp of the index is associated with this iterator * @var string @@ -140,8 +134,6 @@ class WordIterator extends IndexBundleIterator * Creates a word iterator with the given parameters. 
* * @param string $word_key hash of word or phrase to iterate docs of - * @param string $shift up to what point in key should be a match - * when do dictionary look up (for phrases because using suffix tree) * @param string $index_name time_stamp of the to use * @param bool $raw whether the $word_key is our variant of base64 encoded * @param SearchfiltersModel $filter Model responsible for keeping track @@ -155,7 +147,7 @@ class WordIterator extends IndexBundleIterator * could in theory open two read only versions of the same bundle but * reading the results in different directions */ - public function __construct($word_key, $shift, $index_name, $raw = false, + public function __construct($word_key, $index_name, $raw = false, $filter = null, $results_per_block = IndexBundleIterator::RESULTS_PER_BLOCK, $direction=self::ASCENDING) { @@ -166,10 +158,9 @@ class WordIterator extends IndexBundleIterator $this->direction = $direction; $this->filter = $filter; $this->word_key = $word_key; - $this->shift = $shift; $this->index_name = $index_name; list($this->num_docs, $this->dictionary_info) = - IndexManager::getWordInfo($index_name, $word_key, $shift, + IndexManager::getWordInfo($index_name, $word_key, -1, -1, C\NUM_DISTINCT_GENERATIONS, true); if ($this->dictionary_info === false) { $this->empty = true; diff --git a/src/library/media_jobs/FeedsUpdateJob.php b/src/library/media_jobs/FeedsUpdateJob.php index 3aa964092..f95a3b98c 100644 --- a/src/library/media_jobs/FeedsUpdateJob.php +++ b/src/library/media_jobs/FeedsUpdateJob.php @@ -86,14 +86,10 @@ class FeedsUpdateJob extends MediaJob $this->update_time = 0; $this->name_server_does_client_tasks = true; $this->name_server_does_client_tasks_only = true; - $dir = C\CRAWL_DIR . '/cache/' . self::feed_index_data_base_name; - $info['DESCRIPTION'] = "feed"; - $this->index_archive = new FeedArchiveBundle($dir, false, - serialize($info), C\NUM_DOCS_PER_GENERATION); $db_class = C\NS_DATASOURCES . ucfirst(C\DBMS). 
"Manager"; $this->db = new $db_class(); $this->db->connect(); - $this->db->setWorldPermissionsRecursive($dir); + $this->getFeedArchive(); C\nsconddefine("FEEDS_UPDATE_INTERVAL", C\ONE_HOUR); } /** @@ -708,12 +704,13 @@ class FeedsUpdateJob extends MediaJob $this->addTermCountsTrendingTable($db, $term_counts); L\crawlLog("----..adding items to IndexArchiveBundle"); // 1. check if index shard is full or not. if it is, new gen - $generation = $this->index_archive->initGenerationToAdd( + $index_archive = $this->getFeedArchive(); + $generation = $index_archive->initGenerationToAdd( $tmp_shard->num_docs); $summary_offsets = []; if (!empty($seen_sites)) { // 2. add pages, get summary_offset - $this->index_archive->addPagesAndSeenKeys($generation, + $index_archive->addPagesAndSeenKeys($generation, self::SUMMARY_OFFSET, self::HASH, $seen_sites, $seen_url_count); foreach ($seen_sites as $site) { $site_url = str_replace('|', "%7C", $site[self::URL]); @@ -730,9 +727,9 @@ class FeedsUpdateJob extends MediaJob $tmp_shard = IndexShard::load("feed_data", $tmp_string); if (!empty($summary_offsets)) { $tmp_shard->changeDocumentOffsets($summary_offsets); - $this->index_archive->addIndexData($tmp_shard); + $index_archive->addIndexData($tmp_shard); } - $this->index_archive->forceSave(); + $index_archive->forceSave(); if (file_exists($tmp_shard_name)) { unlink($tmp_shard_name); } @@ -857,8 +854,8 @@ class FeedsUpdateJob extends MediaJob * Removes entries older than a week * * @param resource $db handle to database with TRENDING_TERM table - * @param array $term_counts for the most recent uupdate of the - * FEED_ITEM table an array [$lang => [$term => $occurences]] + * @param array $term_counts for the most recent update of the + * feed index, it should be an array [$lang => [$term => $occurences]] * for the top NUM_TRENDING terms per language */ public function addTermCountsTrendingTable($db, $term_counts) @@ -939,7 +936,7 @@ class FeedsUpdateJob extends MediaJob } } /** - * Adds $item 
to FEED_ITEM table in db if it isn't already there + * Adds $item to feed index bundle if it isn't already there * * @param array $item data from a single feed item * @param string $source_name string name of the feed $item was found @@ -989,7 +986,8 @@ class FeedsUpdateJob extends MediaJob if (time() - $out_item["PUBDATE"] > $age) { return false; } - if ($this->index_archive->contains($out_item["GUID"])) { + $index_archive = $this->getFeedArchive(); + if ($index_archive->contains($out_item["GUID"])) { return false; } $out_item['SOURCE_NAME'] = $source_name; @@ -1112,4 +1110,15 @@ class FeedsUpdateJob extends MediaJob } } } + public function getFeedArchive() + { + $dir = C\CRAWL_DIR . '/cache/' . self::feed_index_data_base_name; + if (!file_exists($dir) || empty($this->index_archive)) { + $info['DESCRIPTION'] = "feed"; + $this->index_archive = new FeedArchiveBundle($dir, false, + serialize($info), C\NUM_DOCS_PER_GENERATION); + $this->db->setWorldPermissionsRecursive($dir); + } + return $this->index_archive; + } } diff --git a/src/locale/ar/configure.ini b/src/locale/ar/configure.ini index c7dd4168a..87fd44ac7 100755 --- a/src/locale/ar/configure.ini +++ b/src/locale/ar/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "مصدر الوسائط المضافة!" crawl_component_subsearch_added = "وأضاف سوبسيرتش!" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "مصدر الوسائط المحذوفة!" crawl_component_subsearch_deleted = "حذف سوبسيرتش!" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "الحقول المفقودة من نمو system_component_machine_doesnt_exists = "اسم الجهاز غير موجود!" system_component_stop_service_first = "الجهاز قيد الاستخدام. الرجاء إيقاف خدمة قيد التشغيل على ذلك! " system_component_machine_deleted = "حذف آلة!" 
+system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "لا يوجد ملف السجل وجدت." system_component_machine_servers_updated = "آلة #039;s Servers Updated!" system_component_machine_no_action = "غير قادر على تنفيذ الإجراء!" @@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "سجل" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "مصادر إعلامية" searchsources_element_subsearches = "سوبسيرتشيس الحالي" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "الاسم" searchsources_element_action = "العمل" searchsources_element_sourcetype = "نوع:" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "نطاق البايت للتحميل (0-ال pageoptions_element_summarizer = "" pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "السماح للصفحة ريكراول بعد:" pageoptions_element_file_types = "ملف صفحة أنواع للزحف:" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php 
pagination_helper_next = "القادم" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/bn/configure.ini b/src/locale/bn/configure.ini index 35362a452..c1a731ed0 100755 --- a/src/locale/bn/configure.ini +++ b/src/locale/bn/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "" crawl_component_subsearch_added = "" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "" crawl_component_subsearch_deleted = "" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "" system_component_machine_doesnt_exists = "" system_component_stop_service_first = "" system_component_machine_deleted = "" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "" system_component_machine_servers_updated = "" system_component_machine_no_action = "" @@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" 
+searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "" pageoptions_element_summarizer = "" pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "" pageoptions_element_file_types = "" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/de/configure.ini b/src/locale/de/configure.ini index 4ecc9b7e2..13217c325 100755 --- a/src/locale/de/configure.ini +++ b/src/locale/de/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "" crawl_component_subsearch_added = "" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "" crawl_component_subsearch_deleted = "" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "" 
system_component_machine_doesnt_exists = "" system_component_stop_service_first = "" system_component_machine_deleted = "" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "" system_component_machine_servers_updated = "" system_component_machine_no_action = "" @@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "" pageoptions_element_summarizer = "" pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "" pageoptions_element_file_types = "" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "" ; +; MediajobsElement.php 
+mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/en_US/configure.ini b/src/locale/en_US/configure.ini index 5eb6404a3..ddd5093e1 100644 --- a/src/locale/en_US/configure.ini +++ b/src/locale/en_US/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "Invalid URL!" crawl_component_missing_fields = "All Fields Need to be Filled!" crawl_component_media_source_added = "Media Source Added!" crawl_component_subsearch_added = "Subsearch Added!" +crawl_component_clearing_data = "Clearing News and Trending Data!" crawl_component_no_delete_source = "Source Was Not Deleted!" crawl_component_media_source_deleted = "Media Source Deleted!" crawl_component_subsearch_deleted = "Subsearch Deleted!" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "Missing Fields From Machine Form!" system_component_machine_doesnt_exists = "Machine Name does not Exists!" system_component_stop_service_first = "Machine in use. Please stop the service running on it!" system_component_machine_deleted = "Machine Deleted!" +system_component_job_doesnt_exist = "Job Doesn't Exist!" +system_component_job_disabled = "Job Disabled!" +system_component_job_enabled = "Job Enabled!" system_component_no_machine_log = "No Log File Found." system_component_machine_servers_updated = "Machine's Servers Updated!" system_component_machine_no_action = "Unable to Perform Action!" 
@@ -678,9 +682,8 @@ register_controller_user_already_exists = "Account not created - Username alread ; ; MachinestatusView.php machinestatus_view_media_updater = "Media Updater" -machinestatus_view_mode = "Mode:" +machinestatus_view_configure_media_jobs = "Configure Media Jobs" machinestatus_view_nameserver = "Name Server" -machinestatus_view_distributed = "Distributed" machinestatus_view_log = "Log" machinestatus_view_machines = "Machines" machinestatus_view_add_machine = "Add Machine" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "Aux Url XPaths:" searchsources_element_link_xpath_text = "Download Xpath:" searchsources_element_media_sources = "Media Sources" searchsources_element_subsearches = "Subsearches" +searchsources_element_confirm_delete = "Do you really want to clear previously downloaded news feed data?" +searchsources_element_clear_news_trending = "Clear Current News Feeds and Trending Data" searchsources_element_medianame = "Name" searchsources_element_action = "Action" searchsources_element_sourcetype = "Type:" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "Byte Range to Download (0 - Value):" pageoptions_element_summarizer = "Summarizer:" pageoptions_element_max_description = "Max Page Summary Length in Bytes:" pageoptions_element_cache_pages = "Cache Whole Crawled Pages:" -pageoptions_element_suffix_phrases = "Extract Suffix Phrases:" pageoptions_element_allow_recrawl = "Allow Page Recrawl After:" pageoptions_element_file_types = "Page File Types to Crawl:" pageoptions_element_classifiers_rankers = "Classifiers and Rankers" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "Page Processing Times" pageoptions_element_page_process_time = "Page Process Times:" pageoptions_element_rule_process_time = "Page Rule Process Times:" pageoptions_element_canon_time = "Canonicalize Terms:" -pageoptions_element_maximal_time = "Maximal Term Extraction:" +pageoptions_element_term_pos_sentence_tag_time = "Term Pos and Sentence Tag 
Time:" pageoptions_element_qa_time = "Question Answer Extraction:" pageoptions_element_total_time = "Total Time:" pageoptions_element_time_seconds = "%s seconds" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "Group Feeds No Longer Updating!" ; PaginationElement.php pagination_helper_next = "Next" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "Configure Media Jobs" +mediajobs_element_mode = "Mode" +mediajobs_element_nameserver = "Name Server" +mediajobs_element_distributed = "Distributed" +mediajobs_element_jobs_list = "Jobs List" +mediajobs_element_job_name = "Job Name" +mediajobs_element_run_status = "Run Status" +mediajobs_element_on = "On" +mediajobs_element_off = "Off" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "Classifiers" manageclassifiers_element_label_col = "Label" diff --git a/src/locale/es/configure.ini b/src/locale/es/configure.ini index 416ab87a1..aeeb5aa44 100755 --- a/src/locale/es/configure.ini +++ b/src/locale/es/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "" crawl_component_subsearch_added = "" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "" crawl_component_subsearch_deleted = "" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "Falta de Campos, en el Formulario de la M system_component_machine_doesnt_exists = "El Nombre de la Máquina no Existe!" system_component_stop_service_first = "Máquina en Uso. Por favor, Detenga el Servicio que se Ejecuta en él!" system_component_machine_deleted = "Máquina Eliminada!" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "No se Encontró el Archivo de Registro." system_component_machine_servers_updated = "Servidores de Máquina's Actualizados!" 
system_component_machine_no_action = "No se Puede Realizar la Acción!" @@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "" pageoptions_element_summarizer = "" pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "" pageoptions_element_file_types = "" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" +mediajobs_element_off = "" +; ; 
ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/fa/configure.ini b/src/locale/fa/configure.ini index 1e30f1801..9dca71acf 100755 --- a/src/locale/fa/configure.ini +++ b/src/locale/fa/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "منبع رسانه‌ها اضافه شد!" crawl_component_subsearch_added = "زیرجستجو اضافه شد!" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "منبع رسانه‌ها حذف شد!" crawl_component_subsearch_deleted = "زیر جستجو حذف شد!" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "بعضی قسمت‌های فرم دس system_component_machine_doesnt_exists = "این دستگاه وجود ندارد!" system_component_stop_service_first = "دستگاه در حال استفاده است. لطفن کاری که انجام می‌دهد را متوقف کنید!" system_component_machine_deleted = "دستگاه حذف شد!" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "Log File پیدا نشد." system_component_machine_servers_updated = "سرورهای دستگاه به روز شدند!" system_component_machine_no_action = "نمی‌توان این فرمان را اجرا کرد!" 
@@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "گزارش" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "منابع رسانه" searchsources_element_subsearches = "زیرجستجوهای فعلی" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "نام" searchsources_element_action = "فرمان" searchsources_element_sourcetype = "نوع:" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "دامنهٔ میزان بایتی که بار pageoptions_element_summarizer = "" pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "بگذار صفحه بعد از این مدت دوباره خزیده شود:" pageoptions_element_file_types = "نوع فایل صفحاتی که خزیده می‌شوند:" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "بعد" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" 
+mediajobs_element_run_status = "" +mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/fr_FR/configure.ini b/src/locale/fr_FR/configure.ini index 8ef5dfd51..ed16978ac 100755 --- a/src/locale/fr_FR/configure.ini +++ b/src/locale/fr_FR/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "" crawl_component_subsearch_added = "" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "" crawl_component_subsearch_deleted = "" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "" system_component_machine_doesnt_exists = "" system_component_stop_service_first = "" system_component_machine_deleted = "" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "" system_component_machine_servers_updated = "" system_component_machine_no_action = "" @@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "" pageoptions_element_summarizer = "" 
pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "" pageoptions_element_file_types = "" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "Proch." ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/he/configure.ini b/src/locale/he/configure.ini index d1c15118a..80d6bf7f9 100755 --- a/src/locale/he/configure.ini +++ b/src/locale/he/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "" crawl_component_subsearch_added = "" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "" crawl_component_subsearch_deleted = "" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "" system_component_machine_doesnt_exists = "" system_component_stop_service_first = "" system_component_machine_deleted = "" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" 
system_component_no_machine_log = "" system_component_machine_servers_updated = "" system_component_machine_no_action = "" @@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "" pageoptions_element_summarizer = "" pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "" pageoptions_element_file_types = "" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" 
+mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/hi/configure.ini b/src/locale/hi/configure.ini index 646a844de..b1fa5171e 100755 --- a/src/locale/hi/configure.ini +++ b/src/locale/hi/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "" crawl_component_subsearch_added = "" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "" crawl_component_subsearch_deleted = "" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "" system_component_machine_doesnt_exists = "" system_component_stop_service_first = "" system_component_machine_deleted = "" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "" system_component_machine_servers_updated = "" system_component_machine_no_action = "" @@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "" pageoptions_element_summarizer = "" pageoptions_element_max_description = "" 
pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "" pageoptions_element_file_types = "" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/in_ID/configure.ini b/src/locale/in_ID/configure.ini index b1d16423c..2170f5d8b 100755 --- a/src/locale/in_ID/configure.ini +++ b/src/locale/in_ID/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "" crawl_component_subsearch_added = "" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "" crawl_component_subsearch_deleted = "" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "" system_component_machine_doesnt_exists = "" system_component_stop_service_first = "" system_component_machine_deleted = "" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "" 
system_component_machine_servers_updated = "" system_component_machine_no_action = "" @@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "" pageoptions_element_summarizer = "" pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "" pageoptions_element_file_types = "" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "Sesudah" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" 
+mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/it/configure.ini b/src/locale/it/configure.ini index 0198f96cf..0d37154d9 100755 --- a/src/locale/it/configure.ini +++ b/src/locale/it/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "" crawl_component_subsearch_added = "" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "" crawl_component_subsearch_deleted = "" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "Campi mancanti dal modulo Macchina!" system_component_machine_doesnt_exists = "Nome Macchina inesistente!" system_component_stop_service_first = "Macchina in uso. Ferma il servizio che sta svolgendo!" system_component_machine_deleted = "Macchina cancellata!" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "Nessun file di log trovato." system_component_machine_servers_updated = "Server macchina aggiornato!" system_component_machine_no_action = "Impossibile svolgere azione!" 
@@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "Log" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "Quantità Byte da scaricare (0 - Valore) pageoptions_element_summarizer = "" pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "Permetti Scansione pagina dopo:" pageoptions_element_file_types = "Tipologia pagine da Scansionare:" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "Prossimo" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" 
+mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/ja/configure.ini b/src/locale/ja/configure.ini index 8afb4539e..6c823bf29 100755 --- a/src/locale/ja/configure.ini +++ b/src/locale/ja/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "" crawl_component_subsearch_added = "" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "" crawl_component_subsearch_deleted = "" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "" system_component_machine_doesnt_exists = "" system_component_stop_service_first = "" system_component_machine_deleted = "" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "" system_component_machine_servers_updated = "" system_component_machine_no_action = "" @@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "" pageoptions_element_summarizer = "" pageoptions_element_max_description = "" 
pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "" pageoptions_element_file_types = "" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "次の" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/kn/configure.ini b/src/locale/kn/configure.ini index 8ca931740..d30e33885 100755 --- a/src/locale/kn/configure.ini +++ b/src/locale/kn/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "" crawl_component_subsearch_added = "" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "" crawl_component_subsearch_deleted = "" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "" system_component_machine_doesnt_exists = "" system_component_stop_service_first = "" system_component_machine_deleted = "" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "" 
system_component_machine_servers_updated = "" system_component_machine_no_action = "" @@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "" pageoptions_element_summarizer = "" pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "" pageoptions_element_file_types = "" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "ಮುಂದಿನ" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" 
+mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/ko/configure.ini b/src/locale/ko/configure.ini index 57f653260..ed5177832 100755 --- a/src/locale/ko/configure.ini +++ b/src/locale/ko/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "" crawl_component_subsearch_added = "" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "" crawl_component_subsearch_deleted = "" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "" system_component_machine_doesnt_exists = "" system_component_stop_service_first = "" system_component_machine_deleted = "" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "" system_component_machine_servers_updated = "" system_component_machine_no_action = "" @@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "" pageoptions_element_summarizer = "" pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" 
-pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "" pageoptions_element_file_types = "" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "다음" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/nl/configure.ini b/src/locale/nl/configure.ini index 1d19b99d8..314d300e1 100644 --- a/src/locale/nl/configure.ini +++ b/src/locale/nl/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "Ongeldige URL!" crawl_component_missing_fields = "Alle velden moeten worden ingevuld!" crawl_component_media_source_added = "Media Source toegevoegd!" crawl_component_subsearch_added = "Subsearch toegevoegd!" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "Bron niet is verdwenen!" crawl_component_media_source_deleted = "Media Source Deleted!" crawl_component_subsearch_deleted = "Subsearch Deleted!" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "Ontbrekende of ongeldige Velden Van Machi system_component_machine_doesnt_exists = "Machine Naam bestaat niet!" system_component_stop_service_first = "Machine gebruikt. 
Stop aub de dienst draait op het!" system_component_machine_deleted = "Machine verwijderd!" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "Geen Log File Found." system_component_machine_servers_updated = "Servers machines Bijgewerkt!" system_component_machine_no_action = "Niet in staat om actie uit te voeren!" @@ -678,9 +682,8 @@ register_controller_user_already_exists = "Geen account aangemaakt - Gebruikersn ; ; MachinestatusView.php machinestatus_view_media_updater = "media Updater" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "Inloggen" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "media Bronnen" searchsources_element_subsearches = "huidige Subsearches" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "naam" searchsources_element_action = "actie" searchsources_element_sourcetype = "Type:" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "Byte Range te downloaden (0 - Waarde):" pageoptions_element_summarizer = "Summarizer:" pageoptions_element_max_description = "Max Pagina Samenvatting Lengte in Bytes:" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "Toestaan pagina opnieuw crawlen Na:" pageoptions_element_file_types = "Pagina Bestandstypen te kruipen:" pageoptions_element_classifiers_rankers = "Classifiers en Rankers" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" 
-pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "Groep feeds niet meer updaten!" ; PaginationElement.php pagination_helper_next = "volgende" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/pl/configure.ini b/src/locale/pl/configure.ini index 8fd5db1dc..e2ee9851b 100755 --- a/src/locale/pl/configure.ini +++ b/src/locale/pl/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "" crawl_component_subsearch_added = "" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "" crawl_component_subsearch_deleted = "" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "" system_component_machine_doesnt_exists = "" system_component_stop_service_first = "" system_component_machine_deleted = "" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "" system_component_machine_servers_updated = "" system_component_machine_no_action = "" @@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" 
machinestatus_view_log = "" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "" pageoptions_element_summarizer = "" pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "" pageoptions_element_file_types = "" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/pt/configure.ini b/src/locale/pt/configure.ini index 7ed4a1ef6..03ce2acc7 100755 --- a/src/locale/pt/configure.ini +++ b/src/locale/pt/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" 
crawl_component_missing_fields = "" crawl_component_media_source_added = "" crawl_component_subsearch_added = "" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "" crawl_component_subsearch_deleted = "" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "" system_component_machine_doesnt_exists = "" system_component_stop_service_first = "" system_component_machine_deleted = "" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "" system_component_machine_servers_updated = "" system_component_machine_no_action = "" @@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "" pageoptions_element_summarizer = "" pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "" pageoptions_element_file_types = "" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" 
-pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/ru/configure.ini b/src/locale/ru/configure.ini index 548273d2a..08cb09cd5 100755 --- a/src/locale/ru/configure.ini +++ b/src/locale/ru/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "" crawl_component_subsearch_added = "" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "" crawl_component_subsearch_deleted = "" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "" system_component_machine_doesnt_exists = "" system_component_stop_service_first = "" system_component_machine_deleted = "" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "" system_component_machine_servers_updated = "" system_component_machine_no_action = "" @@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "" 
machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "" pageoptions_element_summarizer = "" pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "" pageoptions_element_file_types = "" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/te/configure.ini b/src/locale/te/configure.ini index 111582bde..f23e5c3dc 100644 --- a/src/locale/te/configure.ini +++ b/src/locale/te/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "చెల్లని యుఆర్ఎల్!" 
crawl_component_missing_fields = "అన్ని ఖాళీలను భర్తీ చేయాలి!" crawl_component_media_source_added = "మీడియా సోర్సు ఆడ్ చేయడమైనది!" crawl_component_subsearch_added = "సబ్ సెర్చ్ ఆడ్ చేయడమైనది!" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "సోర్స్ డిలీట్ చెయ్యబడలేదు!" crawl_component_media_source_deleted = "మీడియా సోర్సు డిలీట్ చేయడమైనది!" crawl_component_subsearch_deleted = "సబ్ సెర్చ్ డిలీట్ చేయడమైనది!" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "మెషిన్ ఫార్మ్ ల system_component_machine_doesnt_exists = "మెషిన్ పేరు సరిఅయినది కాదు!" system_component_stop_service_first = "మెషిన్ ఉపయోగంలో వున్నది. దయచేసి మొదట సర్వీసెస్ ని ఆపండి!" system_component_machine_deleted = "మెషిన్ డిలీట్ చేయబడినది!" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "లాగ్ ఫైల్ కనపడలేదు." system_component_machine_servers_updated = "మెషిన్ సర్వర్స్ అప్డేట్ చేయబడినవి!" system_component_machine_no_action = "ఏ ఏక్సన్ చేయుట కుదరలేదు!" 
@@ -678,9 +682,8 @@ register_controller_user_already_exists = "ఖాతా సృస్టించ ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "" pageoptions_element_summarizer = "" pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "" pageoptions_element_file_types = "" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "గ్రూప్ ఫీడ్స్ అ ; PaginationElement.php pagination_helper_next = "తర్వాత" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php 
manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/th/configure.ini b/src/locale/th/configure.ini index d12b2ea56..4e92a9025 100755 --- a/src/locale/th/configure.ini +++ b/src/locale/th/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "" crawl_component_subsearch_added = "" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "" crawl_component_subsearch_deleted = "" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "" system_component_machine_doesnt_exists = "" system_component_stop_service_first = "" system_component_machine_deleted = "" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "" system_component_machine_servers_updated = "" system_component_machine_no_action = "" @@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "" pageoptions_element_summarizer = "" pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" 
pageoptions_element_allow_recrawl = "" pageoptions_element_file_types = "" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/tr/configure.ini b/src/locale/tr/configure.ini index 8e70eef27..74cabe6c9 100755 --- a/src/locale/tr/configure.ini +++ b/src/locale/tr/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "" crawl_component_subsearch_added = "" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "" crawl_component_subsearch_deleted = "" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "" system_component_machine_doesnt_exists = "" system_component_stop_service_first = "" system_component_machine_deleted = "" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "" system_component_machine_servers_updated = "" system_component_machine_no_action = "" @@ -678,9 +682,8 @@ 
register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "" pageoptions_element_summarizer = "" pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "" pageoptions_element_file_types = "" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" 
manageclassifiers_element_label_col = "" diff --git a/src/locale/vi_VN/configure.ini b/src/locale/vi_VN/configure.ini index 8b708a342..05447c538 100755 --- a/src/locale/vi_VN/configure.ini +++ b/src/locale/vi_VN/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "" crawl_component_subsearch_added = "" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "" crawl_component_subsearch_deleted = "" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "" system_component_machine_doesnt_exists = "" system_component_stop_service_first = "" system_component_machine_deleted = "" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "" system_component_machine_servers_updated = "" system_component_machine_no_action = "" @@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "" pageoptions_element_summarizer = "" pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "" 
pageoptions_element_file_types = "" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "Trang kế tiếp" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = "" manageclassifiers_element_label_col = "" diff --git a/src/locale/zh_CN/configure.ini b/src/locale/zh_CN/configure.ini index 104a5b131..a4406c6d1 100755 --- a/src/locale/zh_CN/configure.ini +++ b/src/locale/zh_CN/configure.ini @@ -466,6 +466,7 @@ crawl_component_invalid_url = "" crawl_component_missing_fields = "" crawl_component_media_source_added = "增加多媒體" crawl_component_subsearch_added = "" +crawl_component_clearing_data = "" crawl_component_no_delete_source = "" crawl_component_media_source_deleted = "刪除多媒體" crawl_component_subsearch_deleted = "" @@ -563,6 +564,9 @@ system_component_machine_incomplete = "未完成" system_component_machine_doesnt_exists = "不存在" system_component_stop_service_first = "停止服務" system_component_machine_deleted = "刪除" +system_component_job_doesnt_exist = "" +system_component_job_disabled = "" +system_component_job_enabled = "" system_component_no_machine_log = "無紀錄" system_component_machine_servers_updated = "服務更新" system_component_machine_no_action = "無動作" 
@@ -678,9 +682,8 @@ register_controller_user_already_exists = "" ; ; MachinestatusView.php machinestatus_view_media_updater = "" -machinestatus_view_mode = "" +machinestatus_view_configure_media_jobs = "" machinestatus_view_nameserver = "" -machinestatus_view_distributed = "" machinestatus_view_log = "" machinestatus_view_machines = "" machinestatus_view_add_machine = "" @@ -1207,6 +1210,8 @@ searchsources_element_aux_url_xpath = "" searchsources_element_link_xpath_text = "" searchsources_element_media_sources = "" searchsources_element_subsearches = "" +searchsources_element_confirm_delete = "" +searchsources_element_clear_news_trending = "" searchsources_element_medianame = "" searchsources_element_action = "" searchsources_element_sourcetype = "" @@ -1272,7 +1277,6 @@ pageoptions_element_page_range = "" pageoptions_element_summarizer = "" pageoptions_element_max_description = "" pageoptions_element_cache_pages = "" -pageoptions_element_suffix_phrases = "" pageoptions_element_allow_recrawl = "" pageoptions_element_file_types = "" pageoptions_element_classifiers_rankers = "" @@ -1315,7 +1319,7 @@ pageoptions_element_page_process_times = "" pageoptions_element_page_process_time = "" pageoptions_element_rule_process_time = "" pageoptions_element_canon_time = "" -pageoptions_element_maximal_time = "" +pageoptions_element_term_pos_sentence_tag_time = "" pageoptions_element_qa_time = "" pageoptions_element_total_time = "" pageoptions_element_time_seconds = "" @@ -1405,6 +1409,17 @@ groupfeed_element_no_longer_update = "" ; PaginationElement.php pagination_helper_next = "下一页" ; +; MediajobsElement.php +mediajobs_element_configure_media_jobs = "" +mediajobs_element_mode = "" +mediajobs_element_nameserver = "" +mediajobs_element_distributed = "" +mediajobs_element_jobs_list = "" +mediajobs_element_job_name = "" +mediajobs_element_run_status = "" +mediajobs_element_on = "" +mediajobs_element_off = "" +; ; ManageclassifiersElement.php manageclassifiers_element_classifiers = 
"" manageclassifiers_element_label_col = "" diff --git a/src/locale/zh_CN/resources/term_weight.txt.gz b/src/locale/zh_CN/resources/term_weight.txt.gz new file mode 100644 index 000000000..647eb8361 Binary files /dev/null and b/src/locale/zh_CN/resources/term_weight.txt.gz differ diff --git a/src/models/CrawlModel.php b/src/models/CrawlModel.php index 6815639e0..e39b583a6 100755 --- a/src/models/CrawlModel.php +++ b/src/models/CrawlModel.php @@ -709,6 +709,7 @@ EOT; $params = [$timestamp, $new_info]; $this->execMachines("setCrawlSeedInfo", $machine_urls, serialize($params)); + return; } $pre_dir = C\CRAWL_DIR . '/cache/' . self::index_data_base_name . $timestamp; diff --git a/src/models/MachineModel.php b/src/models/MachineModel.php index ef21761a4..20f7cc5b7 100644 --- a/src/models/MachineModel.php +++ b/src/models/MachineModel.php @@ -457,4 +457,65 @@ class MachineModel extends Model } } } + /** + * Maps each base or app media job name to its status, sorted by job name + */ + public function getJobsList() + { + $job_path = C\BASE_DIR . "/library/media_jobs/"; + $app_job_path = C\APP_DIR ."/library/media_jobs/"; + $job_file_folders = [ $job_path => glob("$job_path*Job.php") , + $app_job_path => glob("$app_job_path*Job.php")]; + $jobs_list = []; + foreach ($job_file_folders as $folder => $job_files) { + foreach ($job_files as $job_path) { + $job = $this->getJobNameFromPath($job_path); + if ($job == 'Media') { + continue; + } + if (!isset($jobs_list[$job])) { + $jobs_list[$job] = $this->getJobStatus($job); + } + } + } + ksort($jobs_list); + return $jobs_list; + } + /** + * Returns the stored status of $job, first storing true if none exists + */ + public function getJobStatus($job) + { + $job_dir = C\WORK_DIRECTORY . "/schedules/jobs"; + $job_file = $job_dir . 
"/$job.txt"; + if (!file_exists($job_file)) { + $this->createIfNecessaryDirectory($job_dir); + file_put_contents($job_file, serialize(true)); + chmod($job_file, 0777); + } + return unserialize(file_get_contents($job_file)); + } + /** + * Stores $status, coerced to a boolean, as the status of $job + */ + public function setJobStatus($job, $status) + { + $status = empty($status) ? 
false : true; + $job_dir = C\WORK_DIRECTORY . "/schedules/jobs"; + $job_file = $job_dir . "/$job.txt"; + $this->createIfNecessaryDirectory($job_dir); + file_put_contents($job_file, serialize($status)); + chmod($job_file, 0777); + } + /** + * Returns the base name of $job_path minus its Job suffix, or false if it has none + */ + private function getJobNameFromPath($job_path) + { + $job = pathinfo($job_path, \PATHINFO_FILENAME); + if (empty($job) || substr($job, -3) != 'Job') { + return false; + } + return substr($job, 0, -3); + } } diff --git a/src/models/Model.php b/src/models/Model.php index c4ca6894a..af0e377c4 100755 --- a/src/models/Model.php +++ b/src/models/Model.php @@ -167,6 +167,28 @@ class Model implements CrawlConstants } return file_put_contents($filename, $data); } + /** + * Creates a directory and sets it to world permission if it doesn't + * already exist + * + * @param string $directory name of directory to create + * @return int -1 on failure, 0 if already existed, 1 if created + */ + public function createIfNecessaryDirectory($directory) + { + if (file_exists($directory)) { + return 0; + } else { + set_error_handler(null); + @mkdir($directory); + @chmod($directory, 0777); + set_error_handler(C\NS_CONFIGS . "yioop_error_handler"); + } + if (file_exists($directory)) { + return 1; + } + return -1; + } /** * Given a page summary, extracts snippets which * are related to a set of search words. For each snippet, bold faces the @@ -399,7 +421,7 @@ class Model implements CrawlConstants public function isSingleLocalhost($machine_urls, $index_timestamp = -1) { if ($index_timestamp >= 0) { - $index_archive_name= self::index_data_base_name . $index_timestamp; + $index_archive_name = self::index_data_base_name . $index_timestamp; if (file_exists(C\CRAWL_DIR . 
"/cache/$index_archive_name/no_network.txt")) { return true; diff --git a/src/models/ParallelModel.php b/src/models/ParallelModel.php index 04c37e079..500cbd41f 100755 --- a/src/models/ParallelModel.php +++ b/src/models/ParallelModel.php @@ -284,12 +284,6 @@ class ParallelModel extends Model $summary = []; $ellipsis = ""; $description_hash = []; - $sql = "SELECT FI.TITLE AS TITLE, FI.DESCRIPTION AS DESCRIPTION, - FI.LINK AS LINK, FI.IMAGE_LINK AS IMAGE_LINK, - FI.SOURCE_NAME AS SOURCE_NAME, - MS.CATEGORY AS CATEGORY - FROM FEED_ITEM FI, MEDIA_SOURCE MS - WHERE GUID=? AND FI.SOURCE_NAME = MS.NAME"; foreach ($lookup_info as $lookup_item) { if (count($lookup_item) == 2) { list($word_key, $index_name) = $lookup_item; @@ -305,43 +299,23 @@ class ParallelModel extends Model list($machine, $key, $index_name, $generation, $summary_offset) = $lookup_item; } - if (strcmp($index_name, "db") != 0) { - $index = IndexManager::getIndex($index_name); - if (is_integer($summary_offset) && - is_integer($generation)) { - if ($summary_offset == 0) { - /* - we insert a dummy object at start of each - partition, so no legit lookup should have - offset 0. It still might happen, if a - changeDocumentOffsets failed, so we add - this check - */ - continue; - } - $page = $index->getPage($summary_offset, - $generation); - } else { - $page = null; + $index = IndexManager::getIndex($index_name); + if (is_integer($summary_offset) && + is_integer($generation)) { + if ($summary_offset == 0) { + /* + we insert a dummy object at start of each + partition, so no legit lookup should have + offset 0. 
It still might happen, if a + changeDocumentOffsets failed, so we add + this check + */ + continue; } + $page = $index->getPage($summary_offset, + $generation); } else { - $guid = L\base64Hash(substr($key, - IndexShard::DOC_KEY_LEN, - IndexShard::DOC_KEY_LEN)); - $result = $db->execute($sql, [$guid]); - $page = false; - if ($result) { - $row = $db->fetchArray($result); - if ($row) { - $page = []; - $page[self::TITLE] = $row['TITLE']; - $page[self::DESCRIPTION] = $row['DESCRIPTION']; - $page[self::URL] = $row['LINK']; - $page[self::SOURCE_NAME] = $row['SOURCE_NAME']; - $page[self::IMAGE_LINK] = $row['IMAGE_LINK']; - $page['CATEGORY'] = $row['CATEGORY']; - } - } + $page = null; } if (!$page || $page == []) { continue; @@ -438,7 +412,7 @@ class ParallelModel extends Model if (!isset($info[0][4])) { return false; } - $word_iterator = new WordIterator($info[0][4], 0, $index_name, true); + $word_iterator = new WordIterator($info[0][4], $index_name, true); if (is_array($next_docs = $word_iterator->nextDocsWithWord())) { $doc_info = current($next_docs); if (!$doc_info) { diff --git a/src/models/PhraseModel.php b/src/models/PhraseModel.php index 34f377aa9..5824f05b4 100755 --- a/src/models/PhraseModel.php +++ b/src/models/PhraseModel.php @@ -607,7 +607,7 @@ class PhraseModel extends ParallelModel if (isset($words) && count($words) == 1 && count($disallow_phrases) < 1 && !strpos($words[0], " ")) { $phrase_string = $words[0]; - $phrase_hash = L\allCrawlHashPaths($phrase_string); + $phrase_hash = L\crawlHashWord($phrase_string); $word_struct = ["KEYS" => [$phrase_hash], "QUOTE_POSITIONS" => null, "DISALLOW_KEYS" => [], "WEIGHT" => $weight, "INDEX_NAME" => $index_name, @@ -617,7 +617,7 @@ class PhraseModel extends ParallelModel $hashes = []; $word_keys = []; foreach ($words as $word) { - $word_keys[] = L\allCrawlHashPaths($word); + $word_keys[] = L\crawlHashWord($word); } if (count($word_keys) == 0) { $word_keys = null; @@ -1689,48 +1689,19 @@ class PhraseModel extends 
ParallelModel $direction); $min_group_override = true; } else { - //can happen if exact phrase search suffix approach used - if (isset($distinct_word_keys[$i][0]) && - is_array($distinct_word_keys[$i][0])) { - $distinct_keys = $distinct_word_keys[$i]; - } else { - $distinct_keys = [$distinct_word_keys[$i]]; - } - $sum = 0; - $tmp_word_iterators =[]; - $m = 0; - foreach ($distinct_keys as $distinct_key) { - $shift = (isset($distinct_key[1])) ? - $distinct_key[1] : 0; - $distinct_key_id = L\unbase64Hash( - $distinct_key[0]); - $actual_index_name = $index_name; - $direction = self::ASCENDING; - if (($index_name[0] == "-")) { - $actual_index_name = substr($index_name, 1); - $direction = self::DESCENDING; - } - $tmp_word_iterators[$m] = - new I\WordIterator($distinct_key_id, $shift, - $actual_index_name, true, $filter, $to_retrieve, - $direction); - $sum += $tmp_word_iterators[$m]->num_docs; - if ($tmp_word_iterators[$m]->dictionary_info !=[]) { - $min_group_override = true; - $m++; - } else { - unset($tmp_word_iterators[$m]); - } - if ($sum > $lookup_cutoff) { - break; - } - } - if ($m == 1) { - $word_iterators[$i] = $tmp_word_iterators[0]; - } else { - $word_iterators[$i] = new I\DisjointIterator( - $tmp_word_iterators); + $distinct_key = $distinct_word_keys[$i]; + $distinct_key_id = L\unbase64Hash($distinct_key); + $direction = self::ASCENDING; + $actual_index_name = $index_name; + if (($index_name[0] == "-")) { + $actual_index_name = substr($index_name, 1); + $direction = self::DESCENDING; } + $word_iterators[$i] = + new I\WordIterator($distinct_key_id, + $actual_index_name, true, $filter, $to_retrieve, + $direction); + $min_group_override = true; } foreach ($word_keys as $index => $key) { if (isset($distinct_word_keys[$i]) && @@ -1748,10 +1719,8 @@ class PhraseModel extends ParallelModel $actual_index_name = substr($index_name, 1); $direction = self::DESCENDING; } - /* notice for now shift always 0 - you can't disallow - phrases */ $disallow_iterator = - new 
I\WordIterator($disallow_keys[$i], 0, + new I\WordIterator($disallow_keys[$i], $actual_index_name, false, $filter, I\IndexBundleIterator::RESULTS_PER_BLOCK, $direction); diff --git a/src/models/ProfileModel.php b/src/models/ProfileModel.php index 24127d02a..ce6f413ab 100755 --- a/src/models/ProfileModel.php +++ b/src/models/ProfileModel.php @@ -69,7 +69,7 @@ class ProfileModel extends Model 'SEARCH_ANALYTICS_MODE', 'SEARCHBAR_PATH', 'SEND_MAIL_MEDIA_UPDATER', 'SERVER_ALPHA', 'SESSION_NAME', 'SIDE_ADSCRIPT', 'SIDEBAR_COLOR', 'SIGNIN_LINK', 'SIMILAR_LINK', - 'SUBSEARCH_LINK', 'SUFFIX_PHRASES', 'TIMEZONE', 'TITLE_WEIGHT', + 'SUBSEARCH_LINK', 'TIMEZONE', 'TITLE_WEIGHT', 'TOPBAR_COLOR', 'TOP_ADSCRIPT','TOR_PROXY', 'USE_FILECACHE', 'USE_MAIL_PHP', 'USE_PROXY', 'USER_AGENT_SHORT', 'WEB_URI', 'WEB_ACCESS', 'WORD_SUGGEST' @@ -181,13 +181,6 @@ class ProfileModel extends Model CREDIT_LEDGER (USER_ID)", "CURRENT_WEB_INDEX" => "CREATE TABLE CURRENT_WEB_INDEX (CRAWL_TIME NUMERIC(" . C\TIMESTAMP_LEN . ") PRIMARY KEY)", - "FEED_ITEM" => "CREATE TABLE FEED_ITEM (GUID CHAR(". - C\TIMESTAMP_LEN . ") PRIMARY KEY, - TITLE VARCHAR(".C\TITLE_LEN."), LINK VARCHAR(" . - C\MAX_URL_LEN."), - IMAGE_LINK VARCHAR(" . C\MAX_URL_LEN . "), - DESCRIPTION VARCHAR(" . C\MAX_GROUP_POST_LEN . "), - PUBDATE INTEGER, SOURCE_NAME VARCHAR(" . C\LONG_NAME_LEN . "))", "GROUP_ITEM" => "CREATE TABLE GROUP_ITEM (ID $serial PRIMARY KEY $auto_increment, PARENT_ID INTEGER, GROUP_ID INTEGER, USER_ID INTEGER, URL VARCHAR(" . 
C\TITLE_LEN @@ -434,7 +427,7 @@ class ProfileModel extends Model { $to_make_dirs = [$directory, "$directory/app", "$directory/archives", "$directory/cache", - "$directory/classifiers", "$directory/data", "$directory/feeds", + "$directory/classifiers", "$directory/data", "$directory/app/locale", "$directory/log", "$directory/prepare", "$directory/schedules", "$directory/search_filters", "$directory/temp"]; @@ -534,7 +527,6 @@ EOT; 'MEDIA_MODE' => "name_server", 'SESSION_NAME' => "yioopbiscuit", 'SIDEBAR_COLOR' => "#F8F8F8", - 'SUFFIX_PHRASES' => "false", 'TIMEZONE' => 'America/Los_Angeles', 'TOPBAR_COLOR' => "#F5F5FF", ]; @@ -620,28 +612,6 @@ EOT; } return false; } - /** - * Creates a directory and sets it to world permission if it doesn't - * aleady exist - * - * @param string $directory name of directory to create - * @return int -1 on failure, 0 if already existed, 1 if created - */ - public function createIfNecessaryDirectory($directory) - { - if (file_exists($directory)) { - return 0; - } else { - set_error_handler(null); - @mkdir($directory); - @chmod($directory, 0777); - set_error_handler(C\NS_CONFIGS . "yioop_error_handler"); - } - if (file_exists($directory)) { - return 1; - } - return -1; - } /** * Check if $dbinfo provided the connection details for a Yioop/SeekQuarry * database. If it does provide a valid db connection but no data then try diff --git a/src/models/SourceModel.php b/src/models/SourceModel.php index 97ff28923..a80c6302a 100644 --- a/src/models/SourceModel.php +++ b/src/models/SourceModel.php @@ -72,12 +72,10 @@ class SourceModel extends ParallelModel * * @param string $source_type the particular kind of media source to return * for example, video - * @param bool $has_no_feed_items if true returns only those items which - * have not feed_items associated with them. 
+ * @return array a list of web sites which are either video or news sites */ - public function getMediaSources($source_type = "", - $has_no_feed_items = false) + public function getMediaSources($source_type = "") { $db = $this->db; $sources = []; @@ -87,16 +85,6 @@ class SourceModel extends ParallelModel $sql .= " WHERE TYPE=:type"; $params = [":type" => $source_type]; } - if ($has_no_feed_items) { - if (empty($source_type)) { - $sql .= " WHERE "; - } else { - $sql .= " AND "; - } - $sql .= " NOT EXISTS - (SELECT * FROM FEED_ITEM F - WHERE F.SOURCE_NAME = M.NAME)"; - } $result = $db->execute($sql, $params); while ($row = $db->fetchArray($result)) { $sources[] = $row; @@ -219,17 +207,6 @@ class SourceModel extends ParallelModel */ public function deleteMediaSource($timestamp) { - $sql = "SELECT * FROM MEDIA_SOURCE WHERE TIMESTAMP='$timestamp'"; - $result = $this->db->execute($sql); - if ($result) { - $row = $this->db->fetchArray($result); - if (isset($row['TYPE']) && $row['TYPE'] == "rss") { - if ($row['NAME'] != "") { - $sql = "DELETE FROM FEED_ITEM WHERE SOURCE_NAME=?"; - $this->db->execute($sql, [$row['NAME']]); - } - } - } $sql = "DELETE FROM MEDIA_SOURCE WHERE TIMESTAMP=?"; $this->db->execute($sql, [$timestamp]); } @@ -386,8 +363,23 @@ class SourceModel extends ParallelModel } $sql = "DELETE FROM SUBSEARCH WHERE FOLDER_NAME=?"; $db->execute($sql, [$folder_name]); - $sql = "DELETE FROM TRANSLATION WHERE IDENTIFIER_STRING = ?"; $db->execute($sql, [$locale_string]); } + /** + * Deletes trending terms and the feed index folder, or relays to remote $machine_urls + */ + public function clearFeedData($machine_urls = null) + { + if ($machine_urls != null && + !$this->isSingleLocalhost($machine_urls)) { + $this->execMachines("clearFeedData", $machine_urls); + return; + } + $db = $this->db; + $sql = "DELETE FROM TRENDING_TERM"; + $db->execute($sql); + $feed_dir = C\CRAWL_DIR . '/cache/' . 
self::feed_index_data_base_name; + $db->unlinkRecursive($feed_dir); + } } diff --git a/src/models/TrendingModel.php b/src/models/TrendingModel.php index b6f4ffaf2..f050e3cf5 100644 --- a/src/models/TrendingModel.php +++ b/src/models/TrendingModel.php @@ -107,8 +107,7 @@ class TrendingModel extends Model implements MediaConstants $random_trends_file = "$trend_dir/random_" . "{$category}_{$locale_tag}.txt"; if (file_exists($random_trends_file) && - filemtime($random_trends_file) + (5 * C\ONE_MINUTE) > - time()) { + filemtime($random_trends_file) + (5 * C\ONE_MINUTE) > time()) { return unserialize(file_get_contents($random_trends_file)); } $trend_data = $this->topTermsForUpdatePeriods($locale_tag, [], diff --git a/src/views/MachinestatusView.php b/src/views/MachinestatusView.php index 0c98fe9f4..7f7ddccdb 100644 --- a/src/views/MachinestatusView.php +++ b/src/views/MachinestatusView.php @@ -52,32 +52,21 @@ class MachinestatusView extends View { $admin_url = htmlentities(B\controllerUrl('admin', true)); $csrf_token = C\CSRF_TOKEN."=". $data[C\CSRF_TOKEN]; - $base_url = "{$admin_url}a=manageMachines&$csrf_token&arg="; ?> + $base_url = "{$admin_url}a=manageMachines&$csrf_token&arg="; + $log_url = $base_url ."log&name=NAME_SERVER&type=MediaUpdater". + "&id=0"; + $on_media_updater = $base_url . "update&action=start&". + "name=NAME_SERVER&type=MediaUpdater&id=0"; + $off_media_updater = $base_url ."update&action=stop&". + "name=NAME_SERVER&type=MediaUpdater&id=0"; + $name_server_update = $data['MEDIA_MODE']=='name_server'; + $update_mode_url = $base_url . 
"updatemode"; + $caution = !isset($data['MACHINES']['NAME_SERVER']["MediaUpdater"]) + || $data['MACHINES']['NAME_SERVER']["MediaUpdater"] == 0; + ?> <h2><?=tl('machinestatus_view_media_updater')?></h2> - <div class="no-margin"><b><?php - e(tl('machinestatus_view_mode')); - $log_url = $base_url ."log&name=NAME_SERVER&type=MediaUpdater". - "&id=0"; - $on_media_updater = $base_url . "update&action=start&". 
- "name=NAME_SERVER&type=MediaUpdater&id=0"; - $off_media_updater = $base_url ."update&action=stop&". - "name=NAME_SERVER&type=MediaUpdater&id=0"; - $name_server_update = $data['MEDIA_MODE']=='name_server'; - $update_mode_url = $base_url . "updatemode"; - $caution = !isset($data['MACHINES']['NAME_SERVER']["MediaUpdater"]) - || $data['MACHINES']['NAME_SERVER']["MediaUpdater"] == 0; - ?></b> [<?php - if ($name_server_update) { - e("<b>".tl('machinestatus_view_nameserver')); - ?></b>|<a href="<?php e($update_mode_url); ?>"><?php - e(tl('machinestatus_view_distributed'));?></a><?php - } else { - ?><a href="<?php e($update_mode_url); ?>"><?php - e(tl('machinestatus_view_nameserver')); - ?></a>|<b><?php - e(tl('machinestatus_view_distributed'));?></b><?php - } - ?>]</div> + <div class="no-margin">[<a href="<?=$base_url . 'mediajobs' + ?>"><?= tl('machinestatus_view_configure_media_jobs'); ?>]</a></div> <div class="box"> <h3 class="no-margin"><?=tl('machinestatus_view_nameserver') ?></h3> <form id="media-form" method="post"> diff --git a/src/views/elements/MediajobsElement.php b/src/views/elements/MediajobsElement.php new file mode 100644 index 000000000..a639defc3 --- /dev/null +++ b/src/views/elements/MediajobsElement.php @@ -0,0 +1,96 @@ +<?php +/** + * SeekQuarry/Yioop -- + * Open Source Pure PHP Search Engine, Crawler, and Indexer + * + * Copyright (C) 2009 - 2019 Chris Pollett chris@pollett.org + * + * LICENSE: + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + * + * END LICENSE + * + * @author Chris Pollett chris@pollett.org + * @license https://www.gnu.org/licenses/ GPL3 + * @link https://www.seekquarry.com/ + * @copyright 2009 - 2019 + * @filesource + */ +namespace seekquarry\yioop\views\elements; + +use seekquarry\yioop as B; +use seekquarry\yioop\configs as C; + +/** + * Element that draws the configure media jobs activity (mode and job list) + * @author Chris Pollett + */ +class MediajobsElement extends Element +{ + /** + * Draws the update mode links and a table of job enable/disable dropdowns + * @param array $data with CSRF_TOKEN, MEDIA_MODE, leftorright, JOBS_LIST + */ + public function render($data) + { + $admin_url = htmlentities(B\controllerUrl('admin', true)); + $csrf_token = C\CSRF_TOKEN."=". $data[C\CSRF_TOKEN]; + $base_url = "{$admin_url}a=manageMachines&$csrf_token&arg="; + $name_server_update = empty($data['MEDIA_MODE']) || + $data['MEDIA_MODE'] =='name_server'; + $update_mode_url = $base_url . "updatemode"; ?> + <div class="current-activity"> + <div class="<?=$data['leftorright'] ?>"> + [<a href="<?=$base_url ?>" + >X</a>] + </div> + <h2><?=tl('mediajobs_element_configure_media_jobs') ?></h2> + <div><b><?php + e(tl('mediajobs_element_mode')); + ?></b> [<?php + if ($name_server_update) { + ?><?= tl('mediajobs_element_nameserver'); + ?>|<a href="<?=$update_mode_url ?>"><?= + tl('mediajobs_element_distributed')?></a><?php + } else { + ?><a href="<?=$update_mode_url ?>"><?= + tl('mediajobs_element_nameserver'); + ?></a>|<?= + tl('mediajobs_element_distributed');?><?php + } + ?>]</div> + <h3><?=tl('mediajobs_element_jobs_list') ?></h3> + <table class="admin-table"> + <tr><th><?=tl('mediajobs_element_job_name')?></th> + <th><?=tl('mediajobs_element_run_status') ?></th></tr><?php + foreach ($data["JOBS_LIST"] as $job_name => $enabled) { + $selected = ($enabled) ? 
"enablejob&job_name=$job_name" : + "disablejob&job_name=$job_name"; + $options = [ + "enablejob&job_name=$job_name" => + tl('mediajobs_element_on'), + "disablejob&job_name=$job_name" => + tl('mediajobs_element_off'), + ]; ?> + <tr><td><?=$job_name ?></td><td><?= + $this->view->helper('options')->renderLinkDropDown( + "job-toggle-" . $job_name, $options, $selected, $base_url + ); + ?></td></tr><?php + } ?> + </table> + </div><?php + } +} diff --git a/src/views/elements/PageoptionsElement.php b/src/views/elements/PageoptionsElement.php index aa8a3ef20..6bc9ba07a 100644 --- a/src/views/elements/PageoptionsElement.php +++ b/src/views/elements/PageoptionsElement.php @@ -213,18 +213,6 @@ class PageOptionsElement extends Element }?> /> </div> - <div class="top-margin"><b><label for="suffix-phrases"><?= - tl('pageoptions_element_suffix_phrases')?> - </label></b><input - id='suffix-phrases' type="checkbox" name="SUFFIX_PHRASES" - value="true" - <?php if (!empty($data['SUFFIX_PHRASES']) && - $data['SUFFIX_PHRASES'] != "false") { - e("checked='checked'"); - }?> - /><?php e($this->view->helper("helpbutton")->render( - "Suffix Phrases", $data[C\CSRF_TOKEN]));?> - </div> <div class="top-margin"><b><label for="page-recrawl-frequency"><?= tl('pageoptions_element_allow_recrawl') ?></label></b> <?php $this->view->helper("options")->render( @@ -563,7 +551,7 @@ class PageOptionsElement extends Element e("<pre>\n{$data['QUESTIONS_TRIPLET']}\n</pre>"); } if (isset($data["EXTRACTED_META_WORDS"])) { - e("<h3>".tl('pageoptions_element_extracted_metas')."</h3>"); + e("<h3>" . tl('pageoptions_element_extracted_metas') . 
"</h3>"); e("<pre>\n{$data['EXTRACTED_META_WORDS']}\n</pre>"); } if (isset($data["PROCESS_TIMES"])) { @@ -574,7 +562,8 @@ class PageOptionsElement extends Element 'RULE_PROCESS' => tl('pageoptions_element_rule_process_time'), 'CANONICALIZE' => tl('pageoptions_element_canon_time'), - 'MAXIMAL_TERMS' => tl('pageoptions_element_maximal_time'), + 'TERM_POSITIONS_SENTENCE_TAGGING' => + tl('pageoptions_element_term_pos_sentence_tag_time'), 'QUESTION_ANSWER_EXTRACT' => tl('pageoptions_element_qa_time'), 'TOTAL' => tl('pageoptions_element_total_time'), diff --git a/src/views/elements/SearchsourcesElement.php b/src/views/elements/SearchsourcesElement.php index c6b96b688..bc6d15d53 100644 --- a/src/views/elements/SearchsourcesElement.php +++ b/src/views/elements/SearchsourcesElement.php @@ -125,7 +125,12 @@ class SearchsourcesElement extends Element tl('searchsources_element_subsearches')?></a></li> </ul> <div class='tab-menu-content'> - <div id="mediasourcetab"><?php + <div id="mediasourcetab"> + <div><b>[<a href="<?= $base_url . 
'&arg=cleardata'?>" + onclick='javascript:return confirm("<?= + tl('searchsources_element_confirm_delete') ?>");' ><?= + tl('searchsources_element_clear_news_trending') + ?></a>]</b></div><?php $data['FORM_TYPE'] = ""; $data['SEARCH_ARG'] = 'sourcesearch'; $data['TABLE_TITLE'] = tl('searchsources_element_media_sources'); diff --git a/tests/PhraseParserTest.php b/tests/PhraseParserTest.php index afccf67bf..357681fb8 100644 --- a/tests/PhraseParserTest.php +++ b/tests/PhraseParserTest.php @@ -75,17 +75,14 @@ EOD; THE THE ‘Deep Space nine’ ‘Deep Space’ version of GIANT the the ©2012 -reddit: the front page of the internet +reddit: the front page of the internet prime minister EOD; $extracted_data = PhraseParser::extractPhrasesInLists($phrase_string, "en-US"); $word_lists = $extracted_data['WORD_LIST']; $words = array_keys($word_lists); - if (C\SUFFIX_PHRASES == 'true') { - $this->assertTrue(in_array("the the", $words), "Extract Bigram 1"); - $this->assertTrue(in_array("deep space", $words), - "Extract Bigram 2"); - } + $this->assertTrue(in_array("prime-minist", $words), + "Extract Entity 1"); $this->assertTrue(in_array("deep", $words), "Unigrams still present 1"); $this->assertTrue(in_array("space", $words), "Unigrams still present 2");