| Filename |
| --- |
| src/executables/Fetcher.php |
| src/executables/QueueServer.php |
| src/library/CrawlConstants.php |
| src/library/CrawlDaemon.php |
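The diffs below thread a new DEBUG message through Yioop's crawl processes: a `debug` verb added to `CrawlDaemon::init()` serializes a debug command into the target process's messages file, and `QueueServer`/`Fetcher` consume it in their message-checking code. A minimal sketch of that file handshake (not part of the patch; the `DEBUG` value mirrors the `CrawlConstants` diff, and the messages-file path is illustrative — the real one comes from `CrawlDaemon::getMesssageFileName()`):

```php
<?php
// Sketch only -- not part of the patch below.
const DEBUG = "E"; // mirrors the constant added to CrawlConstants
$messages_file = "/tmp/yioop_data/schedules/QueueServerMessages.txt";
// Sender (command line): record a debug command such as FORCE_SWAP.
file_put_contents($messages_file, serialize([DEBUG => "FORCE_SWAP"]));
// Receiver (inside the server's message check): consume and clear it.
$info = unserialize(file_get_contents($messages_file));
if (isset($info[DEBUG])) {
    $debug = $info[DEBUG]; // later compared against FORCE_SWAP, etc.
    unlink($messages_file);
}
```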
diff --git a/src/executables/Fetcher.php b/src/executables/Fetcher.php
index 9a6a5ed3f..88d48d3dd 100755
--- a/src/executables/Fetcher.php
+++ b/src/executables/Fetcher.php
@@ -402,6 +402,12 @@ class Fetcher implements CrawlConstants
      * @var array
      */
     public $proxy_servers;
+    /**
+     * Holds the value of a debug message that might have been sent from
+     * the command line during the current execution of loop();
+     * @var string
+     */
+    public $debug;
     /**
      * Before receiving any data from a queue server's web app this is
      * the default assumed post_max_size in bytes
@@ -485,6 +491,7 @@ class Fetcher implements CrawlConstants
         $this->crawl_order = self::PAGE_IMPORTANCE;
         $this->max_depth = -1;
         $this->summarizer_option = self::BASIC_SUMMARIZER;
+        $this->debug = "";
     }
     /**
      * Return the fetcher's copy of a page processor for the given
@@ -594,6 +601,11 @@ class Fetcher implements CrawlConstants
             "/schedules/{$prefix}FetcherMessages.txt";
         if (file_exists($fetcher_message_file)) {
             $info = unserialize(file_get_contents($fetcher_message_file));
+            if (isset($info[self::DEBUG])) {
+                $this->debug = $info[self::DEBUG];
+                unlink($fetcher_message_file);
+                continue;
+            }
             unlink($fetcher_message_file);
             if (isset($info[self::STATUS]) &&
                 $info[self::STATUS] == self::STOP_STATE) {
@@ -736,7 +748,11 @@ class Fetcher implements CrawlConstants
                 "Ensure minimum loop time by sleeping..." . $sleep_time);
             sleep($sleep_time);
         }
-        } //end while
+            if (!empty($this->debug)) {
+                L\crawlLog("Debug Message: {$this->debug} has been processed");
+                $this->debug = "";
+            }
+        }//end while
     L\crawlLog("Fetcher shutting down!!");
 }
 /**
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 96a14b448..3daea00e0 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -34,12 +34,10 @@ use seekquarry\yioop\configs as C;
 use seekquarry\yioop\library as L;
 use seekquarry\yioop\library\CrawlConstants;
 use seekquarry\yioop\library\CrawlDaemon;
-use seekquarry\yioop\library\FetchUrl;
 use seekquarry\yioop\library\IndexShard;
 use seekquarry\yioop\library\IndexArchiveBundle;
 use seekquarry\yioop\library\Join;
 use seekquarry\yioop\library\processors\PageProcessor;
-use seekquarry\yioop\library\PhraseParser;
 use seekquarry\yioop\library\DoubleIndexBundle;
 use seekquarry\yioop\library\UrlParser;
 use seekquarry\yioop\library\WebQueueBundle;
@@ -52,7 +50,7 @@ if (!defined("seekquarry\\yioop\\configs\\UNIT_TEST_MODE")) {
 }
 ini_set("memory_limit","2500M"); //so have enough memory to crawl big pages
 /** For crawlHash function and Yioop constants */
-require_once __DIR__."/../library/Utility.php";
+require_once __DIR__ . "/../library/Utility.php";
 if (!C\PROFILE) {
     echo "Please configure the search engine instance ".
         "by visiting its web interface on localhost.\n";
@@ -284,6 +282,12 @@ class QueueServer implements CrawlConstants, Join
      * @var string
      */
     public $process_name;
+    /**
+     * Holds the value of a debug message that might have been sent from
+     * the command line during the current execution of loop();
+     * @var string
+     */
+    public $debug;
     /**
      * A mapping between class field names and parameters which might
      * be sent to a queue server via an info associative array.
@@ -348,6 +352,7 @@ class QueueServer implements CrawlConstants, Join
         $this->waiting_hosts = [];
         $this->server_name = "IndexerAndScheduler";
         $this->process_name = "0-QueueServer";
+        $this->debug = "";
     }
     /**
      * This is the function that should be called to get the queue server
@@ -518,13 +523,15 @@ class QueueServer implements CrawlConstants, Join
         static $first_check;
         static $last_check;
         $time = time();
-        if (!isset($first_check)) {
-            $first_check = $time;
-            $last_check = $time;
-        }
-        if ($time - $last_check < C\LOG_TIMEOUT ||
-            $time - $first_check < C\PROCESS_TIMEOUT ) {
-            return;
+        if ($this->debug != "NOT_RUNNING") {
+            if (!isset($first_check)) {
+                $first_check = $time;
+                $last_check = $time;
+            }
+            if ($time - $last_check < C\LOG_TIMEOUT ||
+                $time - $first_check < C\PROCESS_TIMEOUT ) {
+                return;
+            }
         }
         $last_check = $time;
         L\crawlLog("Checking if both processes still running ...");
@@ -534,7 +541,7 @@ class QueueServer implements CrawlConstants, Join
             $lines_to_check);
         $time = time(); // just in case took time to compute lines
         L\crawlLog("...Got " . $this->process_name . ".log lines");
-        if (count($lines) < $lines_to_check) {
+        if ($this->debug != "NOT_RUNNING" && count($lines) < $lines_to_check) {
             L\crawlLog("...Too few log lines to check if both processes " .
                 "running. Assume still running.");
             return;
@@ -564,10 +571,12 @@ class QueueServer implements CrawlConstants, Join
         }
         L\crawlLog("...difference last timestamp and current time ".
             ($time - $last_process_timestamp));
-        if ($time - $last_process_timestamp < C\PROCESS_TIMEOUT ) {
+        if ($this->debug != "NOT_RUNNING" &&
+            $time - $last_process_timestamp < C\PROCESS_TIMEOUT ) {
             L\crawlLog("...done check. Both processes still running.");
             return;
         }
+        $this->debug = "";
         L\crawlLog( "$process seems to have died restarting...");
         $process_lines = array_slice($process_lines, -10);
         $time_string = date("r", time());
@@ -589,7 +598,7 @@ class QueueServer implements CrawlConstants, Join
         } else {
             file_put_contents($error_log, $out_msg, FILE_APPEND);
         }
-        $init_args = ["QueueServer.php", "start", $this->channel, $process];
+        $init_args = ["QueueServer.php", "start", "{$this->channel}", $process];
         L\crawlLog( "!!!!Writing to $error_log ".
             "crash message about $process...");
         CrawlDaemon::init($init_args, "QueueServer", -3);
@@ -620,7 +629,7 @@ class QueueServer implements CrawlConstants, Join
     {
         L\crawlLog("Repeating Crawl Check for Swap...");
         if (empty($this->repeat_type) || $this->repeat_type <= 0) {
-            L\crawlLog("...not a repeating crawl, no swap needed," .
+            L\crawlLog("SW...not a repeating crawl, no swap needed, " .
                 "continuing crawl.");
             return;
         }
@@ -631,7 +640,7 @@ class QueueServer implements CrawlConstants, Join
             $this->crawl_time . ".txt";
         if ($this->isAScheduler() && file_exists($start_swap_file) &&
             !file_exists($finish_swap_file)) {
-            L\crawlLog("...performing scheduler swap activities.");
+            L\crawlLog("SW...performing scheduler swap activities.");
             // Delete everything associated with the queue
             $delete_files = [
                 C\CRAWL_DIR . "/cache/" . self::network_base_name .
@@ -672,23 +681,27 @@ class QueueServer implements CrawlConstants, Join
             DoubleIndexBundle::getStartSchedule($dir_name, $this->channel);
             file_put_contents($finish_swap_file, time());
             unlink($start_swap_file);
-            L\crawlLog("...done scheduler swap activities!!");
+            L\crawlLog("SW...done scheduler swap activities!!");
             return;
         }
-        if ($this->isAIndexer() && $this->index_archive->swapTimeReached()) {
+        if ($this->isAIndexer() && ($this->index_archive->swapTimeReached()
+            || $this->debug == 'FORCE_SWAP')) {
             if (!file_exists($start_swap_file) &&
                 !file_exists($finish_swap_file)) {
-                L\crawlLog("...swapping live and search crawl!!");
-                L\crawlLog("...writing StartSwap file for scheduler !!");
-                L\crawlLog("...indexer waits for scheduler to do swap");
+                L\crawlLog("SW...swapping live and search crawl!!");
+                L\crawlLog("SW...writing StartSwap file for scheduler !!");
+                L\crawlLog("SW...indexer waits for scheduler to do swap");
                 file_put_contents($start_swap_file, time());
             }
             if (!file_exists($start_swap_file) &&
                 file_exists($finish_swap_file)) {
-                L\crawlLog("...indexer performing swap activities");
+                L\crawlLog("SW...indexer performing swap activities");
                 $this->index_archive->swapActiveBundle();
                 unlink($finish_swap_file);
-                L\crawlLog("...done indexer swap activities!!");
+                L\crawlLog("SW...done indexer swap activities!!");
+            }
+            if ($this->debug == 'FORCE_SWAP') {
+                $this->debug = "";
             }
             return;
         }
@@ -773,7 +786,7 @@ class QueueServer implements CrawlConstants, Join
         case self::ARCHIVE_CRAWL:
             if ($this->isAIndexer()) {
                 $this->processRecrawlRobotUrls();
-                if (!file_exists(C\CRAWL_DIR."/schedules/".
+                if (!file_exists(C\CRAWL_DIR . "/schedules/".
                     self::schedule_name . $this->crawl_time . ".txt")) {
                     $this->writeArchiveCrawlInfo();
                 }
@@ -919,6 +932,12 @@ class QueueServer implements CrawlConstants, Join
             "...");
         if (file_exists($message_file)) {
             $info = unserialize(file_get_contents($message_file));
+            if (isset($info[self::DEBUG])) {
+                $this->debug = $info[self::DEBUG];
+                L\crawlLog("The following debug message found: ". $this->debug);
+                unlink($message_file);
+                return $old_info;
+            }
             if (empty($info[$this->server_type])) {
                 $info[$this->server_type] = true;
                 if ($this->server_type == self::BOTH ||
@@ -946,6 +965,7 @@ class QueueServer implements CrawlConstants, Join
                     $this->stopCrawl();
                 }
                 $this->startCrawl($info);
+                $info[self::STATUS] = self::CONTINUE_STATE;
                 if (!$is_scheduler) {
                     L\crawlLog("Starting new crawl. Timestamp:" .
                         $this->crawl_time);
@@ -1004,6 +1024,7 @@ class QueueServer implements CrawlConstants, Join
                     $this->stopCrawl();
                 }
                 $this->startCrawl($info);
+                $info[self::STATUS] = self::CONTINUE_STATE;
                 if (!$is_scheduler) {
                     L\crawlLog("Resuming crawl");
                     $this->writeAdminMessage("RESUME_CRAWL");
@@ -1117,10 +1138,10 @@ class QueueServer implements CrawlConstants, Join
             $this->writeAdminMessage("SHUTDOWN_QUEUE");
         }
         if (!isset($this->web_queue->to_crawl_queue)) {
-            L\crawlLog("URL queue appears to be empty or null");
+            L\crawlLog("DQ URL queue appears to be empty or null");
             return;
         }
-        L\crawlLog("Writing queue contents back to schedules...");
+        L\crawlLog("DQ Writing queue contents back to schedules...");
         $dir = C\CRAWL_DIR . "/schedules/" . self::schedule_data_base_name .
             $this->crawl_time;
         if (!file_exists($dir)) {
@@ -1152,8 +1173,8 @@ class QueueServer implements CrawlConstants, Join
         $schedule_data[self::TO_CRAWL] = [];
         $fh = $this->web_queue->openUrlArchive();
         for ($time = 1; $time < $count; $time++) {
-            L\crawlTimeoutLog("..have written %s urls of %s urls so far", $time,
-                $count);
+            L\crawlTimeoutLog("DQ..have written %s urls of %s urls so far",
+                $time, $count);
             $tmp = $this->web_queue->peekQueue($time, $fh);
             list($url, $weight, , ) = $tmp;
             // if queue error skip
@@ -1454,7 +1475,7 @@ class QueueServer implements CrawlConstants, Join
         $count = $this->web_queue->to_crawl_queue->count;
         $fh = $this->web_queue->openUrlArchive();
         for ($i = $count; $i > 0; $i--) {
-            L\crawlTimeoutLog("..Scheduler: Removing least url %s of %s ".
+            L\crawlTimeoutLog("CW..Scheduler: Removing least url %s of %s ".
                 "from queue.", ($count - $i), floor($count/2));
             $tmp = $this->web_queue->peekQueue($i, $fh);
             list($url, $weight, $flag, $probe) = $tmp;
@@ -1918,7 +1939,8 @@ class QueueServer implements CrawlConstants, Join
     {
         $memory_limit = L\metricToInt(ini_get("memory_limit"));
         $current_usage = memory_get_usage();
-        if ((0.7 * $memory_limit) < $current_usage) {
+        if ((0.7 * $memory_limit) < $current_usage ||
+            in_array($this->debug, ['EXCEED_MEMORY', 'EXCEED_MEMORY_HARD'])) {
             L\crawlLog("Indexer memory usage threshold exceeded!!!");
             L\crawlLog("...Threshold is: " . (0.7 * $memory_limit));
             L\crawlLog("...Current usage is: " . $current_usage);
@@ -1934,7 +1956,11 @@ class QueueServer implements CrawlConstants, Join
         $current_usage = memory_get_usage();
         L\crawlLog("Done index bundle reset, current memory usage is: ".
             $current_usage);
-        if ((0.7 * $memory_limit) < $current_usage) {
+        if ($this->debug == 'EXCEED_MEMORY') {
+            $this->debug = "";
+        }
+        if ((0.7 * $memory_limit) < $current_usage ||
+            $this->debug == 'EXCEED_MEMORY_HARD') {
             L\crawlLog("!!!Indexer usage still exceeds threshold, exiting");
             exit();
         }
@@ -1991,9 +2017,9 @@ class QueueServer implements CrawlConstants, Join
         $this->web_queue->addGotRobotTxtFilter($robot_host);
         $scheme = UrlParser::getScheme($robot_host);
         if ($scheme == "gopher") {
-            $robot_url = $robot_host."/0/robots.txt";
+            $robot_url = $robot_host . "/0/robots.txt";
         } else {
-            $robot_url = $robot_host."/robots.txt";
+            $robot_url = $robot_host . "/robots.txt";
         }
         if ($this->web_queue->containsUrlQueue($robot_url)) {
             L\crawlLog("Scheduler Removing $robot_url from queue");
@@ -2145,7 +2171,7 @@ class QueueServer implements CrawlConstants, Join
             unset($this->waiting_hosts[$hash_host]);
             //allows crawl-delayed host to be scheduled again
         }
-        L\crawlLog("Scheduler Done removing host delayed for schedule ".
+        L\crawlLog("Scheduler done removing host delayed for schedule ".
            $sites[self::SCHEDULE_TIME]);
         $now = time();
         /* no schedule should take more than one hour on the other hand
            schedule data might be waiting for days
@@ -2449,9 +2475,9 @@ class QueueServer implements CrawlConstants, Join
     {
         $i = 1; // array implementation of priority queue starts at 1 not 0
         $fetch_size = 0;
-        L\crawlLog("Scheduler: Start Produce Fetch Batch.");
-        L\crawlLog("Crawl Time is: ". $this->crawl_time);
-        L\crawlLog("Memory usage is " . memory_get_usage() );
+        L\crawlLog("FB Scheduler: Start Produce Fetch Batch.");
+        L\crawlLog("FB Crawl Time is: ". $this->crawl_time);
+        L\crawlLog("FB Memory usage is " . memory_get_usage() );
         $count = $this->web_queue->to_crawl_queue->count;
         $schedule_time = time();
         $first_line = $this->calculateScheduleMetaInfo($schedule_time);
@@ -2461,7 +2487,7 @@ class QueueServer implements CrawlConstants, Join
         $time_per_request_guess = C\MINIMUM_FETCH_LOOP_TIME ;
         // it would be impressive if we can achieve this speed
         $current_crawl_index = -1;
-        L\crawlLog("Scheduler: Trying to Produce Fetch Batch; " .
+        L\crawlLog("FB Scheduler: Trying to Produce Fetch Batch; " .
             "Queue Size $count");
         $start_time = microtime(true);
         $fh = $this->web_queue->openUrlArchive();
@@ -2477,7 +2503,7 @@ class QueueServer implements CrawlConstants, Join
         $max_queue_size = C\NUM_URLS_QUEUE_RAM -
             C\SEEN_URLS_BEFORE_UPDATE_SCHEDULER * $max_links;
         while ($i <= $count && $fetch_size < C\MAX_FETCH_SIZE) {
-            L\crawlTimeoutLog("..Scheduler: still producing fetch batch. ".
+            L\crawlTimeoutLog("FB..Scheduler: still producing fetch batch. ".
                 "Examining location %s in queue of %s.", $i, $count);
             //look in queue for url and its weight
             $tmp = $this->web_queue->peekQueue($i, $fh);
@@ -2485,7 +2511,7 @@ class QueueServer implements CrawlConstants, Join
             // if queue error remove entry any loop
             if ($tmp === false || strcmp($url, "LOOKUP ERROR") == 0) {
                 $delete_urls[$i] = false;
-                L\crawlLog("Scheduler: Removing lookup error at".
+                L\crawlLog("FB Scheduler: Removing lookup error at".
                     " $i during produce fetch");
                 $i++;
                 continue;
@@ -2672,17 +2698,17 @@ class QueueServer implements CrawlConstants, Join
         } //end while
         $this->web_queue->closeUrlArchive($fh);
         $new_time = microtime(true);
-        L\crawlLog("...Scheduler: Done selecting URLS for fetch batch time ".
+        L\crawlLog("FB...Scheduler: Done selecting URLS for fetch batch time ".
             "so far:". L\changeInMicrotime($start_time));
-        L\crawlLog("...Scheduler: Examined urls while making fetch batch:" .
+        L\crawlLog("FB...Scheduler: Examined urls while making fetch batch:" .
             ($i - 1));
-        L\crawlLog("...Scheduler: Number of waiting urls seen in queue:" .
+        L\crawlLog("FB...Scheduler: Number of waiting urls seen in queue:" .
             $num_waiting_urls);
         $num_deletes = count($delete_urls);
         $k = 0;
         foreach ($delete_urls as $delete_url) {
             $k++;
-            L\crawlTimeoutLog("..Scheduler: Removing selected url %s of %s ".
+            L\crawlTimeoutLog("FB..Scheduler: Removing selected url %s of %s ".
                 "from queue.", $k, $num_deletes);
             if ($delete_url) {
                 $this->web_queue->removeQueue($delete_url);
             } else {
                 $this->web_queue->to_crawl_queue->poll($k);
             }
         }
-        L\crawlLog("...Scheduler: Removed $k URLS for fetch batch from ".
+        L\crawlLog("FB...Scheduler: Removed $k URLS for fetch batch from ".
             "queue in time: " . L\changeInMicrotime($new_time));
         $new_time = microtime(true);
         if (isset($sites) && count($sites) > 0 ) {
@@ -2722,8 +2748,9 @@ class QueueServer implements CrawlConstants, Join
             $num_sites = count($sites);
             $k = 0;
             foreach ($sites as $site) {
-                L\crawlTimeoutLog("..Scheduler: Still Writing fetch schedule" .
-                    " %s of %s.", $k, $num_sites);
+                L\crawlTimeoutLog(
+                    "FB..Scheduler: Still Writing fetch schedule %s of %s.",
+                    $k, $num_sites);
                 $k++;
                 $extracted_etag = null;
                 list($url, $weight, $delay) = $site;
@@ -2777,17 +2804,17 @@ class QueueServer implements CrawlConstants, Join
                 fwrite($fh, $out_string);
             }
             fclose($fh);
-            L\crawlLog("...Scheduler: Sort URLS and write schedule time: ".
+            L\crawlLog("FB...Scheduler: Sort URLS and write schedule time: ".
                 L\changeInMicrotime($new_time));
-            L\crawlLog("Scheduler: End Produce Fetch Batch Memory usage".
+ L\crawlLog("FB Scheduler: End Produce Fetch Batch Memory usage: ". memory_get_usage() ); - L\crawlLog("Scheduler: Created fetch batch of size $num_sites.". + L\crawlLog("FB Scheduler: Created fetch batch of size $num_sites.". " $num_deletes urls were deleted.". " Queue size is now ". $this->web_queue->to_crawl_queue->count. "...Total Time to create batch: ". L\changeInMicrotime($start_time)); } else { - L\crawlLog("Scheduler: No fetch batch created!! " . + L\crawlLog("FB Scheduler: No fetch batch created!! " . "Time failing to make a fetch batch:" . L\changeInMicrotime($start_time).". Loop properties:$i $count". " $num_deletes urls were deleted in failed attempt."); @@ -2795,9 +2822,9 @@ class QueueServer implements CrawlConstants, Join if ($num_deletes < 5 && $i >= $count && $count >= C\NUM_URLS_QUEUE_RAM - C\SEEN_URLS_BEFORE_UPDATE_SCHEDULER * $max_links) { - L\crawlLog("Scheduler: Queue Full and Couldn't produce Fetch ". - "Batch!! Or Delete any URLS!!!"); - L\crawlLog("Scheduler: Rescheduling Queue Contents ". + L\crawlLog("FB Scheduler: Queue Full and Couldn't produce ". + "Fetch Batch!! Or Delete any URLS!!!"); + L\crawlLog("FB Scheduler: Rescheduling Queue Contents ". "(not marking seen) to try to unjam!"); $this->dumpQueueToSchedules(true); $this->clearWebQueue(); diff --git a/src/library/CrawlConstants.php b/src/library/CrawlConstants.php index 52768208e..28f065b3f 100755 --- a/src/library/CrawlConstants.php +++ b/src/library/CrawlConstants.php @@ -111,6 +111,7 @@ interface CrawlConstants const HEIGHT = 'B'; const WIDTH = 'C'; const ROBOTS_TXT = 'D'; + const DEBUG = "E"; // codes available here const DOC_DEPTH = 'M'; const DOC_RANK = 'N'; diff --git a/src/library/CrawlDaemon.php b/src/library/CrawlDaemon.php index 654b8552a..b99d5f3bc 100644 --- a/src/library/CrawlDaemon.php +++ b/src/library/CrawlDaemon.php @@ -111,7 +111,7 @@ class CrawlDaemon implements CrawlConstants $name_string = CrawlDaemon::getNameString(self::$name, self::$subname); if (($now - $time) > C\PROCESS_TIMEOUT) { - crawlLog($name_string.": ".($now - $time) . + crawlLog($name_string . ": ".($now - $time) . " seconds has elapsed since processHandler last called.", null, true); crawlLog("Timeout exceeded...", null, true); @@ -129,7 +129,7 @@ class CrawlDaemon implements CrawlConstants * Used to send a message the given daemon or run the program in the * foreground. * - * @param array $argv an array of command line arguments. The argument + * @param array $init_argv an array of command line arguments. The argument * start will check if the process control functions exists if these * do they will fork and detach a child process to act as a daemon. * a lock file will be created to prevent additional daemons from @@ -144,7 +144,7 @@ class CrawlDaemon implements CrawlConstants * to see if already running before starting * @param string $use_message echo'd if incorrect parameters sent */ - public static function init($argv, $name, $exit_type = 1, + public static function init($init_argv, $name, $exit_type = 1, $use_message = "") { $use_message = ($use_message) ? 
             $use_message :
@@ -157,8 +157,8 @@ class CrawlDaemon implements CrawlConstants
         "Additional arguments are described in Yioop documentation.\n";
         self::$name = $name;
-        if (isset($argv[2]) && $argv[2] != "none") {
-            self::$subname = $argv[2];
+        if (isset($init_argv[2]) && $init_argv[2] != "none") {
+            self::$subname = $init_argv[2];
         } else {
             self::$subname = "";
         }
@@ -168,24 +168,51 @@ class CrawlDaemon implements CrawlConstants
             echo "BAD REQUEST";
             exit();
         }
-        if (!isset($argv[1])) {
+        if (!isset($init_argv[1])) {
             echo $use_message;
             exit();
         }
         $messages_file = self::getMesssageFileName(self::$name,
             self::$subname);
-        switch ($argv[1]) {
+        switch ($init_argv[1]) {
+            case "child":
+                self::$mode = 'daemon';
+                $info = [];
+                $info[self::STATUS] = self::WAITING_START_MESSAGE_STATE;
+                if ($name != 'index') {
+                    file_put_contents($messages_file, serialize($info));
+                    chmod($messages_file, 0777);
+                }
+                $_SERVER["LOG_TO_FILES"] = true;
+                // if false log messages are sent to the console
+                break;
+            case "debug":
+                $num_args = count($init_argv);
+                if ($num_args <= 3) {
+                    echo "Too few args. Might need to specify channel.\n";
+                } else if ($num_args > 3) {
+                    $last_arg = $init_argv[$num_args - 1];
+                    echo $messages_file;
+                    $info = [];
+                    $info[self::DEBUG] = $last_arg;
+                    file_put_contents($messages_file, serialize($info));
+                    chmod($messages_file, 0777);
+                }
+                exit();
             case "start":
                 $options = "";
                 $quote = (strstr(PHP_OS, "WIN")) ? '' : '"';
-                for ($i = 3; $i < count($argv); $i++) {
-                    $options .= $quote . $argv[$i]. $quote . " ";
+                for ($i = 3; $i < count($init_argv); $i++) {
+                    $options .= $quote . $init_argv[$i]. $quote . " ";
                 }
                 $options = trim($options);
-                $subname = (!isset($argv[2]) || $argv[2] == 'none') ?
+                $subname = (!isset($init_argv[2]) || $init_argv[2] == 'none') ?
                     'none' :self::$subname;
-                $name_prefix = (isset($argv[3])) ? $argv[3] : self::$subname;
+                $name_prefix = (isset($init_argv[3])) ? $init_argv[3] :
+                    self::$subname;
                 $name_string = CrawlDaemon::getNameString($name, $name_prefix);
-                echo "Starting $name_string...\n";
+                self::daemonLog("Starting $name_string...", $exit_type);
+                self::daemonLog("options: $name, $subname, $options",
+                    $exit_type);
                 CrawlDaemon::start($name, $subname, $options, $exit_type);
                 break;
             case "stop":
@@ -201,22 +228,31 @@ class CrawlDaemon implements CrawlConstants
                 }
                 $_SERVER["LOG_TO_FILES"] = false;
                 break;
-            case "child":
-                self::$mode = 'daemon';
-                $info = [];
-                $info[self::STATUS] = self::WAITING_START_MESSAGE_STATE;
-                if ($name != 'index') {
-                    file_put_contents($messages_file, serialize($info));
-                    chmod($messages_file, 0777);
-                }
-                $_SERVER["LOG_TO_FILES"] = true;
-                // if false log messages are sent to the console
-                break;
             default:
                 echo $use_message;
                 exit();
         }
     }
+    /**
+     * Used to print a log message in a way helpful when debugging
+     * CrawlDaemon tasks where crawlLog() might not yet be set up.
+     * Sends the message to standard out if crawlLog is not set up;
+     * otherwise, sends it to crawlLog().
+     *
+     * @param string $msg string to log to either standard out or
+     *      to Yioop's crawlLog
+     * @param int $exit_type the exit_type used by init() and start();
+     *      values of absolute value > 2 are only used if crawlLog has
+     *      already been set up
+     */
+    public static function daemonLog($msg, $exit_type)
+    {
+        if (in_array($exit_type, [-2, -1, 0, 1, 2])) {
+            echo "$msg\n";
+        } else {
+            crawlLog($msg);
+        }
+    }
     /**
      * Used to start a daemon running in the background
      *
@@ -399,7 +435,7 @@ class CrawlDaemon implements CrawlConstants
      */
    public static function getNameString($name, $subname)
    {
-        return ($subname == "") ? $name : $subname . "-" . $name;
+        return ($subname === "") ? $name : $subname . "-" . $name;
    }
    /**
     * Returns the statuses of the running daemons
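Taken together, the new `debug` verb in `CrawlDaemon::init()` gives a way to inject the debug commands that the QueueServer code above checks for. The sketch below shows how `init()` might be driven; the channel value `0` and the bootstrap assumption are illustrative, not part of the patch:

```php
<?php
// Hypothetical driver, assuming Yioop's bootstrap/autoloader has already run.
// From a shell the equivalent would be something like:
//   php src/executables/QueueServer.php debug 0 FORCE_SWAP
use seekquarry\yioop\library\CrawlDaemon;

// Debug values the patched QueueServer reacts to:
//   NOT_RUNNING        - skip the timeout guards in the liveness check so the
//                        restart path for a "dead" sub-process is exercised
//   FORCE_SWAP         - make the indexer swap the live/search bundles of a
//                        repeating (double index) crawl immediately
//   EXCEED_MEMORY      - simulate crossing the 0.7 * memory_limit threshold
//                        (cleared again after the index bundle reset)
//   EXCEED_MEMORY_HARD - as above, but still over threshold after the reset,
//                        so the exit() path runs
CrawlDaemon::init(["QueueServer.php", "debug", "0", "FORCE_SWAP"],
    "QueueServer");
```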