viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]
Filename | |
---|---|
src/controllers/SearchController.php | |
src/controllers/components/CrawlComponent.php | |
src/executables/Fetcher.php | |
src/library/CrawlDaemon.php | |
src/models/Model.php | |
diff --git a/src/controllers/SearchController.php b/src/controllers/SearchController.php index 55afb017e..937db4aa7 100755 --- a/src/controllers/SearchController.php +++ b/src/controllers/SearchController.php @@ -1575,6 +1575,19 @@ class SearchController extends Controller implements CrawlConstants } $dom->encoding = "UTF-8"; // insert proper $head = $dom->getElementsByTagName('head')->item(0); + $body = $dom->getElementsByTagName('body')->item(0); + $html_node = $dom->getElementsByTagName('html')->item(0); + if (is_object($html_node) && is_object($body)&& !is_object($head)) { + //make a head if it doesn't exis, but rest of page like html + $html_first_child = $html_node->firstChild; + $head = $dom->createElement('head'); + $title = $dom->createElement('title'); + $text_node = $dom->createTextNode( + tl('search_controller_site_cache')); + $title->appendChild($text_node); + $head->appendChild($title); + $html_node->insertBefore($head, $html_first_child); + } if (is_object($head)) { // add a noindex nofollow robot directive to page $head_first_child = $head->firstChild; diff --git a/src/controllers/components/CrawlComponent.php b/src/controllers/components/CrawlComponent.php index 1972842d8..1ff1a301e 100644 --- a/src/controllers/components/CrawlComponent.php +++ b/src/controllers/components/CrawlComponent.php @@ -214,7 +214,7 @@ class CrawlComponent extends Component implements CrawlConstants $parent->web_site->filePutContents($filename, serialize($crawl_params)); chmod($filename, 0777); - if($crawl_model->sendMessage($crawl_params, + if($crawl_model->sendStartCrawlMessage($crawl_params, null, $machine_urls)) { return $parent->redirectWithMessage( tl('crawl_component_resume_crawl'), diff --git a/src/executables/Fetcher.php b/src/executables/Fetcher.php index 8fb305a39..fca344937 100755 --- a/src/executables/Fetcher.php +++ b/src/executables/Fetcher.php @@ -53,14 +53,14 @@ if (php_sapi_name() != 'cli' || ini_set("memory_limit", "1200M"); //so have enough memory to 
crawl sitemaps /** for L\crawlHash and L\crawlLog and Yioop constants*/ -require_once __DIR__."/../library/Utility.php"; +require_once __DIR__ . "/../library/Utility.php"; if (!C\PROFILE) { echo "Please configure the search engine instance by visiting" . "its web interface on localhost.\n"; exit(); } /** To guess language based on page encoding */ -require_once __DIR__."/../library/LocaleFunctions.php"; +require_once __DIR__ . "/../library/LocaleFunctions.php"; /* * We'll set up multi-byte string handling to use UTF-8 */ @@ -583,7 +583,7 @@ class Fetcher implements CrawlConstants L\crawlLog("PHP Version in use: " . phpversion()); $prefix = $this->fetcher_num."-"; if (!file_exists(C\CRAWL_DIR."/{$prefix}temp")) { - mkdir(C\CRAWL_DIR."/{$prefix}temp"); + mkdir(C\CRAWL_DIR . "/{$prefix}temp"); } $info[self::STATUS] = self::CONTINUE_STATE; $local_archives = [""]; diff --git a/src/library/CrawlDaemon.php b/src/library/CrawlDaemon.php index d9bf01ac8..4ac836328 100644 --- a/src/library/CrawlDaemon.php +++ b/src/library/CrawlDaemon.php @@ -211,6 +211,7 @@ class CrawlDaemon implements CrawlConstants // if false log messages are sent to the console break; default: + echo $use_message; exit(); } } diff --git a/src/models/Model.php b/src/models/Model.php index ea3ca8a8b..382e964da 100755 --- a/src/models/Model.php +++ b/src/models/Model.php @@ -255,6 +255,10 @@ class Model implements CrawlConstants $page[self::DESCRIPTION] = mb_substr(strip_tags( $page[self::DESCRIPTION]), 0, $description_length); } + $page[self::DESCRIPTION] = preg_replace("/\p{C}+/u", "", + $page[self::DESCRIPTION]); + $page[self::DESCRIPTION] = preg_replace("/^[^\p{L}]+/u", "", + $page[self::DESCRIPTION]); $page[self::SCORE] = mb_substr($page[self::SCORE], 0, self::SCORE_PRECISION); $pages[$i] = $page;