viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]

Improve formatting of cache pages in case a page did not have a head, fix an issue with resuming crawls, a=chris

Chris Pollett [2019-01-16 18:Jan:th]
Improve formatting of cache pages in case a page did not have a head, fix an issue with resuming crawls, a=chris
Filename
src/controllers/SearchController.php
src/controllers/components/CrawlComponent.php
src/executables/Fetcher.php
src/library/CrawlDaemon.php
src/models/Model.php
diff --git a/src/controllers/SearchController.php b/src/controllers/SearchController.php
index 55afb017e..937db4aa7 100755
--- a/src/controllers/SearchController.php
+++ b/src/controllers/SearchController.php
@@ -1575,6 +1575,19 @@ class SearchController extends Controller implements CrawlConstants
         }
         $dom->encoding = "UTF-8"; // insert proper
         $head = $dom->getElementsByTagName('head')->item(0);
+        $body = $dom->getElementsByTagName('body')->item(0);
+        $html_node = $dom->getElementsByTagName('html')->item(0);
+        if (is_object($html_node) && is_object($body)&& !is_object($head)) {
+            //make a head if it doesn't exist, but rest of page looks like html
+            $html_first_child = $html_node->firstChild;
+            $head = $dom->createElement('head');
+            $title = $dom->createElement('title');
+            $text_node = $dom->createTextNode(
+                tl('search_controller_site_cache'));
+            $title->appendChild($text_node);
+            $head->appendChild($title);
+            $html_node->insertBefore($head, $html_first_child);
+        }
         if (is_object($head)) {
             // add a noindex nofollow robot directive to page
             $head_first_child = $head->firstChild;
diff --git a/src/controllers/components/CrawlComponent.php b/src/controllers/components/CrawlComponent.php
index 1972842d8..1ff1a301e 100644
--- a/src/controllers/components/CrawlComponent.php
+++ b/src/controllers/components/CrawlComponent.php
@@ -214,7 +214,7 @@ class CrawlComponent extends Component implements CrawlConstants
                     $parent->web_site->filePutContents($filename,
                         serialize($crawl_params));
                     chmod($filename, 0777);
-                    if($crawl_model->sendMessage($crawl_params,
+                    if($crawl_model->sendStartCrawlMessage($crawl_params,
                         null, $machine_urls)) {
                         return $parent->redirectWithMessage(
                             tl('crawl_component_resume_crawl'),
diff --git a/src/executables/Fetcher.php b/src/executables/Fetcher.php
index 8fb305a39..fca344937 100755
--- a/src/executables/Fetcher.php
+++ b/src/executables/Fetcher.php
@@ -53,14 +53,14 @@ if (php_sapi_name() != 'cli' ||
 ini_set("memory_limit", "1200M"); //so have enough memory to crawl sitemaps

 /** for L\crawlHash and L\crawlLog and Yioop constants*/
-require_once __DIR__."/../library/Utility.php";
+require_once __DIR__ . "/../library/Utility.php";
 if (!C\PROFILE) {
     echo "Please configure the search engine instance by visiting" .
         "its web interface on localhost.\n";
     exit();
 }
 /** To guess language based on page encoding */
-require_once __DIR__."/../library/LocaleFunctions.php";
+require_once __DIR__ . "/../library/LocaleFunctions.php";
 /*
  * We'll set up multi-byte string handling to use UTF-8
  */
@@ -583,7 +583,7 @@ class Fetcher implements CrawlConstants
         L\crawlLog("PHP Version in use: " . phpversion());
         $prefix = $this->fetcher_num."-";
         if (!file_exists(C\CRAWL_DIR."/{$prefix}temp")) {
-            mkdir(C\CRAWL_DIR."/{$prefix}temp");
+            mkdir(C\CRAWL_DIR . "/{$prefix}temp");
         }
         $info[self::STATUS] = self::CONTINUE_STATE;
         $local_archives = [""];
diff --git a/src/library/CrawlDaemon.php b/src/library/CrawlDaemon.php
index d9bf01ac8..4ac836328 100644
--- a/src/library/CrawlDaemon.php
+++ b/src/library/CrawlDaemon.php
@@ -211,6 +211,7 @@ class CrawlDaemon implements CrawlConstants
                     // if false log messages are sent to the console
                 break;
             default:
+                echo $use_message;
                 exit();
         }
     }
diff --git a/src/models/Model.php b/src/models/Model.php
index ea3ca8a8b..382e964da 100755
--- a/src/models/Model.php
+++ b/src/models/Model.php
@@ -255,6 +255,10 @@ class Model implements CrawlConstants
                 $page[self::DESCRIPTION] = mb_substr(strip_tags(
                     $page[self::DESCRIPTION]), 0, $description_length);
             }
+            $page[self::DESCRIPTION] = preg_replace("/\p{C}+/u", "",
+                $page[self::DESCRIPTION]);
+            $page[self::DESCRIPTION] = preg_replace("/^[^\p{L}]+/u", "",
+                $page[self::DESCRIPTION]);
             $page[self::SCORE] = mb_substr($page[self::SCORE], 0,
                 self::SCORE_PRECISION);
             $pages[$i] = $page;
ViewGit