viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]

Replaced the use of array() by []

Salil Shenoy [2016-10-22]
Replaced the use of array() by []

Signed-off-by: Chris Pollett <chris@pollett.org>
Filename
src/executables/Fetcher.php
src/library/TripletExtractor.php
diff --git a/src/executables/Fetcher.php b/src/executables/Fetcher.php
index 01735b4e9..22aa9a3be 100755
--- a/src/executables/Fetcher.php
+++ b/src/executables/Fetcher.php
@@ -15,7 +15,7 @@
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
@@ -559,6 +559,7 @@ class Fetcher implements CrawlConstants
         $local_archives = [""];
         while (CrawlDaemon::processHandler()) {
             $start_time = microtime(true);
+            $info = [];
             $fetcher_message_file = C\CRAWL_DIR.
                 "/schedules/{$prefix}FetcherMessages.txt";
             if (file_exists($fetcher_message_file)) {
@@ -577,6 +578,8 @@ class Fetcher implements CrawlConstants
                 if ($info[self::CRAWL_TIME] == 0) {
                     $info[self::STATUS] = self::NO_DATA_STATE;
                     $this->to_crawl = [];
+                } else {
+                   L\crawlLog("Crawl time is now " . $this->crawl_time);
                 }
             } else if ($this->crawl_type == self::ARCHIVE_CRAWL &&
                     $this->arc_type != "WebArchiveBundle" &&
@@ -981,6 +984,11 @@ class Fetcher implements CrawlConstants
         if (isset($info[self::CRAWL_TIME])
             && ($info[self::CRAWL_TIME] != $this->crawl_time
             || $info[self::CRAWL_TIME] == 0)) {
+            if ($info[self::CRAWL_TIME] > 0) {
+                L\crawlLog("New Crawl Time Found: {$info[self::CRAWL_TIME]}");
+            } else {
+                L\crawlLog("Crawl Time Changing to 0");
+            }
             $dir = C\CRAWL_DIR."/schedules";
             $time_change = true;
             /* Zero out the crawl. If haven't done crawl before, then scheduler
@@ -1033,6 +1041,8 @@ class Fetcher implements CrawlConstants
                 "{$this->crawl_time}.txt") && file_exists(
                 "$dir/$prefix".self::fetch_batch_name.
                     "{$this->crawl_time}.txt")) {
+                L\crawlLog("Loading old batches for ".
+                    "{$this->crawl_time}.");
                 $info = unserialize(file_get_contents(
                     "$dir/$prefix".self::fetch_crawl_info.
                         "{$this->crawl_time}.txt"));
@@ -1762,6 +1772,9 @@ class Fetcher implements CrawlConstants
                 }
                 $doc_info = $processor->handle($site[self::PAGE],
                     $site[self::URL]);
+                if (C\FETCHER_PROCESS_DELAY > 0 ) {
+                    usleep(C\FETCHER_PROCESS_DELAY);
+                }
                 if (isset($site[self::REPOSITORY_TYPE]) &&
                     $site[self::REPOSITORY_TYPE] == self::REPOSITORY_GIT) {
                     $site[self::URL] = $tmp_url_store;
diff --git a/src/library/TripletExtractor.php b/src/library/TripletExtractor.php
index 2dd8bbc2f..6bb38874f 100644
--- a/src/library/TripletExtractor.php
+++ b/src/library/TripletExtractor.php
@@ -23,10 +23,9 @@
  * @author Chris Pollett chris@pollett.org
  * @license http://www.gnu.org/licenses/ GPL3
  * @link http://www.seekquarry.com/
- * @copyright 2009 - 2015
+ * @copyright 2009 - 2016
  * @filesource
  */
-
 namespace seekquarry\yioop\library;

 use seekquarry\yioop\configs as C;
@@ -49,10 +48,10 @@ class TripletExtractor
     /**
      * Takes a phrase and tags each term in it with its part of speech.
      * So each term in the original phrase gets mapped to term~part_of_speech
-     * This tagger is based on a Brill tagger. It makes uses a lexicon
+     * This tagger is based on a Brill tagger. It uses a lexicon
      * consisting of words from the Brown corpus together with a list of
      * part of speech tags that that word had in the Brown Corpus. These are
-     * used to get an initial part of speech (in word was not present than
+     * used to get an initial part of speech (if word was not present then
      * we assume it is a noun). From this a fixed set of rules is used to modify
      * the initial tag if necessary.
      *
@@ -86,13 +85,13 @@ class TripletExtractor
         }
         preg_match_all("/[\w\d]+/", $text, $matches);
         $tokens = $matches[0];
-        $nouns = array('NN', 'NNS', 'NNP');
-        $verbs = array('VBD', 'VBP', 'VB');
-        $result = array();
-        $previous = array('token' => -1, 'tag' => -1);
+        $nouns = ['NN', 'NNS', 'NNP'];
+        $verbs = ['VBD', 'VBP', 'VB'];
+        $result = [];
+        $previous = ['token' => -1, 'tag' => -1];
         $previous_token = -1;
         sort($tokens);
-        $dictionary = array();
+        $dictionary = [];
         /*
             Notice we sorted the tokens, and notice how we use $cur_pos
             so only advance forward through $lex_string. So the
@@ -118,9 +117,9 @@ class TripletExtractor
         $tag_list = array();
         foreach ($matches[0] as $token) {
             $prev_tag_list = $tag_list;
-            $tag_list = array();
+            $tag_list = [];
             // default to a common noun
-            $current = array('token' => $token, 'tag' => 'NN');
+            $current = ['token' => $token, 'tag' => 'NN'];
             // remove trailing full stops
             $token = strtolower(rtrim($token, "."));
             if (isset($dictionary[$token])) {
@@ -208,7 +207,7 @@ class TripletExtractor
     public static function taggedPartOfSpeechTokensToString($tagged_tokens)
     {
         $tagged_phrase = "";
-        $simplified_parts_of_speech = array(
+        $simplified_parts_of_speech = [
             "NN" => "NN",
             "NNS" => "NN",
             "NNP" => "NN",
@@ -228,7 +227,7 @@ class TripletExtractor
             "RBR" => "AV",
             "RBS" => "AV",
             "WRB" => "AV"
-        );
+        ];
         foreach ($tagged_tokens as $t) {
             $tag = trim($t['tag']);
             $tag = (isset($simplified_parts_of_speech[$tag])) ?
@@ -257,12 +256,12 @@ class TripletExtractor
             fclose($fh);
         }
         preg_match_all("/[\w\d\.]+/", $text, $matches);
-        $nouns = array('NN', 'NNS');
-        $return = array();
+        $nouns = ['NN', 'NNS'];
+        $return = [];
         $i = 0;
         foreach ($matches[0] as $token) {
             // default to a common noun
-            $return[$i] = array('token' => $token, 'tag' => 'NN');
+            $return[$i] = ['token' => $token, 'tag' => 'NN'];
             // remove trailing full stops
             if (substr($token, -1) == '.') {
                 $token = preg_replace('/\.+$/', '', $token);
@@ -275,7 +274,7 @@ class TripletExtractor
             if ($i > 0) {
                 if ($return[$i - 1]['tag'] == 'DT' &&
                     in_array($return[$i]['tag'],
-                        array('VBD', 'VBP', 'VB'))
+                        ['VBD', 'VBP', 'VB'])
                 ) {
                     $return[$i]['tag'] = 'NN';
                 }
@@ -346,7 +345,7 @@ class TripletExtractor
      */
     public static function generateParseTreeUsingRDP($tagger_array)
     {
-        $tree = array();
+        $tree = [];
         $tree = ["cur_node" => 0];
         $tree_np = TripletExtractor::extractNPUsingRDP($tagger_array, $tree);
         $tree = ["cur_node" => $tree_np['cur_node']];
@@ -578,7 +577,7 @@ class TripletExtractor
     */
    public static function extractTriplet($tree)
    {
-       $triplet = array();
+       $triplet = [];
        $triplet['subject'] = TripletExtractor::extractSubjectFromTree($tree);
        $triplet['predicate'] =
            TripletExtractor::extractPredicateFromTree($tree);
@@ -592,7 +591,7 @@ class TripletExtractor
      */
     public static function processTripletForStorage($triplet_tree)
     {
-        $processed_triplet = array();
+        $processed_triplet = [];
         $processed_triplet['RAW'] =
             TripletExtractor::getRawTripletForStorage($triplet_tree);
         $processed_triplet['FEATURED'] =
@@ -606,8 +605,8 @@ class TripletExtractor
      */
     public static function getRawTripletForStorage($triplet_tree)
     {
-        $raw_triplet = array();
-        $question_answer_triplet = array();
+        $raw_triplet = [];
+        $question_answer_triplet = [];
         if (isset($triplet_tree['subject']['RAW'])
             && isset($triplet_tree['predicate']['RAW'])
             && isset($triplet_tree['object']['RAW'])
@@ -641,8 +640,8 @@ class TripletExtractor
      */
     public static function getFeaturedTripletForStorage($triplet_tree)
     {
-        $featured_triplet = array();
-        $question_answer_triplet = array();
+        $featured_triplet = [];
+        $question_answer_triplet = [];
         if (isset($triplet_tree['subject']['FEATURED'])
             && isset($triplet_tree['predicate']['FEATURED'])
             && isset($triplet_tree['object']['FEATURED'])
@@ -685,7 +684,7 @@ class TripletExtractor
      */
     public static function extractSubjectFromTree($tree)
     {
-        $subject = array();
+        $subject = [];
         if (isset($tree['NP']) && $tree['NP'] != null) {
             $tree_np = $tree['NP'];
             $value = TripletExtractor::extractFirstNounFromNPTree($tree_np);
@@ -709,7 +708,7 @@ class TripletExtractor
      */
     public static function extractPredicateFromTree($tree)
     {
-        $predicate = array();
+        $predicate = [];
         if (isset($tree['VP']) && $tree['VP'] != null) {
             $tree_vp = $tree['VP'];
             $value = TripletExtractor::extractDeepestVerbFromVBTree($tree_vp);
@@ -736,7 +735,7 @@ class TripletExtractor
      */
     public static function extractObjectFromTree($tree)
     {
-        $object = array();
+        $object = [];
         if (isset($tree['VP']) && $tree['VP'] != null) {
             $tree_vp = $tree['VP'];
             if (isset($tree_vp['NP']) && $tree_vp['NP'] != null) {
@@ -798,7 +797,7 @@ class TripletExtractor
      */
     public static function extractAttributes($tree)
     {
-        $attribute_map = array();
+        $attribute_map = [];
         if (isset($tree['JJ']) && count($tree['JJ']) > 0) {
             $attribute_map['JJ'] = $tree['JJ']['JJ'];
         }
@@ -846,8 +845,8 @@ class TripletExtractor
      */
     public static function storeStatementArraysAsTriplet($statement_array)
     {
-        $triplets_list = array();
-        $question_list = array();
+        $triplets_list = [];
+        $question_list = [];
         $question_answer_list = array();
         foreach ($statement_array as $key => $value) {
             try {
@@ -894,7 +893,7 @@ class TripletExtractor
         $question_string_tagged = TripletExtractor::partOfSpeechTagger_Brill(
             $question_string);
         $index = 0;
-        $generated_question_array = array();
+        $generated_question_array = [];
         if (isset($question_string_tagged[$index]) &&
             ("WRB" == trim($question_string_tagged[$index]['tag']) ||
                 "WP" == trim($question_string_tagged[$index]['tag']))
@@ -934,10 +933,10 @@ class TripletExtractor
      */
     public static function parseWHOQuestion($question_string_tagged, $index)
     {
-        $generated_question_array = array();
+        $generated_question_array = [];
         $tree = ["cur_node" => $index];
         $tree['NP'] = "WHO";
-        $triplet = array();
+        $triplet = [];
         $tree_vp = TripletExtractor::extractVPUsingRDP(
             $question_string_tagged, $tree);
         $triplet['predicate'] = TripletExtractor::extractPredicateFromTree(
@@ -989,7 +988,7 @@ class TripletExtractor
      */
     public static function parseWHPlusQuestion($question_string_tagged, $index)
     {
-        $generated_question_array = array();
+        $generated_question_array = [];
         $aux_verb = "";
         while (isset($question_string_tagged[$index]) &&
             ("VB" == trim($question_string_tagged[$index]['tag']) ||
@@ -1003,7 +1002,7 @@ class TripletExtractor
         }
         $tree = ["cur_node" => $index];
         $tree['NP'] = "WHPlus";
-        $triplet = array();
+        $triplet = [];
         $tree_np = TripletExtractor::extractNPUsingRDP(
             $question_string_tagged, $tree);
         $triplet['subject'] = TripletExtractor::extractSubjectFromTree(
ViewGit