viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]

tacks on a fragment to make links unique for weather scraping source urls, a=chris

Chris Pollett [2016-11-15 07:Nov:th]
tacks on a fragment to make links unique for weather scraping source urls, a=chris
Filename
src/executables/Fetcher.php
src/library/CrawlConstants.php
src/library/media_jobs/FeedsUpdateJob.php
src/models/PhraseModel.php
diff --git a/src/executables/Fetcher.php b/src/executables/Fetcher.php
index a8bfd5a44..0edf1bcb8 100755
--- a/src/executables/Fetcher.php
+++ b/src/executables/Fetcher.php
@@ -2793,10 +2793,9 @@ class Fetcher implements CrawlConstants
                 ]->addDocumentWords($doc_keys, self::NEEDS_OFFSET_FLAG,
                 $word_lists, $meta_ids, PhraseParser::$materialized_metas,
                 true, $doc_rank);
-
             if(isset($word_and_qa_lists['QUESTION_ANSWER_LIST'])) {
-                $question_list = $word_and_qa_lists['QUESTION_ANSWER_LIST'];
-                $site[self::QUESTION_TRIPLETS] = $question_list;
+                $site[self::QUESTION_ANSWERS] =
+                    $word_and_qa_lists['QUESTION_ANSWER_LIST'];
                 $this->found_sites[self::SEEN_URLS][$i] = $site;
             }
             /*
diff --git a/src/library/CrawlConstants.php b/src/library/CrawlConstants.php
index d9c5e2064..b6e242aa9 100755
--- a/src/library/CrawlConstants.php
+++ b/src/library/CrawlConstants.php
@@ -232,6 +232,6 @@ interface CrawlConstants
     const SCRAPER_LABEL = 'du';
     const SCRAPERS = 'dv';
     const IS_NEWS = "dw";
-    const QUESTION_TRIPLETS = 'dx';
+    const QUESTION_ANSWERS = 'dx';
     const CONTENT_LENGTH = 'dy';
 }
diff --git a/src/library/media_jobs/FeedsUpdateJob.php b/src/library/media_jobs/FeedsUpdateJob.php
index 474369c8a..419845a5c 100644
--- a/src/library/media_jobs/FeedsUpdateJob.php
+++ b/src/library/media_jobs/FeedsUpdateJob.php
@@ -336,8 +336,7 @@ class FeedsUpdateJob extends MediaJob
                 $nodes = $dom->getElementsByTagName('item');
                 // see above comment on why slink rather than link
                 $item_elements = ["title" => "title",
-                    "description"
-                    => "description", "link" =>"slink",
+                    "description" => "description", "link" =>"slink",
                     "guid" => "guid", "pubdate" => "pubdate"];
                 if ($nodes->length == 0) {
                     // maybe we're dealing with atom rather than rss
@@ -457,6 +456,11 @@ class FeedsUpdateJob extends MediaJob
                             "&n=" . $image_hash;
                     }
                 }
+                if (!empty($item['link']) && !empty($item['title']) &&
+                    !empty($item_elements['link'])
+                    && $item['link'] == $item_elements['link']) {
+                    $item['link'] .= "#".L\crawlHash($item['title']);
+                }
                 $did_add = $this->addFeedItemIfNew($item, $feed['NAME'], $lang,
                     $age, $unique_fields);
                 if ($did_add) {
diff --git a/src/models/PhraseModel.php b/src/models/PhraseModel.php
index 4aada09fe..9fd8c2940 100755
--- a/src/models/PhraseModel.php
+++ b/src/models/PhraseModel.php
@@ -401,10 +401,10 @@ class PhraseModel extends ParallelModel
                             $results['PAGES'][$i] =
                                 $out_results['PAGES'][$out_count];
                             if (isset($out_results['PAGES'][$out_count]
-                                [self::QUESTION_TRIPLETS])) {
+                                [self::QUESTION_ANSWERS])) {
                                 $triplets_with_answer =
                                     $out_results['PAGES'][$out_count]
-                                    [self::QUESTION_TRIPLETS];
+                                    [self::QUESTION_ANSWERS];
                                 $question = trim($phrase);
                                 if (isset($triplets_with_answer[$question])) {
                                     $out_results['PAGES'][$out_count]['ANSWER']=
ViewGit