diff --git a/src/configs/Createdb.php b/src/configs/Createdb.php index c01c32cf0..2c160c3da 100755 --- a/src/configs/Createdb.php +++ b/src/configs/Createdb.php @@ -458,7 +458,7 @@ $media_sources = [ ['100000004', 'National Weather Service 4', 'regex', 'weather', 'http://forecast.weather.gov/product.php?'. 'site=NWS&issuedby=04&product=SCS&format=txt&version=1&glossary=0', - '/WEA\s+LO/HI\n+([^<]+)\n+NATIONAL/mi###/\n/###'. + '/WEA\s+LO\/HI\s*\n+([^<]+)\n+NATIONAL/mi###/\n/###'. '/^(.+?)\s\s\s+/###/\s\s\s+(.+?)$/###http://www.weather.gov/###', 'en-US'], ['100000005', 'Ted', 'feed_podcast', '2592000', diff --git a/src/controllers/Controller.php b/src/controllers/Controller.php index a1124ddd0..258858a86 100755 --- a/src/controllers/Controller.php +++ b/src/controllers/Controller.php @@ -764,10 +764,10 @@ abstract class Controller case "file_name": if (isset($value)) { $value = str_replace("&", "&", $value); - $value = str_replace("/", "", $value); - $value = str_replace("\\", "", $value); - $value = str_replace("*", "", $value); - $clean_value = str_replace(":", "", $value); + $value = str_replace("/", "-", $value); + $value = str_replace("\\", "-", $value); + $value = str_replace("*", "-", $value); + $clean_value = str_replace(":", "-", $value); } else { $clean_value = $default; } diff --git a/src/data/public_default.db b/src/data/public_default.db index 37d2e814f..ce2b4c51d 100644 Binary files a/src/data/public_default.db and b/src/data/public_default.db differ diff --git a/src/library/media_jobs/AnalyticsJob.php b/src/library/media_jobs/AnalyticsJob.php index 2af3b856c..b515b7cf1 100644 --- a/src/library/media_jobs/AnalyticsJob.php +++ b/src/library/media_jobs/AnalyticsJob.php @@ -33,6 +33,7 @@ namespace seekquarry\yioop\library\media_jobs; use seekquarry\yioop\configs as C; use seekquarry\yioop\library as L; use seekquarry\yioop\library\CrawlConstants; +use seekquarry\yioop\library\UrlParser; use seekquarry\yioop\library\processors\PageProcessor; use seekquarry\yioop\models\ImpressionModel; use seekquarry\yioop\models\MachineModel; @@ -330,8 +331,6 @@ class AnalyticsJob extends MediaJob $results = $this->phrase_model->getPhrasePageResults( "$query i:$index_timestamp", 0, 1, true, null, false, 0, $machine_urls); - echo $query."\n"; - print_r($results); return (isset($results["TOTAL_ROWS"])) ? $results["TOTAL_ROWS"] : -1; } } diff --git a/src/library/media_jobs/FeedsUpdateJob.php b/src/library/media_jobs/FeedsUpdateJob.php index 8c994f357..d75ff8cb5 100644 --- a/src/library/media_jobs/FeedsUpdateJob.php +++ b/src/library/media_jobs/FeedsUpdateJob.php @@ -387,6 +387,7 @@ class FeedsUpdateJob extends MediaJob $log_function("----Scraped channel is:", "h3"); } $channel = ""; + $nodes = []; if (!empty($matches[1])) { if ($test_mode) { $log_function($matches[1]); diff --git a/src/library/media_jobs/WikiMediaJob.php b/src/library/media_jobs/WikiMediaJob.php index 2610fc83a..20bfd9662 100644 --- a/src/library/media_jobs/WikiMediaJob.php +++ b/src/library/media_jobs/WikiMediaJob.php @@ -39,6 +39,7 @@ use seekquarry\yioop\library\IndexShard; use seekquarry\yioop\library\PhraseParser; use seekquarry\yioop\library\UrlParser; use seekquarry\yioop\models\GroupModel; +use seekquarry\yioop\controllers\CrawlController; /** * A media job to download and index feeds from various search sources (RSS, @@ -588,6 +589,7 @@ class WikiMediaJob extends MediaJob public function downloadPodcastItemIfNew($item, &$podcast, $age) { $group_model = $this->group_model; + $controller = new CrawlController(); //only need for clean() method $pubdate = (empty($item['pubdate'])) ? time(): (is_int($item['pubdate']) ? $item['pubdate'] : strtotime($item['pubdate'])); @@ -613,6 +615,7 @@ class WikiMediaJob extends MediaJob $podcast['LANGUAGE']); $file_name = $this->makeFileNamePattern($file_name, $file_pattern, substr($item['title'], 0, C\NAME_LEN), $pubdate); + $file_name = $controller->clean($file_name, "file_name"); $type = UrlParser::getDocumentType($file_name); $data = $this->downloadPodcastItem($item['link'], $type); if ($data) {