First pass at converting files to use autoloading! Take care if you have an old yioop system you are upgrading, a=chris
First pass at converting files to use autoloading! Take care if you have an old yioop system you are upgrading, a=chris
<?php
/**
* SeekQuarry/Yioop --
* Open Source Pure PHP Search Engine, Crawler, and Indexer
*
* Copyright (C) 2009 - 2015 Chris Pollett chris@pollett.org
*
* LICENSE:
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* END LICENSE
*
* @author Chris Pollett chris@pollett.org
* @package seek_quarry
* @subpackage bin
* @license http://www.gnu.org/licenses/ GPL3
* @link http://www.seekquarry.com/
* @copyright 2009 - 2015
* @filesource
*/
// This script is a command-line daemon; refuse to run under any other SAPI.
if (php_sapi_name() != 'cli') {
    echo "BAD REQUEST";
    exit();
}
/**
 * Calculate base directory of script: the directory containing this script
 * with its trailing four characters (the length of "/bin") removed.
 * @ignore
 */
define("BASE_DIR", substr(
    dirname(realpath($_SERVER['PHP_SELF'])), 0,
    -strlen("/bin")));
ini_set("memory_limit", "1300M");
/** Load in global configuration settings */
require_once BASE_DIR.'/configs/config.php';
// PROFILE is not defined in this file; presumably configs/config.php sets it
// once the instance has been configured -- confirm against config.php.
if (!PROFILE) {
    // The separating space after "visiting" was previously missing, which
    // produced "visitingits web interface" in the message.
    echo "Please configure the search engine instance by visiting " .
        "its web interface on localhost.\n";
    exit();
}
/**
 * CRAWLING means don't try to use memcache
 * @ignore
 */
define('NO_CACHE', true);
/**
 * We do want logging, but crawl model and others will try to turn it off
 * if we don't set this
 */
define('NO_LOGGING', false);
/**
 * Shortest time (in seconds) through one iteration of the updater's loop
 */
define('MINIMUM_UPDATE_LOOP_TIME', 10);
/** For the CrawlDaemon class */
require_once BASE_DIR . '/lib/crawl_daemon.php';
/** To guess language based on page encoding */
require_once BASE_DIR . '/lib/locale_functions.php';
/** Loads common constants for web crawling */
require_once BASE_DIR . '/lib/crawl_constants.php';
/** Used to reparse wiki pages after a media conversion */
require_once BASE_DIR . '/lib/wiki_parser.php';
/** Load base model class used by source model */
require_once BASE_DIR . '/models/model.php';
/**
 * Source model is used to manage news feed sites. A copy under APP_DIR,
 * when present, takes precedence over the one shipped under BASE_DIR.
 */
require_once file_exists(APP_DIR . '/models/source_model.php')
    ? APP_DIR . '/models/source_model.php'
    : BASE_DIR . '/models/source_model.php';
/**
 * Group model is used to manage media conversion. A copy under APP_DIR,
 * when present, takes precedence over the one shipped under BASE_DIR.
 */
require_once file_exists(APP_DIR . '/models/group_model.php')
    ? APP_DIR . '/models/group_model.php'
    : BASE_DIR . '/models/group_model.php';
/*
 * Set up multi-byte string and regex handling to use UTF-8
 */
mb_internal_encoding('UTF-8');
mb_regex_encoding('UTF-8');
if (!function_exists('lcfirst')) {
    /**
     * Lower cases the first letter in a string
     *
     * Fallback definition, only declared when PHP does not already provide
     * lcfirst() (i.e., PHP versions before 5.3).
     *
     * @param string $str string whose first character should be lower cased
     * @return string $str with its first character lower cased
     */
    function lcfirst($str)
    {
        $first_char = substr($str, 0, 1);
        $remainder = substr($str, 1);
        return (string)(strtolower($first_char) . $remainder);
    }
}
/**
 * Separate process/command-line script which can be used to update
 * news sources for Yioop and also handle other kinds of activities such as
 * video conversion. This is an alternative to using the web app
 * for updating. Makes use of the web app's code.
 *
 * @author Chris Pollett
 * @package seek_quarry
 */
class MediaUpdater implements CrawlConstants
{
    /**
     * The last time feeds were checked for updates
     * @var int
     */
    var $update_time;
    /**
     * Initialized to 0 by the constructor; not otherwise read in this class
     * @var int
     */
    var $delete_time;
    /**
     * Initialized to 0 by the constructor; not otherwise read in this class
     * @var int
     */
    var $retry_time;
    /**
     * Model used to update feed items and rebuild the feed shard
     * @var SourceModel
     */
    var $sourceModel;
    /**
     * Model used to look up and re-save the wiki page a converted media
     * file belongs to
     * @var GroupModel
     */
    var $groupModel;
    /**
     * Sets up the field variables so that media updating can begin
     */
    function __construct()
    {
        $this->delete_time = 0;
        $this->retry_time = 0;
        $this->update_time = 0;
    }
    /**
     * This is the function that should be called to get the media_updater
     * to start updating. Calls CrawlDaemon::init to handle the command-line
     * arguments, creates the models used by the updater, and then enters
     * the updater's main loop
     */
    function start()
    {
        global $argv;
        CrawlDaemon::init($argv, "media_updater");
        crawlLog("\n\nInitialize logger..", "media_updater", true);
        $this->sourceModel = new SourceModel();
        $this->groupModel = new GroupModel();
        $this->loop();
    }
    /**
     * Main loop for the news updater. Each pass runs a news feed update
     * check followed by a media conversion check, then sleeps long enough
     * that the pass takes at least MINIMUM_UPDATE_LOOP_TIME seconds. The
     * loop ends once CrawlDaemon::processHandler() returns a false value.
     */
    function loop()
    {
        crawlLog("In News Update Loop");
        while (CrawlDaemon::processHandler()) {
            // changeInMicrotime() is defined elsewhere; presumably it
            // expects the string form of microtime() -- confirm before
            // switching to microtime(true).
            $start_time = microtime();
            crawlLog("Checking if news feeds should be updated...");
            $this->newsUpdate();
            $this->mediaConversion();
            // sleep() takes whole seconds, so round up and cast to int
            $sleep_time = (int)max(0, ceil(
                MINIMUM_UPDATE_LOOP_TIME - changeInMicrotime($start_time)));
            if ($sleep_time > 0) {
                crawlLog("Ensure minimum loop time by sleeping...".$sleep_time);
                sleep($sleep_time);
            }
        } //end while
        crawlLog("News Updater shutting down!!");
    }
    /**
     * If more than ONE_HOUR has passed since the last update, asks the
     * source model to update the feed items and then to rebuild the feed
     * shard (both calls are passed ONE_WEEK); otherwise just logs that no
     * update was needed. Records the time of the update in $update_time.
     */
    function newsUpdate()
    {
        $time = time();
        if ($time - $this->update_time <= ONE_HOUR) {
            crawlLog("No updates needed.");
            return;
        }
        $this->update_time = $time;
        crawlLog("Performing news feeds update");
        $this->sourceModel->updateFeedItems(ONE_WEEK);
        crawlLog("Deleting feed items and rebuild shard...");
        $this->sourceModel->rebuildFeedShard(ONE_WEEK);
        crawlLog("... delete complete, shard rebuilt");
    }
    /**
     * Checks for schedules of media to convert in the media_convert
     * schedules folder. If it finds a schedule, it reads it and tries to
     * do the conversions. For now the conversions are limited to creating
     * webm and mp4 files (plus a thumbnail image for each) corresponding to
     * a video file listed in a schedule. At most one schedule is handled
     * per call. For this method to run, ffmpeg must be installed and the
     * constant FFMPEG must be set to the path to ffmpeg.
     *
     * A schedule is a .txt file whose first four lines are, in order:
     * the page id, the folder containing the media file, the folder to
     * write thumbnails to, and the media file's name. After converting, the
     * wiki page with that id is re-parsed and saved back to GROUP_PAGE.
     */
    function mediaConversion()
    {
        $convert_folder = WORK_DIRECTORY . "/schedules/media_convert";
        if (!defined('FFMPEG') || !file_exists($convert_folder)) {
            return;
        }
        crawlLog("Checking for media files to convert...");
        $to_convert = glob($convert_folder . "/*.txt");
        if (!isset($to_convert[0])) {
            crawlLog("...No media files found.");
            return;
        }
        $convert_file_struct = file($to_convert[0]);
        // The schedule is deleted before it is validated, so a malformed
        // schedule is dropped rather than being picked up again next pass.
        unlink($to_convert[0]);
        // Four lines are destructured below, so the fourth must exist.
        if (!isset($convert_file_struct[3])) {
            crawlLog("...Media conversion file mis-formatted... skipping");
            return;
        }
        list($page_id, $media_path, $thumb_path, $media_file_name) =
            $convert_file_struct;
        $page_id = trim($page_id);
        $media_path = trim($media_path);
        $thumb_path = trim($thumb_path);
        // file() keeps line terminators; strip only those so the rest of
        // the file name is left exactly as scheduled.
        $media_file_name = rtrim($media_file_name, "\r\n");
        $group_model = $this->groupModel;
        $info = $group_model->getPageInfoByPageId($page_id);
        if (!$info) {
            crawlLog("...Media page lookup failed ... skipping");
            return;
        }
        $group_id = $info['GROUP_ID'];
        $locale_tag = $info['LOCALE_TAG'];
        $page_name = $info['PAGE_NAME'];
        $full_info = $group_model->getPageInfoByName($group_id, $page_name,
            $locale_tag, 'edit');
        if (!$full_info) {
            crawlLog("...Full Media page lookup failed ... skipping");
            return;
        }
        // base name is the file name without its last extension, if any
        $media_base_name = $media_file_name;
        if (($pos = strrpos($media_file_name, ".")) !== false) {
            $media_base_name = substr($media_file_name, 0, $pos);
        }
        $input_media_file = "$media_path/$media_file_name";
        // target format => ffmpeg encoding options used to produce it
        $formats = array(
            "mp4" => "-vcodec h264 -acodec aac -preset veryfast -crf 28 " .
                "-strict -2",
            "webm" => "-vcodec libvpx",
        );
        foreach ($formats as $format => $codec_options) {
            $output_file = "$media_path/$media_base_name.$format";
            // nothing to convert if the input is already this target file
            if ($input_media_file != $output_file) {
                $thumb_file = "$thumb_path/$media_base_name.$format.jpg";
                $this->convertMedia($media_file_name, $input_media_file,
                    $output_file, $codec_options, $thumb_file, $format);
            }
        }
        crawlLog("...media file conversion done...");
        $parser = new WikiParser();
        $parsed_page = $parser->parse($full_info["PAGE"]);
        if ($parsed_page) {
            crawlLog("... re-parsing media page...");
            $parsed_page = $group_model->insertResourcesParsePage($group_id,
                $page_id, $locale_tag, $parsed_page);
            $sql = "UPDATE GROUP_PAGE SET PAGE=? WHERE ID = ?";
            $group_model->db->execute($sql, array($parsed_page, $page_id));
        }
    }
    /**
     * Runs ffmpeg to convert one media file to one target format and then
     * runs it again to grab a single frame of the result as a thumbnail.
     *
     * NOTE(review): the paths are interpolated into the shell commands
     * inside double quotes without escaping. If schedule contents can be
     * influenced by uploaded file names they should be shell-escaped
     * (escapeshellarg(), keeping its locale handling of non-ASCII names in
     * mind) -- confirm where schedules are written.
     *
     * @param string $media_file_name name of the source file, used only in
     *      the log message
     * @param string $input_media_file path of the file to convert
     * @param string $output_file path the converted file is written to
     * @param string $codec_options ffmpeg options placed between the input
     *      and output arguments
     * @param string $thumb_file path the thumbnail image is written to
     * @param string $format name of the target format, used only in the
     *      log message
     */
    private function convertMedia($media_file_name, $input_media_file,
        $output_file, $codec_options, $thumb_file, $format)
    {
        $ffmpeg = FFMPEG . " -i \"$input_media_file\" -y $codec_options " .
            "\"$output_file\"";
        crawlLog("Converting " . $media_file_name . " to " . $format . "...");
        $output = system($ffmpeg);
        CrawlDaemon::processHandler(true); //prevent timeout killing process
        $make_thumb_string =
            FFMPEG . " -i \"$output_file\" -vframes 1 -map 0:v:0" .
            " -vf \"scale=" . THUMB_DIM . ":" . THUMB_DIM . "\" " .
            "\"$thumb_file\" 2>&1";
        crawlLog($output);
        crawlLog("Making thumb with $make_thumb_string");
        exec($make_thumb_string);
        // clearstatcache()'s first parameter is the realpath-cache flag;
        // the file name goes second.
        clearstatcache(true, $thumb_file);
    }
}
/*
 * Instantiates the MediaUpdater program and runs it; start() handles the
 * command-line arguments and then enters the updater's main loop.
 */
$media_updater = new MediaUpdater();
$media_updater->start();
?>