Last commit for controllers/resource_controller.php: 9ff742e4cc2ef0dba312dd0c5f642890b6945730

First pass at converting files to use autoloading! Take care if you have an old yioop system you are upgrading, a=chris

Chris Pollett [2015-07-01 02:Jul:st]
First pass at converting files to use autoloading! Take care if you have an old yioop system you are upgrading, a=chris
<?php
/**
 *  SeekQuarry/Yioop --
 *  Open Source Pure PHP Search Engine, Crawler, and Indexer
 *
 *  Copyright (C) 2009 - 2012  Chris Pollett chris@pollett.org
 *
 *  LICENSE:
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 *  END LICENSE
 *
 * @author Chris Pollett chris@pollett.org
 * @package seek_quarry
 * @subpackage controller
 * @license http://www.gnu.org/licenses/ GPL3
 * @link http://www.seekquarry.com/
 * @copyright 2009 - 2012
 * @filesource
 */

// BASE_DIR has to be defined before this file is loaded; if it is not,
// answer with a short error message and stop executing.
if(defined('BASE_DIR') === false) {
    echo "BAD REQUEST";
    exit();
}

/** Load base controller class if needed */
require_once BASE_DIR."/controllers/controller.php";
/** Loads common constants for web crawling */
require_once BASE_DIR."/lib/crawl_constants.php";
/** Loads url_parser to clean resource name */
require_once BASE_DIR."/lib/url_parser.php";

/**
 *  Used to serve resources, css, or scripts such as images from APP_DIR,
 *  files from the cache folder of CRAWL_DIR, and keyword suggest tries
 *  from LOCALE_DIR. It also handles the syncNotify and syncList activities
 *  used by machines acting as mirrors.
 *
 * @author Chris Pollett
 * @package seek_quarry
 * @subpackage controller
 */
class ResourceController extends Controller implements CrawlConstants
{
    /**
     * Models used by this controller. The crawl model is used by
     * syncList() to find out which files changed since the last sync
     * @var array
     */
    var $models = array("crawl");
    /**
     * Output is written directly (file contents or serialized data) so
     * no views are needed
     * @var array
     */
    var $views = array();
    /**
     * These are the activities supported by this controller
     * @var array
     */
    var $activities = array("get", "syncList", "syncNotify", "suggest");


    /**
     * Checks that the request seems to be coming from a legitimate fetcher
     * or mirror server then determines which activity  is being requested
     * and calls the method for that activity.
     *
     * The "get" and "suggest" activities skip this check here; get()
     * performs its own check for the folders that need one.
     */
    function processRequest()
    {
        if(!isset($_REQUEST['a']) ||
            (!in_array($_REQUEST['a'], array("get", "suggest"), true)
            && !$this->checkRequest())) {return; }
        $activity = $_REQUEST['a'];

        // only names listed in $this->activities may be called as methods
        if(in_array($activity, $this->activities, true)) {
            $this->$activity();
        }
    }

    /**
     * Gets the resource $_REQUEST['n'] from APP_DIR/$_REQUEST['f'] or
     * CRAWL_DIR/$_REQUEST['f']  after cleaning
     *
     * If both $_REQUEST['o'] (offset) and $_REQUEST['l'] (limit) are set
     * only that byte range of the file is output, otherwise the whole file
     * is. Nothing is output if the folder is not one of the allowed ones,
     * the request check fails for the cache folder, or the resource is not
     * an existing file.
     */
    function get()
    {
        if(!isset($_REQUEST['n']) || !isset($_REQUEST['f'])) {return; }
        $name = $this->clean($_REQUEST['n'], "string");
        $folder = $_REQUEST['f'];
        if(in_array($folder, array("css", "scripts", "resources"), true)) {
            /* notice in this case we don't check if request come from a
               legitimate source but we do try to restrict it to being
               a file (not a folder) in the above array
            */
            $base_dir = APP_DIR."/$folder";
            $type = UrlParser::getDocumentType($name);
            $name = UrlParser::getDocumentFilename($name);
            $name = ($type != "") ? "$name.$type" :$name;
        } else if($folder === "cache") {
            /*  perform check since these request should come from a known
                machine
            */
            if(!$this->checkRequest()) {
                return;
            }
            $base_dir = CRAWL_DIR."/$folder";
        } else {
            return;
        }
        /* a ".." path segment would let the request walk out of the
           chosen base folder, so such names are refused outright
         */
        if(in_array("..", preg_split('@[/\\\\]@', $name), true)) {
            return;
        }
        $path = "$base_dir/$name";
        /* test for the file before asking finfo about it: finfo::file()
           raises a warning on a path that does not exist. is_file() also
           rejects folders, which cannot be served anyway
         */
        if(!is_file($path)) {
            return;
        }
        $finfo = new finfo(FILEINFO_MIME);
        $mime_type = $finfo->file($path);
        if($mime_type !== false) {
            header("Content-type:$mime_type");
        }
        if(isset($_REQUEST['o']) && isset($_REQUEST['l'])) {
            // negative values are not a meaningful range, clamp them to 0
            $offset = max(0, (int)$this->clean($_REQUEST['o'], "int"));
            $limit = max(0, (int)$this->clean($_REQUEST['l'], "int"));
            echo file_get_contents($path, false, NULL, $offset, $limit);
        } else {
            readfile($path);
        }
    }

    /**
     * Used to get a keyword suggest trie. This sends additional
     * header so will be decompressed on the fly
     *
     * $_REQUEST["locale"] must be two ASCII letters optionally followed
     * by a dash and two more ASCII letters; anything else is ignored.
     */
    function suggest()
    {
        if(!isset($_REQUEST["locale"]) || !is_string($_REQUEST["locale"])) {
            return;
        }
        $locale = $_REQUEST["locale"];
        /* A-Z (not A-z, which also matches [ \ ] ^ _ and backtick); the D
           modifier stops $ from accepting a trailing newline
         */
        $count = preg_match('/^[a-zA-Z]{2}(-[a-zA-Z]{2})?$/D', $locale);
        if($count != 1) {return;}
        $path = LOCALE_DIR."/$locale/resources/suggest_trie.txt.gz";
        if(file_exists($path)) {
            header("Content-Type: application/json");
            header("Content-Encoding: gzip");
            header("Content-Length: ".filesize($path));
            readfile($path);
        }
    }

    /**
     *  Used to notify a machine that another machine acting as a mirror
     *  is still alive. Data is stored in a txt file self::mirror_table_name
     *
     *  Nothing is recorded unless $_REQUEST['last_sync'] is greater than 0
     *  and $_REQUEST['robot_instance'] is set. Each entry in the table is
     *  keyed by the robot instance and holds the remote address, the
     *  machine uri, the current time, and the last sync time.
     */
    function syncNotify()
    {
        if(!isset($_REQUEST['last_sync']) || !($_REQUEST['last_sync'] > 0)) {
            return;
        }
        if(!isset($_REQUEST['robot_instance'])) {
            return;
        }
        $mirror_table_name = CRAWL_DIR."/".self::mirror_table_name;
        $mirror_table = array();
        $time = time();
        if(file_exists($mirror_table_name)) {
            $stored_table = unserialize(
                file_get_contents($mirror_table_name));
            // unserialize gives false on unreadable data; start over then
            if(is_array($stored_table)) {
                $mirror_table = $stored_table;
            }
            /* elapsed time is now minus the stored time; the reverse
               difference is never positive so the table never got reset
             */
            if(isset($mirror_table['time']) &&
                $time - $mirror_table['time'] > MIRROR_SYNC_FREQUENCY) {
                $mirror_table = array();
                // truncate table periodically to get rid of stale entries
            }
        }
        // machine_uri may be absent from the request; avoid a notice
        $machine_uri = isset($_REQUEST['machine_uri']) ?
            $_REQUEST['machine_uri'] : "";
        $mirror_table['time'] = $time;
        $mirror_table['machines'][
            $this->clean($_REQUEST['robot_instance'], "string")] =
            array($_SERVER['REMOTE_ADDR'], $machine_uri,
            $time,
            $this->clean($_REQUEST['last_sync'], "int"));
        file_put_contents($mirror_table_name, serialize($mirror_table));
    }

    /**
     * Returns a list of syncable files and the modification times
     *
     * First records the requesting machine via syncNotify(), then asks the
     * crawl model for the files under CRAWL_DIR/cache relative to
     * $_REQUEST["last_sync"] (0 if not set). The result is echoed as a
     * serialized, gzcompress'd, base64 encoded array holding a status and,
     * when there are files, the file data.
     */
    function syncList()
    {
        $this->syncNotify();
        $info = array();
        $last_sync = isset($_REQUEST["last_sync"]) ?
            $this->clean($_REQUEST["last_sync"], "int") : 0;
        // substrings to exclude from our list
        $excludes = array(".DS", "__MACOSX", "queries", "QueueBundle", "tmp",
            "thumb");
        $sync_files = $this->crawlModel->getDeltaFileInfo(CRAWL_DIR."/cache",
            $last_sync, $excludes);

        if (count($sync_files) > 0 ) {
            $info[self::STATUS] = self::CONTINUE_STATE;
            $info[self::DATA] = $sync_files;
        } else {
            $info[self::STATUS] = self::NO_DATA_STATE;
        }
        echo base64_encode(gzcompress(serialize($info)));
    }
}
?>
ViewGit