Last commit for models/model.php: bd5e03b977517bc873e5c986f3d7338982dd182b

Git renames a bunch of files that had lower case starting letters, a=chris

Chris Pollett [2015-07-01 16:Jul:st]
Git renames a bunch of files that had lower case starting letters, a=chris
<?php
/**
 *  SeekQuarry/Yioop --
 *  Open Source Pure PHP Search Engine, Crawler, and Indexer
 *
 *  Copyright (C) 2009 - 2014  Chris Pollett chris@pollett.org
 *
 *  LICENSE:
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 *  END LICENSE
 *
 * @author Chris Pollett chris@pollett.org
 * @package seek_quarry
 * @subpackage model
 * @license http://www.gnu.org/licenses/ GPL3
 * @link http://www.seekquarry.com/
 * @copyright 2009 - 2014
 * @filesource
 */
/*
 * Refuse to run unless BASE_DIR has already been defined; every path
 * required below is built from it.
 */
if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}
/** Used to manage database connections */
require_once BASE_DIR."/models/datasources/".DBMS."_manager.php";
/** Used to handle curl and multi curl page requests */
require_once BASE_DIR."/lib/fetch_url.php";
/**  For crawlHash function  */
require_once BASE_DIR."/lib/utility.php";
/** For checking if a url is on localhost */
require_once BASE_DIR."/lib/url_parser.php";
/** Used to load common constants among crawl components */
require_once BASE_DIR."/lib/crawl_constants.php";
/** Number of leading characters of a result's score that
 *  Model::formatPageResults keeps */
define("SCORE_PRECISION", 4);
/** Character offset in a description from which Model::formatPageResults
 *  starts looking for a space at which to end a derived title */
define("TITLE_LENGTH", 20);
/** Upper bound on the number of characters of a title derived from a
 *  description by Model::formatPageResults */
define("MAX_TITLE_LENGTH", 20);
/** Number of characters to the left of a keyword match at which
 *  Model::getSnippets begins a snippet window */
define("SNIPPET_LENGTH_LEFT", 60);
/** Number of characters to the right of a keyword match at which
 *  Model::getSnippets aims to end a snippet window */
define("SNIPPET_LENGTH_RIGHT", 50);
/** Text fragments shorter than this are used whole by Model::getSnippets
 *  (when they contain a keyword) rather than being windowed */
define("MIN_SNIPPET_LENGTH", 100);
/**
 *
 * This is a base class for all models
 * in the SeekQuarry search engine. It provides
 * support functions for formatting search results
 *
 * @author Chris Pollett
 *
 * @package seek_quarry
 * @subpackage model
 */
class Model implements CrawlConstants
{
    /**
     * Default maximum character length of a search summary
     */
    const DEFAULT_DESCRIPTION_LENGTH = 150;
    /**
     * Reference to a DatasourceManager
     * @var object
     */
    var $db;
    /**
     * Name of the search engine database
     * @var string
     */
    var $db_name;
    /**
     * Associative array of page summaries which might be used to
     * override default page summaries if set. It is looked up using
     * crawlHash($url, true) as the key (see formatPageResults()).
     * @var array
     */
    var $edited_page_summaries = NULL;
    /**
     * These fields if present in $search_array (used by @see getRows() ),
     * but with value "0", will be skipped as part of the where clause
     * but will be used for order by clause
     * @var array
     */
    var $any_fields = array();
    /**
     * Associations of the form
     *     name of field for web forms => database column names/abbreviations
     * @var array
     */
    var $search_table_column_map = array();
    /**
     * Sets up the database manager that will be used and name of the search
     * engine database
     *
     * @param string $db_name  the name of the database for the search engine
     * @param bool $connect whether to connect to the database by default
     *     after making the datasource class
     */
    function __construct($db_name = DB_NAME, $connect = true)
    {
        // the manager class name is derived from the DBMS constant, e.g.,
        // a DBMS of "foo" gives class "FooManager"
        $db_class = ucfirst(DBMS)."Manager";
        $this->db = new $db_class();
        if($connect) {
            $this->db->connect();
        }
        $this->db_name = $db_name;
    }
    /**
     * Given an array page summaries, for each summary extracts snippets which
     * are related to a set of search words. For each snippet, bold faces the
     * search terms, and then creates a new summary array.
     *
     * If a page has no title, one is derived from the leading characters of
     * its tag-stripped description; if that is empty too, the page's url is
     * used as the title.
     *
     * @param array $results web pages summaries (these in turn are
     *     arrays!). Expected to have a 'PAGES' field with the summaries and
     *     a 'TOTAL_ROWS' field
     * @param array $words keywords (typically what was searched on)
     * @param int $description_length length of the description
     * @return array summaries which have been snippified and bold faced.
     *     If $results has no 'PAGES' field, an array with 'TOTAL_ROWS' => 0
     *     and 'PAGES' => NULL is returned
     */
    function formatPageResults($results, $words = NULL, $description_length =
        self::DEFAULT_DESCRIPTION_LENGTH)
    {
        $output = array('TOTAL_ROWS' => 0, 'PAGES' => NULL);
        if(!isset($results['PAGES']) || !is_array($results['PAGES'])) {
            return $output;
        }
        $pages = $results['PAGES'];
        foreach($pages as $i => $page) {
            if($this->edited_page_summaries != NULL &&
                isset($page[self::URL])) {
                // urls of the form "something|url" keep only the second part
                $url_parts = explode("|", $page[self::URL]);
                if(count($url_parts) > 1) {
                    $url = trim($url_parts[1]);
                } else {
                    $url = $page[self::URL];
                }
                $hash_url = crawlHash($url, true);
                if(isset($this->edited_page_summaries[$hash_url])) {
                    $summary = $this->edited_page_summaries[$hash_url];
                    $page[self::URL] = $url;
                    foreach(array(self::TITLE, self::DESCRIPTION) as $field) {
                        if(isset($summary[$field])) {
                            $page[$field] = $summary[$field];
                        }
                    }
                }
            }
            $title = isset($page[self::TITLE]) ?
                strip_tags($page[self::TITLE]) : "";
            $description = isset($page[self::DESCRIPTION]) ?
                $page[self::DESCRIPTION] : "";
            if(strlen($title) == 0) {
                /* Derive a title from the description. All offsets are
                   computed on the same tag-stripped string that is cut, and
                   a description with no space after the offset is used up
                   to its end rather than producing an empty title.
                 */
                $plain_description = strip_tags($description);
                $plain_length = mb_strlen($plain_description);
                $end_title = mb_strpos($plain_description, " ",
                    min($plain_length, TITLE_LENGTH));
                if($end_title === false) {
                    $end_title = $plain_length;
                }
                $ellipsis = "";
                if($end_title > TITLE_LENGTH) {
                    $ellipsis = "...";
                    $end_title = min($end_title, MAX_TITLE_LENGTH);
                }
                $title = mb_substr($plain_description, 0, $end_title) .
                    $ellipsis;
                //still no text revert to url
                if(strlen($title) == 0 && isset($page[self::URL])) {
                    $title = $page[self::URL];
                }
            }
            if($words != NULL) {
                $page[self::TITLE] = $this->boldKeywords($title, $words);
                // feed items keep their whole description; everything else
                // is reduced to snippets around the keywords
                if(!isset($page[self::IS_FEED])) {
                    $description = $this->getSnippets(
                        strip_tags($description), $words,
                        $description_length);
                }
                $page[self::DESCRIPTION] =
                    $this->boldKeywords($description, $words);
            } else {
                $page[self::TITLE] = $title;
                $page[self::DESCRIPTION] = mb_substr(
                    strip_tags($description), 0, $description_length);
            }
            $score = isset($page[self::SCORE]) ? $page[self::SCORE] : "";
            $page[self::SCORE] = mb_substr($score, 0, SCORE_PRECISION);
            $pages[$i] = $page;
        }
        // if the caller supplied no total, report the number of pages given
        $output['TOTAL_ROWS'] = isset($results['TOTAL_ROWS']) ?
            $results['TOTAL_ROWS'] : count($pages);
        $output['PAGES'] = $pages;
        return $output;
    }
    /**
     * Given a string, extracts a snippets of text related to a given set of
     * key words. For a given word a snippet is a window of characters to its
     * left and right that is less than a maximum total number of characters.
     * There is also a rule that a snippet should avoid ending in the middle of
     * a word
     *
     * The text is first split on ".. " into fragments. A fragment shorter
     * than MIN_SNIPPET_LENGTH is used whole if it contains a keyword; longer
     * fragments are cut into windows around each keyword occurrence.
     * Extraction stops once the accumulated snippets reach
     * $description_length characters.
     *
     * @param string $text haystack to extract snippet from
     * @param array $words keywords used to make look in haystack
     * @param int $description_length length of the description desired
     * @return string a concatenation of the extracted snippets of each word
     *     ($text itself if it is shorter than $description_length)
     */
    function getSnippets($text, $words, $description_length)
    {
        if(mb_strlen($text) < $description_length) {
            return $text;
        }
        // multi-word entries are split so each word is searched for
        // separately
        $split_words = array();
        foreach($words as $word) {
            $split_words = array_merge($split_words, explode(" ", $word));
        }
        // array_filter without a callback drops empty (falsy) entries
        $words = array_filter(array_unique($split_words));
        $ellipsis = "";
        $snippet_string = "";
        $snippet_hash = array();
        $text_sources = explode(".. ", $text);
        foreach($text_sources as $text_source) {
            $len = mb_strlen($text_source);
            if($len < MIN_SNIPPET_LENGTH) {
                if(!isset($snippet_hash[$text_source])) {
                    $found = false;
                    foreach($words as $word) {
                        if(mb_stristr($text_source, $word) !== false) {
                            $found = true;
                            break;
                        }
                    }
                    if($found) {
                        $snippet_string .= $ellipsis . $text_source;
                        $ellipsis = " ... ";
                        $snippet_hash[$text_source] = true;
                        if(mb_strlen($snippet_string) >=
                            $description_length) {
                            break;
                        }
                    }
                }
                continue;
            }
            $word_locations = array();
            foreach($words as $word) {
                // quote the delimiter as well so a "/" in a word cannot
                // terminate the pattern
                $pattern = "/" . preg_quote($word, "/") . "/ui";
                $num_matches = preg_match_all($pattern, $text_source,
                    $matches, PREG_OFFSET_CAPTURE);
                if(!$num_matches || !isset($matches[0]) ||
                    !is_array($matches[0])) {
                    continue;
                }
                foreach($matches[0] as $match) {
                    /* PREG_OFFSET_CAPTURE reports byte offsets, but the
                       window arithmetic below uses mb_* character offsets,
                       so convert.
                     */
                    $word_locations[] = mb_strlen(
                        substr($text_source, 0, $match[1]));
                }
            }
            sort($word_locations);
            $high = 0;
            foreach($word_locations as $pos) {
                // occurrence already covered by the previous window
                if($pos < $high) {
                    continue;
                }
                // start at the first space inside the left window, unless
                // there is none before the keyword
                $pre_low = max($pos - SNIPPET_LENGTH_LEFT, 0);
                $low = mb_strpos($text_source, " ", $pre_low);
                if($low === false || $low > $pos) {
                    $low = $pre_low;
                }
                // end at a space within 10 characters of the right window
                // edge if there is one, otherwise at the edge itself
                $pre_high = min($pos + SNIPPET_LENGTH_RIGHT, $len);
                $high = mb_strpos($text_source, " ",
                    max($pre_high - 10, $pos));
                if($high === false || $high > $pre_high + 10) {
                    $high = $pre_high;
                }
                $cur_snippet = trim(
                    mb_substr($text_source, $low, $high - $low));
                if(!isset($snippet_hash[$cur_snippet])) {
                    $snippet_string .= $ellipsis . $cur_snippet;
                    $ellipsis = " ... ";
                    $snippet_hash[$cur_snippet] = true;
                }
                if(mb_strlen($snippet_string) >= $description_length) {
                    break 2;
                }
            }
        }
        return $snippet_string;
    }
    /**
     * Given a string, wraps in bold html tags a set of key words it contains.
     *
     * All key words are replaced in a single case-insensitive pass (longest
     * key word first), so the bold tags inserted for one key word are never
     * themselves searched for another key word. Empty key words and key
     * words containing "/" are ignored.
     *
     * @param string $text haystack string to look for the key words
     * @param array $words an array of words to bold face
     *
     * @return string  the resulting string after boldfacing has been applied
     */
    function boldKeywords($text, $words)
    {
        $alternatives = array();
        $lengths = array();
        foreach(array_unique($words) as $word) {
            if($word != "" && strpos($word, "/") === false) {
                $alternatives[] = preg_quote($word, '/');
                $lengths[] = strlen($word);
            }
        }
        if(count($alternatives) == 0) {
            return $text;
        }
        // longest first so "foobar" is preferred over its prefix "foo"
        array_multisort($lengths, SORT_DESC, SORT_NUMERIC, $alternatives);
        $alternation = implode('|', $alternatives);
        $new_text = preg_replace('/(' . $alternation . ')/ui', '<b>$1</b>',
            $text);
        if($new_text === NULL) {
            // unicode mode fails on text that is not valid UTF-8; retry
            // byte-wise
            $new_text = preg_replace('/(' . $alternation . ')/i',
                '<b>$1</b>', $text);
        }
        return ($new_text === NULL) ? $text : $new_text;
    }
    /**
     * Gets a list of all DBMS that work with the search engine. These are
     * found by listing the files named *_manager.php in the
     * models/datasources folder, excluding datasource_manager.php
     *
     * @return array Names of available datasources
     */
    function getDbmsList()
    {
        $list = array();
        $prefix = BASE_DIR.'/models/datasources/';
        $suffix = "_manager.php";
        $data_managers = glob($prefix . '*' . $suffix);
        if(!is_array($data_managers)) {
            // glob returns false on error
            return $list;
        }
        foreach($data_managers as $data_manager) {
            $dbms = substr($data_manager, strlen($prefix),
                -strlen($suffix));
            if($dbms != 'datasource') {
                $list[] = $dbms;
            }
        }
        return $list;
    }
    /**
     * Returns whether the provided dbms needs a login and password or not
     * (sqlite or sqlite3)
     *
     * @param string $dbms the name of a database management system
     * @return bool true if needs a login and password; false otherwise
     */
    function loginDbms($dbms)
    {
        return !in_array($dbms, array("sqlite", "sqlite3"));
    }
    /**
     * Used to determine if an action involves just one yioop instance on
     * the current local machine or not
     *
     * @param array $machine_urls urls of yioop instances to which the action
     *     applies
     * @param string $index_timestamp if timestamp exists checks if the index
     *     has declared itself to be a no network index.
     * @return bool whether it involves a single local yioop instance (true)
     *     or not (false)
     */
    function isSingleLocalhost($machine_urls, $index_timestamp = -1)
    {
        if($index_timestamp >= 0) {
            $index_archive_name = self::index_data_base_name .
                $index_timestamp;
            // an index folder containing no_network.txt is always treated
            // as local
            if(file_exists(
                CRAWL_DIR."/cache/$index_archive_name/no_network.txt")) {
                return true;
            }
        }
        if(count($machine_urls) > 1) {
            return false;
        }
        // avoid reading an undefined offset when no urls were given
        $first_url = isset($machine_urls[0]) ? $machine_urls[0] : NULL;
        return UrlParser::isLocalhostUrl($first_url);
    }
    /**
     * Used to get the translation of a string_id stored in the database to
     * the given locale. Translations that are found are remembered for the
     * rest of the current process, so a repeated request for the same
     * locale and string_id does not query the database again.
     *
     * @param string $string_id id to translate
     * @param string $locale_tag to translate to
     * @return mixed translation if found, $string_id, otherwise
     */
    function translateDb($string_id, $locale_tag)
    {
        static $lookup = array();
        if(isset($lookup[$locale_tag][$string_id])) {
            return $lookup[$locale_tag][$string_id];
        }
        $db = $this->db;
        $sql = "
            SELECT TL.TRANSLATION AS TRANSLATION
            FROM TRANSLATION T, LOCALE L, TRANSLATION_LOCALE TL
            WHERE T.IDENTIFIER_STRING = :string_id AND
                L.LOCALE_TAG = :locale_tag AND
                L.LOCALE_ID = TL.LOCALE_ID AND
                T.TRANSLATION_ID = TL.TRANSLATION_ID " . $db->limitOffset(1);
        $result = $db->execute($sql,
            array(":string_id" => $string_id, ":locale_tag" => $locale_tag));
        if(!$result) {
            return $string_id;
        }
        $row = $db->fetchArray($result);
        if(isset($row['TRANSLATION'])) {
            $lookup[$locale_tag][$string_id] = $row['TRANSLATION'];
            return $row['TRANSLATION'];
        }
        return $string_id;
    }
    /**
     * Get the user_id associated with a given username
     * (In base class as used as an internal method in both signin and
     *  user models). The username comparison is case insensitive.
     *
     * @param string $username the username to look up
     * @return mixed the corresponding userid; NULL if no user has that
     *     name; false if the query could not be executed
     */
    function getUserId($username)
    {
        $db = $this->db;
        $sql = "SELECT USER_ID FROM USERS WHERE
            UPPER(USER_NAME) = UPPER(?) ". $db->limitOffset(1);
        $result = $db->execute($sql, array($username));
        if(!$result) {
            return false;
        }
        $row = $db->fetchArray($result);
        return isset($row['USER_ID']) ? $row['USER_ID'] : NULL;
    }
    /**
     * Creates the WHERE and ORDER BY clauses for a query of a Yioop
     * table such as USERS, ROLE, GROUP, which have associated search web
     * forms. Searches are case insensitive
     *
     * Rows whose field name is not a key of $this->search_table_column_map
     * and comparisons other than =, !=, CONTAINS, BEGINS WITH, ENDS WITH
     * contribute nothing to the WHERE clause.
     *
     * @param array $search_array each element of this is a quadruple
     *     name of a field, what comparison to perform, a value to check,
     *     and an order (ascending/descending) to sort by
     * @param array $any_fields these fields if present in search array
     *     but with value "0" will be skipped as part of the where clause
     *     but will be used for order by clause
     * @return array string for where clause, string for order by clause
     *     (each is the empty string if it has no components)
     */
    function searchArrayToWhereOrderClauses($search_array,
        $any_fields = array('status'))
    {
        $db = $this->db;
        $conditions = array();
        $orderings = array();
        $sort_types = array("ASC", "DESC");
        foreach($search_array as $row) {
            if(!isset($row[0]) ||
                !isset($this->search_table_column_map[$row[0]])) {
                continue;
            }
            $field_name = $this->search_table_column_map[$row[0]];
            $comparison = isset($row[1]) ? $row[1] : "";
            $value = isset($row[2]) ? $row[2] : "";
            $sort_dir = isset($row[3]) ? $row[3] : "";
            if($value != "" && (!in_array($row[0], $any_fields)
                || $value != "0")) {
                $escaped = $db->escapeString($value);
                switch($comparison) {
                    case "=":
                        $conditions[] = "$field_name='$escaped'";
                    break;
                    case "!=":
                        $conditions[] = "$field_name!='$escaped'";
                    break;
                    case "CONTAINS":
                        $conditions[] = "UPPER($field_name) LIKE UPPER('%".
                            $escaped . "%')";
                    break;
                    case "BEGINS WITH":
                        $conditions[] = "UPPER($field_name) LIKE UPPER('".
                            $escaped . "%')";
                    break;
                    case "ENDS WITH":
                        $conditions[] = "UPPER($field_name) LIKE UPPER('%".
                            $escaped . "')";
                    break;
                }
            }
            // strict check so only the literal strings ASC/DESC can reach
            // the query
            if(in_array($sort_dir, $sort_types, true)) {
                $orderings[] = $field_name . " " . $sort_dir;
            }
        }
        $where = (count($conditions) > 0) ?
            " WHERE " . implode(" AND ", $conditions) : "";
        $order_by = (count($orderings) > 0) ?
            " ORDER BY " . implode(", ", $orderings) : "";
        return array($where, $order_by);
    }
    /**
     * Gets a range of rows which match the provided search criteria from
     * the tables given by fromCallback(). The query is assembled from the
     * results of fromCallback(), whereCallback() and selectCallback(); each
     * fetched row is passed through rowCallback() and the complete list
     * through postQueryCallback().
     *
     * @param int $limit starting row from the potential results to return
     * @param int $num number of rows after start row to return
     * @param int &$total gets set with the total number of rows that
     *     can be returned by the given database query (0 if the count
     *     query fails)
     * @param array $search_array each element of this is a
     *     quadruple name of a field, what comparison to perform, a value to
     *     check, and an order (ascending/descending) to sort by
     * @param mixed $args additional arguments handed to the callbacks
     * @return array the rows found, after the callbacks have been applied
     */
    function getRows($limit = 0, $num = 100, &$total,
        $search_array = array(), $args = NULL)
    {
        $db = $this->db;
        $tables = $this->fromCallback($args);
        $limit_clause = $db->limitOffset($limit, $num);
        list($where, $order_by) =
            $this->searchArrayToWhereOrderClauses($search_array,
            $this->any_fields);
        $more_conditions = $this->whereCallback($args);
        if($more_conditions) {
            $where .= (($where != "") ? " AND " : " WHERE ") .
                $more_conditions;
        }
        $total = 0;
        $sql = "SELECT COUNT(*) AS NUM FROM $tables $where";
        $result = $db->execute($sql);
        if($result) {
            $count_row = $db->fetchArray($result);
            if(isset($count_row['NUM'])) {
                $total = $count_row['NUM'];
            }
        }
        $select_columns = $this->selectCallback($args);
        $sql = "SELECT $select_columns FROM ".
            "$tables $where $order_by $limit_clause";
        $result = $db->execute($sql);
        $rows = array();
        if($result) {
            while($row = $db->fetchArray($result)) {
                $rows[] = $this->rowCallback($row, $args);
            }
        }
        return $this->postQueryCallback($rows);
    }
    /**
     * Controls which columns and the names of those columns from the tables
     * underlying the given model should be return from a getRows call.
     * This defaults to *, but in general will be overriden in subclasses of
     * Model
     *
     * @param mixed $args any additional arguments which should be used to
     *     determine the columns
     * @return string a comma separated list of columns suitable for a SQL
     *     query
     */
    function selectCallback($args)
    {
        return "*";
    }
    /**
     * Controls which tables and the names of tables
     * underlie the given model and should be used in a getRows call
     * This defaults to the single table whose name is whatever is before
     * Model in the name of the model. For example, by default on FooModel
     * this method would return "FOO". A class whose name does not end in
     * Model gets its whole upper-cased name. If a different behavior is
     * needed, this can be overriden in subclasses of Model
     *
     * @param mixed $args any additional arguments which should be used to
     *     determine these tables
     * @return string a comma separated list of tables suitable for a SQL
     *     query
     */
    function fromCallback($args)
    {
        $name = strtoupper(get_class($this));
        $suffix = "MODEL";
        $suffix_length = strlen($suffix);
        if(strlen($name) >= $suffix_length &&
            substr($name, -$suffix_length) === $suffix) {
            // cast because older PHP versions give false, not "", when
            // nothing is left
            $name = (string)substr($name, 0, -$suffix_length);
        }
        return $name;
    }
    /**
     * Controls the WHERE clause of the SQL query that
     * underlies the given model and should be used in a getRows call.
     * This defaults to no additional conditions. getRows() itself adds the
     * WHERE or AND keyword in front of whatever is returned.
     *
     * @param mixed $args additional arguments that might be used to construct
     *     the WHERE clause.
     * @return string SQL conditions to add to the query, "" for none
     */
    function whereCallback($args)
    {
        return "";
    }
    /**
     * Called after as row is retrieved by getRows from the database to
     * perform some manipulation that would be useful for this model.
     * For example, in CrawlModel, after a row representing a crawl mix
     * has been gotten, this is used to perform an additional query to marshal
     * its components. By default this method just returns this row unchanged.
     *
     * @param array $row row as retrieved from the database
     * @param mixed $args additional arguments that might be used by this
     *     callback
     * @return array $row after callback manipulation
     */
    function rowCallback($row, $args)
    {
        return $row;
    }
    /**
     * Called after getRows has retrieved all the rows that it would retrieve
     * but before they are returned to give one last place where they could
     * be further manipulated. For example, in MachineModel this callback
     * is used to make parallel network calls to get the status of each machine
     * returned by getRows. The default for this method is to leave the
     * rows that would be returned unchanged
     *
     * @param array $rows that have been calculated so far by getRows
     * @return array $rows after this final manipulation
     *
     */
    function postQueryCallback($rows)
    {
        return $rows;
    }
}
?>
ViewGit