viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]

adding query tool, a=chris

Chris Pollett [2011-08-06 09:Aug:th]
adding query tool, a=chris
Filename
bin/query_tool.php
diff --git a/bin/query_tool.php b/bin/query_tool.php
new file mode 100644
index 000000000..531b80a21
--- /dev/null
+++ b/bin/query_tool.php
@@ -0,0 +1,173 @@
+<?php
+/**
+ *  SeekQuarry/Yioop --
+ *  Open Source Pure PHP Search Engine, Crawler, and Indexer
+ *
+ *  Copyright (C) 2009, 2010, 2011  Chris Pollett chris@pollett.org
+ *
+ *  LICENSE:
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  END LICENSE
+ *
+ * @author Chris Pollett chris@pollett.org
+ * @package seek_quarry
+ * @subpackage bin
+ * @license http://www.gnu.org/licenses/ GPL3
+ * @link http://www.seekquarry.com/
+ * @copyright 2009, 2010, 2011
+ * @filesource
+ */
+
+if(php_sapi_name() != 'cli') {echo "BAD REQUEST"; exit();}
+
+/** Calculate base directory of script @ignore*/
+define("BASE_DIR", substr(
+    dirname(realpath($_SERVER['PHP_SELF'])), 0,
+    -strlen("/bin")));
+
+/** Load in global configuration settings */
+require_once BASE_DIR.'/configs/config.php';
+if(!PROFILE) {
+    echo "Please configure the search engine instance by visiting" .
+        "its web interface on localhost.\n";
+    exit();
+}
+
+/**
+ * Load FileCache class in case used
+ */
+require_once(BASE_DIR."/lib/file_cache.php");
+
+/** NO_CACHE means don't try to use memcache*/
+define("NO_CACHE", true);
+
+/** USE_FILECACHE will let us use this tool to store long running
+ *  queries into the filecache
+ */
+if(USE_FILECACHE) {
+    $CACHE = new FileCache(WORK_DIRECTORY."/cache/queries");
+    define("USE_CACHE", true);
+} else {
+    define("USE_CACHE", false);
+}
+/** Loads common constants for web crawling*/
+require_once BASE_DIR."/lib/crawl_constants.php";
+
+/**Load base controller class, if needed. */
+require_once BASE_DIR."/controllers/search_controller.php";
+
+/*
+ *  We'll set up multi-byte string handling to use UTF-8
+ */
+mb_internal_encoding("UTF-8");
+mb_regex_encoding("UTF-8");
+
+/**
+ * Tool to provide a command line query interface to indexes stored in
+ * Yioop! database. Running with no arguments gives a help message for
+ * this tool.
+ *
+ * @author Chris Pollett
+ * @package seek_quarry
+ */
+class QueryTool implements CrawlConstants
+{
+    /**
+     * Initializes the QueryTool, for now does nothing
+     */
+    function __construct()
+    {
+
+    }
+
+    /**
+     * Runs the QueryTool on the supplied command line arguments
+     */
+    function start()
+    {
+        global $argv, $INDEXING_PLUGINS;
+
+        if(!isset($argv[1])) {
+            $this->usageMessageAndExit();
+        }
+
+        $query = $argv[1];
+        $results_per_page = (isset($argv[2])) ? $argv[2] : 10;
+        $limit = (isset($argv[3])) ? $argv[3] : 0;
+
+
+        $start_time = microtime();
+        $controller = new SearchController($INDEXING_PLUGINS);
+        $data = $controller->processQuery($query, "query", "",$results_per_page,
+            $limit);
+
+        foreach($data['PAGES'] as $page) {
+            echo "============\n";
+            echo "TITLE: ". trim($page[self::TITLE]). "\n";
+            echo "URL: ". trim($page[self::URL]). "\n";
+            echo "IPs: ";
+            foreach($page[self::IP_ADDRESSES] as $address) {
+                echo $address." ";
+            }
+            echo "\n";
+            echo "DESCRIPTION: ".wordwrap(trim($page[self::DESCRIPTION]))."\n";
+            echo "Rank: ".$page[self::DOC_RANK]."\n";
+            echo "Relevance: ".$page[self::RELEVANCE]."\n";
+            echo "Proximity: ".$page[self::PROXIMITY]."\n";
+            echo "Score: ".$page[self::SCORE]."\n";
+            echo "============\n\n";
+        }
+        $data['ELAPSED_TIME'] = changeInMicrotime($start_time);
+
+        echo "QUERY STATISTICS\n";
+        echo "============\n";
+        echo "ELAPSED TIME: ".$data['ELAPSED_TIME']."\n";
+        echo "LOW: ".$data['LIMIT']."\n";
+        echo "HIGH: ".min($data['TOTAL_ROWS'],
+            $data['LIMIT'] + $data['RESULTS_PER_PAGE'])."\n";
+        echo "TOTAL ROWS: ".$data['TOTAL_ROWS']."\n";
+    }
+
+
+    /**
+     * Outputs the "how to use this tool message" and then exit()'s.
+     */
+    function usageMessageAndExit()
+    {
+        echo "\nquery_tool.php is used to run Yioop!";
+        echo " query from the command line.\n For example,\n";
+        echo "  php query_tool.php 'chris pollett' \n returns results ".
+            "from the default index of a search on 'chris pollett'.\n";
+        echo "The general command format is:";
+        echo "  php query_tool.php query num_results start_num lang_tag\n";
+        exit();
+    }
+}
+
+/**
+ * Used within PhraseModel called from SearchController to do stemming
+ * @return string IANA tag either default or from the command line.
+ */
+function getLocaleTag()
+{
+    global $argv;
+
+    return  (isset($argv[4])) ? $argv[4] : DEFAULT_LOCALE;
+}
+
+$query_tool =  new QueryTool();
+$query_tool->start();
+?>
ViewGit