viewgit/inc/functions.php:22 Function utf8_encode() is deprecated [8192]
<?php /** * SeekQuarry/Yioop -- * Open Source Pure PHP Search Engine, Crawler, and Indexer * * Copyright (C) 2009 - 2023 Chris Pollett chris@pollett.org * * LICENSE: * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <https://www.gnu.org/licenses/>. * * END LICENSE * * @author Chris Pollett chris@pollett.org * @license https://www.gnu.org/licenses/ GPL3 * @link https://www.seekquarry.com/ * @copyright 2009 - 2023 * @filesource */ namespace seekquarry\yioop\models; use seekquarry\yioop\configs as C; use seekquarry\yioop\library as L; use seekquarry\yioop\library\CrawlConstants; use seekquarry\yioop\library\UrlParser; /** * This class manages the editing of search verticals. This includes * allowing one to specify a search result should be filtered from * the results of a query, it also includes alterning the title and description * of a result from how it is stored in a particular index and it finally * includes creating, updating, deleting knowledge wiki results * To handle these activities this class leverages the existing group * wiki system of Yioop. Edited and filtered search results correspond * to group feed entries in a Search Group. Edited knowledge wiki entries * correspond to wiki entries in the Search Group. * * @author Chris Pollett */ class SearchverticalsModel extends GroupModel { /** * Used to hold an in_memory cache of what search results are to be * filtered * * @var array */ public $memory_filter = []; /** * Used to hold an in memory copy of the timestamp of the last time * a search result was altered * * @var int */ public $last_change = null; /** * Check if a URL is supposed to be filtered from search results. * * @param string $url to see if filtered * @param bool $compute_hash_key flag to control hashing before computing * GROUP_ITEM lookup. Sometimes $url might be already the hash of * a URL to check if filtered * @return bool whether the url should be filtered from search results or * not */ public function isFiltered($url, $compute_hash_key = false) { if (!empty($this->query_filter)) { if (in_array($url, $this->query_filter)) { return false; } } if ($compute_hash_key) { $host = UrlParser::getHost($url); } else { $host = $url; } if (isset($this->memory_filter[$host])) { return $this->memory_filter[$host]; } else { if (count($this->memory_filter) > 2 * C\MIN_RESULTS_TO_GROUP) { $this->memory_filter = []; } list($parent_id, $user_id) = $this->hashIntPair($host, $compute_hash_key); $db = $this->db; $sql = "SELECT * FROM GROUP_ITEM WHERE PARENT_ID=? AND " . "USER_ID = ? AND GROUP_ID = " . C\SEARCH_GROUP_ID . " AND TYPE = ". C\SEARCH_FILTER_GROUP_ITEM . " " . $db->limitOffset(1); $result = $db->execute($sql, [$parent_id, $user_id]); $this->memory_filter[$host] = true; if (!$result) { $this->memory_filter[$host] = false; } else { $row = $db->fetchArray($result); if (!$row) { $this->memory_filter[$host] = false; } } return $this->memory_filter[$host]; } } /** * Returns the timestamp of the last time any of the search results * were edited. * * @return int timestamp of last edited search results */ public function lastChange() { if ($this->last_change != null) { return $this->last_change; } $db = $this->db; $sql = "SELECT MAX(PUBDATE) AS LAST_CHANGE FROM GROUP_ITEM"; $result = $db->execute($sql); if (!$result) { return 0; } else { $row = $db->fetchArray($result); if (!$row) { return 0; } } $this->last_change = $row['LAST_CHANGE']; return $this->last_change; } /** * Computes a hash value as an ordered pair of ints. Used to store * filtered urls into the GROUP_ITEM table. In this situation, * the ordered pair is used later for the PARENT_ID and USER_ID in the * (both of which have indexes) Search group look-up. * * @param string $input to be hash to a pair of integers * @param bool $compute_hash flag to chheck if a crawlHash is done * before converting the result to an ordered pair. In some situations * the url or host of the url has already been hashed so don't want * to hash it again. * @return array [int, int] that corresponds to the hash of the input * to keep postgres happy (no unsigned ints) we make the value * of this function signed */ public function hashIntPair($input, $compute_hash = true) { if ($compute_hash) { $input = rtrim($input, "/"); $hash = substr(L\crawlHash($input . "/", true), 1); } else { $hash = $input; } $front = abs(L\unpackInt(substr($hash, 0, 4))) - 2147483648; $back = abs(L\unpackInt(substr($hash, 4, 4))) - 2147483648; return [$front, $back]; } /** * Given a $query and a $locale_tag returns a ordered sets * of urls to put at the top of the search results for that query * if such a map has been defined. * * @param string $query user supplied query * @param string $locale_tag language that the lookup of urls should * be done for * @return array of urls that correspond to the query */ public function getQueryMap($query, $locale_tag) { $db = $this->db; list($parent_id, $user_id) = $this->hashIntPair("$query $locale_tag"); $sql = "SELECT URL FROM GROUP_ITEM " . "WHERE GROUP_ID = " . C\SEARCH_GROUP_ID . " AND PARENT_ID=? AND " . "USER_ID = ? AND TYPE= '" . C\QUERY_MAP_GROUP_ITEM . "' ". "ORDER BY TITLE"; $result = $db->execute($sql, [$parent_id, $user_id]); if (!$result) { return []; } $map_urls = []; if ($result) { while ($row = $db->fetchArray($result)) { $map_urls[] = trim($row['URL']); } } return $map_urls; } /** * Stores a query map into the public database. * A query map associate a $query in a $locale_tag language to a set of * urls desired to be at the top of the search results. * @param string $query that triggers the mapping * @param array $map_urls urls that should appear at the top of the search * results * @param string $locale_tag for the language the map should apply to */ public function setQueryMap($query, $map_urls, $locale_tag) { $db = $this->db; list($parent_id, $user_id) = $this->hashIntPair("$query $locale_tag"); $delete_sql = "DELETE FROM GROUP_ITEM WHERE GROUP_ID = " . C\SEARCH_GROUP_ID . " AND PARENT_ID=? AND " . "USER_ID = ? AND TYPE= '" . C\QUERY_MAP_GROUP_ITEM . "'"; $time = time(); $db->execute($delete_sql, [$parent_id, $user_id]); $i = 0; foreach($map_urls as $url) { $out_num = sprintf("%'.04d", $i); $id = $this->addGroupItem($parent_id, C\SEARCH_GROUP_ID, $user_id, "$out_num", "", C\QUERY_MAP_GROUP_ITEM, $time, trim($url)); $i++; } } /** * Get the knowledge wiki page corresponding to a search query. * This is used in wiki read mode for search result verticals or edit mode * (the wiki info is not pre-parsed) for editing the knowledge wiki page. * * @param string $query to get knowledge wiki results for * @param string $locale_tag the locale tag language that one want the * results for * @param bool $edit_mode whether the wiki page should be pre-parsed * suitable for display in query results) or left unparsed (suitable * for editing). * @return array knowledge wiki page info */ public function getKnowledgeWiki($query, $locale_tag, $edit_mode = false) { $db = $this->db; $query = mb_strtolower(preg_replace("/-|\s/u", "_", $query)); if ($edit_mode) { $row = $this->getPageInfoByName(C\SEARCH_GROUP_ID, $query, $locale_tag, "edit"); } else { $sql = "SELECT ID, PAGE, DISCUSS_THREAD FROM GROUP_PAGE " . "WHERE GROUP_ID = " . C\SEARCH_GROUP_ID . " AND " . "TITLE = ? AND LOCALE_TAG = ? ". $db->limitOffset(0, 1); $result = $db->execute($sql, ["$query", $locale_tag]); if (!$result) { return false; } $row = $db->fetchArray($result); if (!$row) { return false; } } return $row; } /** * Delete the knowledge wiki pages associated with a given locale * * @param string $locale_tag locale to delete knowledge wiki pages for */ public function deleteKnowledgeWiki($locale_tag) { $db = $this->db; $sql = "SELECT ID FROM GROUP_PAGE WHERE GROUP_ID=" . C\SEARCH_GROUP_ID . " AND LOCALE_TAG=?"; $params = [$locale_tag]; $result = $db->execute($sql, $params); if ($result) { while ($row = $db->fetchArray($result)) { list($folder, $thumb_folder, ) = $this->getGroupPageResourcesFolders(C\SEARCH_GROUP_ID, $row['ID'], "", false, false); if (!empty($folder) && file_exists($folder)) { $db->unlinkRecursive($folder); } if (!empty($thumb_folder) && file_exists($thumb_folder)) { $db->unlinkRecursive($thumb_folder); } } } $sql = "DELETE FROM GROUP_PAGE WHERE GROUP_ID=" . C\SEARCH_GROUP_ID . " AND LOCALE_TAG=?"; $db->execute($sql, $params); $sql = "DELETE FROM GROUP_PAGE_HISTORY WHERE GROUP_ID=" . C\SEARCH_GROUP_ID . " AND LOCALE_TAG=?"; $db->execute($sql, $params); } /** * Updates the title and description text that will be presented * when a given url appears in search results * @param int $id if the url has been edited previous then the id of the * group item with the edit. If this is 0/empty then a new group item * for the edit is created. If -1 then deletes the entry * @param int $type either SEARCH_FILTER_GROUP_ITEM or * SEARCH_EDIT_GROUP_ITEM * @param string $url to change search result for * @param string $title new title for search result * @param string $description new snippet text for search result * @return mixed integer id of edited/created result or if used * to delete then false */ function updateUrlResult($id, $type, $url, $title, $description) { $db = $this->db; $this->last_change = time(); if ($type == C\SEARCH_FILTER_GROUP_ITEM) { $url = UrlParser::getHost($url); } list($parent_id, $user_id) = $this->hashIntPair($url); if (empty($id)) { $id = $this->addGroupItem($parent_id, C\SEARCH_GROUP_ID, $user_id, $title, $description, $type, $this->last_change, $url); } else { $item = $this->getEditedPageResult($url); $sql = "DELETE FROM GROUP_ITEM WHERE ID = ?"; $db->execute($sql, [$id]); unset($id); if (!empty($item['TYPE']) && $item['TYPE'] == C\SEARCH_FILTER_GROUP_ITEM) { $sql = "DELETE FROM GROUP_ITEM WHERE URL = ?"; $db->execute($sql, [$url . "/"]); } $id = $this->addGroupItem($parent_id, C\SEARCH_GROUP_ID, $user_id, $title, $description, $type, $this->last_change, $url); } return $id ?? false; } /** * Returns any edited search result associated with a url * * @param string $url url to get edit search results for * @return mixed either false if no edited search results or an * associative array containing edited results if they exist */ function getEditedPageResult($url) { $db = $this->db; $host = UrlParser::getHost($url); list($parent_id, $user_id) = $this->hashIntPair($host); $sql = "SELECT ID, TITLE, URL, DESCRIPTION, PUBDATE, EDIT_DATE, TYPE " . "AS URL_ACTION FROM GROUP_ITEM " . "WHERE GROUP_ID = " . C\SEARCH_GROUP_ID . " AND PARENT_ID=? AND " . "USER_ID = ? ". $db->limitOffset(0, 1); $result = $db->execute($sql, [$parent_id, $user_id]); if ($result) { $row = $db->fetchArray($result); if ($row && $row['URL_ACTION'] == C\SEARCH_FILTER_GROUP_ITEM) { return $row; } } list($parent_id, $user_id) = $this->hashIntPair($url); $result = $db->execute($sql, [$parent_id, $user_id]); if (!$result) { return false; } $row = $db->fetchArray($result); if (!$row) { return false; } return $row; } /** * Given an array page summaries, for each summary check if the * url corresponds to a search result that was human edited, if so, * replace and format it. * * @param array $results web pages summaries (these in turn are * arrays!) * @param array $words keywords (typically what was searched on) * @param int $description_length length of the description * @return array summaries which have been snippified and bold faced */ public function incorporateEditedPageResults($results, $words = null, $description_length = self::DEFAULT_DESCRIPTION_LENGTH) { if (isset($results['PAGES'])) { $pages = $results['PAGES']; $num_pages = count($pages); } else { $output['TOTAL_ROWS'] = 0; $output['PAGES'] = null; return; } $deleted_a_page = false; for ($i = 0; $i < $num_pages; $i++) { $page = $pages[$i]; if (!isset($page[self::URL])) { unset($pages[$i]); $deleted_a_page = true; continue; } $url_parts = explode("|", $page[self::URL]); if (count($url_parts) > 1) { $url = trim($url_parts[1]); } else { $url = $page[self::URL]; } if (($summary = $this->getEditedPageResult($url)) && !empty($summary["URL_ACTION"]) && $summary["URL_ACTION"] > 0) { $page[self::URL] = $url; foreach ([self::TITLE => "TITLE", self::DESCRIPTION => "DESCRIPTION"] as $result_field => $edit_field) { if (isset($summary[$edit_field])) { $page[$result_field] = $summary[$edit_field]; } } $page = $this->formatSinglePageResult($page, $words, self::DEFAULT_DESCRIPTION_LENGTH); $pages[$i] = $page; } } $output['TOTAL_ROWS'] = $results['TOTAL_ROWS']; $output['PAGES'] = ($deleted_a_page) ? $pages : array_values($pages); return $output; } }