Adjust copyright years
<?php
/**
* SeekQuarry/Yioop --
* Open Source Pure PHP Search Engine, Crawler, and Indexer
*
* Copyright (C) 2009 - 2020 Chris Pollett chris@pollett.org
*
* LICENSE:
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* END LICENSE
*
*
* @author Chris Pollett chris@pollett.org
* @license https://www.gnu.org/licenses/ GPL3
* @link https://www.seekquarry.com/
* @copyright 2009 - 2020
* @filesource
*/
namespace seekquarry\yioop\library;
use seekquarry\yioop\configs as C;
/**
* For crawlHash
*/
require_once __DIR__ . "/Utility.php";
/**
* To convert to Iso639-2
*/
require_once __DIR__ . "/LocaleFunctions.php";
/**
 * Collection of static helper methods for computer vision tasks. At
 * present the only task supported is optical character recognition (OCR),
 * which is carried out by running the external command stored in the
 * TESSERACT configuration constant.
 */
class ComputerVision
{
    /**
     * Reports whether OCR can be attempted on this installation.
     *
     * @return mixed the result of C\nsdefined("TESSERACT"); presumably true
     *  exactly when a TESSERACT constant has been configured (confirm
     *  against the definition of nsdefined in the configs namespace)
     */
    public static function ocrEnabled()
    {
        return C\nsdefined("TESSERACT");
    }
    /**
     * Runs the configured OCR command on an image and returns the text it
     * wrote out.
     *
     * The command is invoked as
     *   TESSERACT image_path output_base [-l lang1+lang2+...]
     * where output_base is a file in the CRAWL_DIR/temp/ folder named after
     * the image. The command is expected to leave its result in
     * output_base.txt; that file is read and then deleted.
     *
     * @param string $image_path file system path of the image to read text
     *  from
     * @param array $langs locale tags of the languages the text might be
     *  written in. Each is converted using localeTagToIso639_2Tag before
     *  being handed to the OCR command. A single locale tag string is also
     *  accepted
     * @return string text recognized in the image with leading and trailing
     *  whitespace removed, or the empty string if OCR is not configured,
     *  the temp folder could not be created, or no output was produced
     */
    public static function recognizeText($image_path,
        $langs = [C\DEFAULT_LOCALE])
    {
        if (!C\nsdefined("TESSERACT")) {
            return "";
        }
        $temp_dir = C\CRAWL_DIR . "/temp/";
        if (!file_exists($temp_dir)) {
            mkdir($temp_dir);
        }
        if (!file_exists($temp_dir)) {
            return "";
        }
        $image_file_name = pathinfo($image_path, PATHINFO_BASENAME);
        /* Collect only usable language tags, so an unconvertible locale
           cannot produce a malformed list such as "eng+" or "+eng"
         */
        $iso_langs = [];
        foreach ((array)$langs as $lang) {
            $iso_lang = localeTagToIso639_2Tag($lang);
            if (is_string($iso_lang) && $iso_lang !== "") {
                $iso_langs[] = $iso_lang;
            }
        }
        $ocr_file = $temp_dir . $image_file_name . "-out";
        /* Every argument is shell-quoted so that spaces or shell
           metacharacters in an image name or locale tag cannot split or
           alter the command. C\TESSERACT itself is left as is because it
           is site configuration and might legitimately carry options.
           NOTE(review): escapeshellarg is locale sensitive and may drop
           non-ASCII bytes under a non-UTF-8 LC_CTYPE -- confirm the locale
           the crawler runs with if image names can be non-ASCII
         */
        $ocr_exec = C\TESSERACT . " " . escapeshellarg($image_path) . " " .
            escapeshellarg($ocr_file);
        if (!empty($iso_langs)) {
            // omit -l entirely rather than pass it an empty value
            $ocr_exec .= " -l " . escapeshellarg(implode("+", $iso_langs));
        }
        exec($ocr_exec);
        // the OCR command appends .txt to the output base it was given
        $ocr_file .= ".txt";
        $ocr_string = "";
        if (file_exists($ocr_file)) {
            $contents = file_get_contents($ocr_file);
            if ($contents !== false) {
                $ocr_string = $contents;
            }
            // best-effort clean up; a failed delete should not lose the text
            @unlink($ocr_file);
        }
        return trim($ocr_string, " \t\n\r\0\x0B\x0C");
    }
}