Last commit for src/configs/GroupWikiTool.php: 9bd65ef0554db88e414420bfaae7791b9d6e9275

2nd pass at importer for dokuwiki's into yioop

Chris Pollett [2024-06-19 16:Jun:th]
2nd pass at importer for dokuwiki's into yioop
<?php
/**
 * SeekQuarry/Yioop --
 * Open Source Pure PHP Search Engine, Crawler, and Indexer
 *
 * Copyright (C) 2009 - 2024  Chris Pollett chris@pollett.org
 *
 * LICENSE:
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * END LICENSE
 *
 * GroupWikiTool is used to manage the integrity of resource folders for
 * wiki pages of Yioop Groups.
 *
 * A description of its usage is given in the $usage global variable
 *
 *
 * @author Chris Pollett
 * @license https://www.gnu.org/licenses/ GPL3
 * @link https://www.seekquarry.com/
 * @copyright 2009 - 2024
 * @filesource
 */

namespace seekquarry\yioop\configs;

use seekquarry\yioop\configs as C;
use seekquarry\yioop\library as L;
use seekquarry\yioop\library\VersionManager;
use seekquarry\yioop\models\Model;
use seekquarry\yioop\models\GroupModel;

/*
 * Refuse to run unless invoked from the command line. The request is also
 * rejected if the IS_OWN_WEB_SERVER constant of this namespace has been
 * defined.
 */
if (php_sapi_name() != 'cli' ||
    defined("seekquarry\\yioop\\configs\\IS_OWN_WEB_SERVER")) {
    echo "BAD REQUEST"; exit();
}
/** Loads common utility functions*/
require_once __DIR__."/../library/Utility.php";
/** Loads locale functions (presumably where L\tl(), used below, lives) */
require_once __DIR__."/../library/LocaleFunctions.php";
ini_set("memory_limit", C\ARC_TOOL_MEMORY_LIMIT);
/*
 * Without a configured profile there is nothing this tool can operate on,
 * so tell the user how to create one and stop.
 */
if (!C\PROFILE) {
    // note the trailing space: the two fragments are joined into one sentence
    echo "Please configure the search engine instance by visiting " .
        "its web interface on localhost.\n";
    exit();
}
/**
 * Used to print out a description of how to use GroupWikiTool.php.
 * It is echoed whenever the command or its arguments are missing or
 * not recognized.
 * @var string
 */
$usage = <<<EOD
GroupWikiTool.php
==============

GroupWikiTool is used to manage the integrity of resource folders for
wiki pages of Yioop Groups. The tool has a command to look up what is
the path for a wiki page of a given group for a locale. To maintain
previous version of wiki page resources, Yioop writes a .archive folder
in the wiki page's resource folder and uses it to maintain
previous versions of this folder. Before changes to the .archive
folder are made, a LOCK file is written. In the event of a crash before
completion of an operation, this LOCK file might be present and
prevent further changes to the resources in a wiki. This tool lets one
clear this lock. It also allows one to remove the existing .archive folder
and rebuild it from scratch. It allows a user to save a new version
snapshot of the current resource folder. Finally, it can import the pages
of a DokuWiki into the wiki of a Yioop Group.

Usage
=====
php GroupWikiTool.php command folder

php GroupWikiTool.php clear-lock folder
  if folder is the name of a Group Wiki page resource folder, then this
  operation will remove any LOCK file on the .archive folder

php GroupWikiTool.php import dokuwiki group_name locale_tag wiki_path
  imports the gzipped page revisions found in the attic subfolder of the
  folder wiki_path as wiki pages, for the locale locale_tag, of the
  existing Yioop group group_name. Currently, dokuwiki is the only
  supported wiki type.

php GroupWikiTool.php path group_name page_name locale_tag
  returns the resource and thumb folders for the given group, page,
  and locale.

php GroupWikiTool.php reset folder
  if folder is the name of a Group Wiki page
  resource folder, then this will delete the current .archive folder and replace
  it with a freshly computed one

php GroupWikiTool.php version folder
  if folder is the name of a Group Wiki page resource folder, then this
  will save a new version snapshot to the .archive subfolder

EOD;
/*
 * Command dispatch. $argv[1] names the command; the meaning of the
 * remaining arguments depends on the command, so defaults for them are
 * chosen per command rather than globally.
 */
if (empty($argv[1])) {
    echo $usage;
    exit();
}
/*
 * Folder argument used by clear-lock, reset, and version. When it is
 * omitted these commands act on the current working directory.
 */
$folder = empty($argv[2]) ? getcwd() : $argv[2];
switch ($argv[1]) {
    case "clear-lock":
        $lock_file = $folder . "/.archive/LOCK";
        // test for the LOCK file itself, not merely its enclosing folder,
        // so unlink is never called on a file that is not there
        if (!file_exists($lock_file)) {
            echo "No Group Wiki Page Resource Lock file found!\n";
        } else if (unlink($lock_file)) {
            echo "Group Wiki Page Resource Lock file removed!\n";
        } else {
            echo "Could not remove Group Wiki Page Resource Lock file!\n";
        }
        break;
    case "export":
        // recognized as a command, but no export behavior is implemented
        break;
    case "import":
        // needs all of: type group_name locale_tag wiki_path
        if (empty($argv[5])) {
            echo $usage;
            exit();
        }
        list(,, $type, $group_name, $locale_tag, $wiki_path,) = $argv;
        if ($type != "dokuwiki") {
            echo "Only importing from Dokuwiki's currently supported!\n";
            // nothing can be imported, so do not go on to call importWiki
            break;
        }
        // importWiki() is defined further down in this file
        importWiki($type, $group_name, $locale_tag, $wiki_path);
        break;
    case "path":
        // omitted arguments fall back to the Public group's Main page in
        // the default locale
        $group_name = empty($argv[2]) ? "Public" : $argv[2];
        $page_name = empty($argv[3]) ? "Main" : $argv[3];
        $locale_tag = empty($argv[4]) ? C\DEFAULT_LOCALE : $argv[4];
        $group_model = new GroupModel();
        $group_id = $group_model->getGroupId($group_name);
        // this file tests getGroupId() both for a falsy value and for -1 to
        // detect an unknown group, so either is treated as "not found" here
        if (!$group_id || $group_id == -1) {
            echo "Could not locate that group name!!\n\n";
            echo $usage;
            exit();
        }
        $page_id = $group_model->getPageId($group_id, $page_name,
            $locale_tag);
        if (!$page_id) {
            echo "Could not locate that page name in $group_name wiki!!\n\n";
            echo $usage;
            exit();
        }
        // index 0 is reported as the resource folder, index 1 as the
        // thumb folder
        $folders =
            $group_model->getGroupPageResourcesFolders($group_id, $page_id);
        if (empty($folders[1])) {
            echo "$page_name page folders not yet created!!\n\n";
            echo $usage;
            exit();
        }
        echo "Resource folder: {$folders[0]}\nThumb folder: {$folders[1]}\n";
        break;
    case "reset":
        // discard any existing archive before a VersionManager is
        // constructed again on the folder
        if (file_exists($folder . "/.archive")) {
            $model = new Model();
            $db = $model->db;
            $db->unlinkRecursive($folder . "/.archive");
        }
        $vcs = new VersionManager($folder);
        break;
    case "version":
        $vcs = new VersionManager($folder);
        $vcs->createVersion();
        break;
    default:
        echo $usage;
        exit();
}
/**
 * Imports the page revisions of another wiki system into the wiki of a
 * Yioop group. Only DokuWiki is handled. Each gzipped revision file
 * $wiki_path/attic/PAGE_NAME.TIMESTAMP.txt.gz is decompressed, has its
 * DokuWiki markup rewritten by a sequence of regular expression
 * substitutions, and is then stored with GroupModel::setPageName, as the
 * ROOT_ID user, using TIMESTAMP as the pubdate of the revision. If the
 * attic has no page called Main, revisions of the page called start are
 * stored under the name Main instead. Progress and error messages are
 * echoed.
 *
 * @param string $type kind of wiki being imported. Any value other than
 *  'dokuwiki' causes an error message to be output and nothing imported
 * @param string $group_name name of the Yioop group whose wiki should
 *  receive the pages; it is looked up with GroupModel::getGroupId
 * @param string $locale_tag locale tag passed to setPageName for every
 *  imported revision
 * @param string $wiki_path folder that contains the attic subfolder
 *  holding the *.txt.gz revision files
 */
function importWiki($type, $group_name, $locale_tag, $wiki_path)
{
    if ($type != 'dokuwiki') {
        echo "$type is an unknown wiki type\n";
        return;
    }
    $group_model = new GroupModel();
    $group_id = $group_model->getGroupId($group_name);
    // this file tests getGroupId() both for -1 and for a falsy value to
    // detect an unknown group, so either is treated as "not found" here
    if (empty($group_id) || $group_id == -1) {
        echo "Group $group_name does not exist in this Yioop instance!\n";
        return;
    }
    // characters allowed inside a heading or an emphasized span
    $class_or_id = '0-9a-zA-Z\_\-\s\.\,\:\;\"\[\/\%\?\^' .
        "\]\{\}\(\)\!\|।\&\`\’\‘©®™℠…\/\>,。):、" .
        "”“《》(「」★【】·;!—―?!،؛؞؟٪٬٭\‚\‘";
    /*
        First pass: DokuWiki markup to an intermediate form. Headings become
        ZZHnZZ placeholders rather than = runs directly, because the number
        of = signs is inverted between the two syntaxes and a direct rewrite
        would be matched again by a later heading rule. The \n in the
        single-quoted replacements is a literal backslash followed by n; it
        is turned into a real newline after both passes.
     */
    $doku_to_internals = [
        ["/^\s*======([$class_or_id]+)======/su", 'ZZH1ZZ$1ZZH1ZZ'],
        ["/(\A|\n)\s*======([$class_or_id]+)======/su", '\nZZH1ZZ$2ZZH1ZZ'],
        ["/(\A|\n)\s*=====([$class_or_id]+)=====/su", '\nZZH2ZZ$2ZZH2ZZ'],
        ["/(\A|\n)\s*====([$class_or_id]+)====/su", '\nZZH3ZZ$2ZZH3ZZ'],
        ["/(\A|\n)\s*===([$class_or_id]+)===/su", '\nZZH4ZZ$2ZZH4ZZ'],
        ["/(\A|\n)\s*==([$class_or_id]+)==/su", '\nZZH5ZZ$2ZZH5ZZ'],
        // **bold**
        ["/(\A|\s|\n)\s*\*\*\s*([$class_or_id]+)\s*\*\*/su",
            '$1\'\'\'$2\'\'\''],
        // //italic//
        ["@(\A|\s)//([$class_or_id]+)//@su", '$1\'\'$2\'\''],
        // __underline__ (the tag must be closed with </u>)
        ["/(\A|\s)\_\_([$class_or_id]+)\_\_/su", '$1<u>$2</u>'],
        // list items: leading - becomes #, leading * loses its indent
        ["/(\A|\n)\s*\-/su", "$1#"],
        ["/(\A|\n)\s*\*/u", "$1*"],
        // strip white space around the | of a link
        ["/\[([^\|]+)\s+\|([^\]]+)\]/su", "[$1|$2]"],
        ["/\[([^\|]+)\|\s+([^\]]+)\]/su", "[$1|$2]"],
        // media in the docs namespace becomes a page resource
        ["/\{\{\:?docs\:([^\|]+)\|([^\}\{]+)\}\}/su",
            '((resource:$1|$2))'],
        ["/\/\/$/u", "<br>\n"],
    ];
    // Second pass: heading placeholders to the final = delimited headings
    $internal_to_yioops = [
        ["/ZZH1ZZ\s*([$class_or_id]+)\s*ZZH1ZZ/su", '=$1='],
        ["/ZZH2ZZ\s*([$class_or_id]+)\s*ZZH2ZZ/su", '==$1=='],
        ["/ZZH3ZZ\s*([$class_or_id]+)\s*ZZH3ZZ/su", '===$1==='],
        ["/ZZH4ZZ\s*([$class_or_id]+)\s*ZZH4ZZ/su", '====$1===='],
        ["/ZZH5ZZ\s*([$class_or_id]+)\s*ZZH5ZZ/su", '=====$1====='],
    ];
    // preg_replace wants parallel pattern and replacement arrays
    $doku_matches = array_column($doku_to_internals, 0);
    $doku_replaces = array_column($doku_to_internals, 1);
    $internal_matches = array_column($internal_to_yioops, 0);
    $internal_replaces = array_column($internal_to_yioops, 1);
    $documents = glob("$wiki_path/attic/*.txt.gz");
    // glob() gives false on error and an empty array when nothing matches
    if (empty($documents)) {
        echo "No page revisions found in $wiki_path/attic\n";
        return;
    }
    $has_main_page = false;
    foreach ($documents as $pre_doc) {
        if (preg_match('/\/Main\.([^\/\.]+)\.txt\.gz$/', $pre_doc)) {
            $has_main_page = true;
            break;
        }
    }
    foreach ($documents as $pre_doc) {
        // only files named PAGE_NAME.TIMESTAMP.txt.gz are revisions
        if (!preg_match('/\/([^\/]+)\.([^\/\.]+)\.txt\.gz$/', $pre_doc,
            $matches)) {
            continue;
        }
        list(, $page_name, $timestamp) = $matches;
        if ($page_name == 'start' && !$has_main_page) {
            $page_name = 'Main';
        }
        $timestamp = intval($timestamp);
        if (empty($timestamp) || empty($page_name)) {
            echo "Skipping a revision of $page_name" .
                " with timestamp $timestamp\n";
            continue;
        }
        $compressed = file_get_contents($pre_doc);
        $document = ($compressed === false) ? false : gzdecode($compressed);
        if ($document === false) {
            echo "Skipping $pre_doc as it could not be read or " .
                "decompressed\n";
            continue;
        }
        $document = preg_replace($doku_matches, $doku_replaces, $document);
        if ($document !== null) {
            $document = preg_replace($internal_matches, $internal_replaces,
                $document);
        }
        // preg_replace yields null on failure (for instance text that is
        // not valid UTF-8 under the u modifier); do not store such a page
        if ($document === null) {
            echo "Skipping $pre_doc as its markup could not be converted\n";
            continue;
        }
        // turn the literal \n and \' sequences left in the text into a
        // newline and a quote. Note this also affects any such sequences
        // that were already present in the imported page.
        $document = str_replace('\\n',"\n", $document);
        $document = str_replace('\\\'',"'", $document);
        echo "Inserting $page_name revision from ". date("Y-m-d H:i:s",
            $timestamp) ."!\n";
        $group_model->setPageName(C\ROOT_ID,
            $group_id, $page_name, $document,
            $locale_tag, "[Wiki Import on " . time(). " ]",
            L\tl('social_component_page_created', $page_name),
            L\tl('social_component_page_discuss_here'),
            pubdate: $timestamp);
    }
}
ViewGit