Some more tweaks to initials handling, a=chris

Chris Pollett [2012-12-16 16:--:--]
Some more tweaks to initials handling, a=chris
Filename
lib/phrase_parser.php
locale/ar/configure.ini
locale/de/configure.ini
locale/en-US/configure.ini
locale/es/configure.ini
locale/fa/configure.ini
locale/fr-FR/configure.ini
locale/he/configure.ini
locale/hi/configure.ini
locale/in-ID/configure.ini
locale/it/configure.ini
locale/ja/configure.ini
locale/kn/configure.ini
locale/ko/configure.ini
locale/pl/configure.ini
locale/pt/configure.ini
locale/ru/configure.ini
locale/th/configure.ini
locale/tr/configure.ini
locale/vi-VN/configure.ini
locale/zh-CN/configure.ini
tests/phrase_parser_test.php
diff --git a/lib/phrase_parser.php b/lib/phrase_parser.php
index 57f154859..b7d4268e4 100755
--- a/lib/phrase_parser.php
+++ b/lib/phrase_parser.php
@@ -184,11 +184,11 @@ class PhraseParser
         //create_function's
         static $replace_function0, $replace_function1, $replace_function2;

-        $acronym_pattern = "/\b[A-Za-z](\.\s*[A-Za-z])+(\.)?/";
+        $acronym_pattern = "/\b[A-Za-z](\.\s*[A-Za-z])+(\.|\b)/";
         if(!isset($replace_function0)) {
             $replace_function0 = create_function('$matches', '
                 $result = "_".mb_strtolower(
-                    mb_ereg_replace("\.", "", $matches[0]));
+                    mb_ereg_replace("\.\s*", "", $matches[0]));
                 return $result;');
         }
         $string = preg_replace_callback($acronym_pattern,
diff --git a/locale/ar/configure.ini b/locale/ar/configure.ini
index 9a5b42881..e7aab09d4 100755
--- a/locale/ar/configure.ini
+++ b/locale/ar/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = ""
 ; search_controller.php line: 1311
 search_controller_cached_version = ""
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/locale/de/configure.ini b/locale/de/configure.ini
index c3af89e2b..f987fdcaf 100755
--- a/locale/de/configure.ini
+++ b/locale/de/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = ""
 ; search_controller.php line: 1311
 search_controller_cached_version = ""
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/locale/en-US/configure.ini b/locale/en-US/configure.ini
index ff689665f..9f77ff776 100755
--- a/locale/en-US/configure.ini
+++ b/locale/en-US/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = "Yioop Cache Page... This page has been modifi
 ; search_controller.php line: 1311
 search_controller_cached_version = "This cached version of %s was obtained by the Yioop crawler on %s."
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = "Toggle History"
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = "All Cached Versions - Change Year and/or Months to see Links"
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = "Year:"
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = "Month:"
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = "Toggle Extracted Headers and Summaries"
 ;
 ; settings_controller.php line: 134
@@ -559,7 +559,7 @@ crawlstatus_view_no_fetcher = "No Fetcher Queries Yet"
 crawlstatus_view_most_recent_urls = "Most Recent Urls"
 ;
 ; crawlstatus_view.php line: 184
-crawlstatus_view_no_recent_urls = "No Recent Urls"
+crawlstatus_view_no_recent_urls = "No Recent Urls (Could mean only link data)"
 ;
 ; crawlstatus_view.php line: 188
 crawlstatus_view_previous_crawls = "Previous Crawls"
diff --git a/locale/es/configure.ini b/locale/es/configure.ini
index 9e3214bad..712627197 100755
--- a/locale/es/configure.ini
+++ b/locale/es/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = "Página de cache Yioop ... Esta p&aacut
 ; search_controller.php line: 1311
 search_controller_cached_version = "Esta versión en caché de %s se obtuvo mediante el rastreador Yioop en %s."
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/locale/fa/configure.ini b/locale/fa/configure.ini
index f67b169c0..961f3d231 100755
--- a/locale/fa/configure.ini
+++ b/locale/fa/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = ""
 ; search_controller.php line: 1311
 search_controller_cached_version = ""
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/locale/fr-FR/configure.ini b/locale/fr-FR/configure.ini
index 50013c3fc..9be588daf 100755
--- a/locale/fr-FR/configure.ini
+++ b/locale/fr-FR/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = ""
 ; search_controller.php line: 1311
 search_controller_cached_version = ""
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/locale/he/configure.ini b/locale/he/configure.ini
index 3f68481e1..8d8f705eb 100755
--- a/locale/he/configure.ini
+++ b/locale/he/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = ""
 ; search_controller.php line: 1311
 search_controller_cached_version = ""
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/locale/hi/configure.ini b/locale/hi/configure.ini
index 0774059f3..0be2baa63 100755
--- a/locale/hi/configure.ini
+++ b/locale/hi/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = ""
 ; search_controller.php line: 1311
 search_controller_cached_version = ""
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/locale/in-ID/configure.ini b/locale/in-ID/configure.ini
index e8f3ecf33..342f6c530 100755
--- a/locale/in-ID/configure.ini
+++ b/locale/in-ID/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = ""
 ; search_controller.php line: 1311
 search_controller_cached_version = ""
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/locale/it/configure.ini b/locale/it/configure.ini
index a51fcd27b..07c4d38df 100755
--- a/locale/it/configure.ini
+++ b/locale/it/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = "Pagina archiviata Yioop... Questa pagina &egr
 ; search_controller.php line: 1311
 search_controller_cached_version = "Questa versione archiviata di %s è stata ottenuta dal Crawler di Yioop il %s."
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/locale/ja/configure.ini b/locale/ja/configure.ini
index 2a6de9e64..27546bf77 100755
--- a/locale/ja/configure.ini
+++ b/locale/ja/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = ""
 ; search_controller.php line: 1311
 search_controller_cached_version = "%sのこのキャッシュされたバージョンは%sのウィオップから入手しました。"
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/locale/kn/configure.ini b/locale/kn/configure.ini
index d53a063aa..a1ddaa358 100755
--- a/locale/kn/configure.ini
+++ b/locale/kn/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = ""
 ; search_controller.php line: 1311
 search_controller_cached_version = ""
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/locale/ko/configure.ini b/locale/ko/configure.ini
index 7af0d50d2..07e0b9b8a 100755
--- a/locale/ko/configure.ini
+++ b/locale/ko/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = ""
 ; search_controller.php line: 1311
 search_controller_cached_version = "현재 캐시 버젼 %s 은 Yioop 크롤 %s 에 의하여 얻어 졌습니다. "
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/locale/pl/configure.ini b/locale/pl/configure.ini
index 929e8e812..4691ab01e 100755
--- a/locale/pl/configure.ini
+++ b/locale/pl/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = ""
 ; search_controller.php line: 1311
 search_controller_cached_version = ""
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/locale/pt/configure.ini b/locale/pt/configure.ini
index b827a1c09..4fb5d8685 100755
--- a/locale/pt/configure.ini
+++ b/locale/pt/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = ""
 ; search_controller.php line: 1311
 search_controller_cached_version = ""
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/locale/ru/configure.ini b/locale/ru/configure.ini
index 340f81076..e6eceafeb 100755
--- a/locale/ru/configure.ini
+++ b/locale/ru/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = ""
 ; search_controller.php line: 1311
 search_controller_cached_version = ""
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/locale/th/configure.ini b/locale/th/configure.ini
index 33af79884..a4a377a4f 100755
--- a/locale/th/configure.ini
+++ b/locale/th/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = ""
 ; search_controller.php line: 1311
 search_controller_cached_version = ""
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/locale/tr/configure.ini b/locale/tr/configure.ini
index 16a189f9f..6f2da1906 100755
--- a/locale/tr/configure.ini
+++ b/locale/tr/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = ""
 ; search_controller.php line: 1311
 search_controller_cached_version = ""
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/locale/vi-VN/configure.ini b/locale/vi-VN/configure.ini
index 305cdd15a..4abc54df5 100755
--- a/locale/vi-VN/configure.ini
+++ b/locale/vi-VN/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = ""
 ; search_controller.php line: 1311
 search_controller_cached_version = "Trang gốc này: %s đã tìm được bởi công cụ tìm kiẽm Yioop vào ngày %s."
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/locale/zh-CN/configure.ini b/locale/zh-CN/configure.ini
index 421ce8e20..8842ac87f 100755
--- a/locale/zh-CN/configure.ini
+++ b/locale/zh-CN/configure.ini
@@ -445,19 +445,19 @@ search_controller_cache_comment = ""
 ; search_controller.php line: 1311
 search_controller_cached_version = ""
 ;
-; search_controller.php line: 1317
+; search_controller.php line: 1319
 search_controller_history = ""
 ;
-; search_controller.php line: 1415
+; search_controller.php line: 1417
 search_controller_all_cached = ""
 ;
-; search_controller.php line: 1439
+; search_controller.php line: 1441
 search_controller_year = ""
 ;
-; search_controller.php line: 1440
+; search_controller.php line: 1442
 search_controller_month = ""
 ;
-; search_controller.php line: 1510
+; search_controller.php line: 1512
 search_controller_header_summaries = ""
 ;
 ; settings_controller.php line: 134
diff --git a/tests/phrase_parser_test.php b/tests/phrase_parser_test.php
index dafc7850a..30bbbdb99 100644
--- a/tests/phrase_parser_test.php
+++ b/tests/phrase_parser_test.php
@@ -68,6 +68,16 @@ class PhraseParserTest extends UnitTest
      */
     public function extractPhrasesTestCase()
     {
+        $phrase_string = <<< EOD
+Dr. T.Y Lin's home page. J. R. R. Tolkien
+EOD;
+        $word_lists = PhraseParser::extractPhrasesInLists($phrase_string,
+            "en-US", true);
+        $words = array_keys($word_lists);
+        $this->assertTrue(in_array("dr", $words), "Abbreviation 1");
+        $this->assertTrue(in_array("_ty", $words),"Initials 1");
+        $this->assertTrue(in_array("_jrr", $words),"Initials 2");
+
         $phrase_string = <<< EOD
 THE THE
 ‘Deep Space nine’ ‘Deep Space’ version of GIANT
ViewGit