Removes materialized meta code. Removes thesaurus based on wordnet code. On maximal phrases and single words stores up to 11 bytes of original as part of key. When shards are now added to dictionary, dictionary info is removed from shard. Does a check on amount of link text now before adding link as a link_doc to index, a=chris

Chris Pollett [2018-06-18 15:Jun:th]
Removes materialized meta code. Removes thesaurus based on wordnet code. On maximal phrases and single words stores up to 11 bytes of original as part of key. When shards are now added to dictionary, dictionary info is removed from shard. Does a check on amount of link text now before adding link as a link_doc to index, a=chris
Filename
src/configs/Config.php
src/configs/PublicHelpPages.php
src/controllers/SearchController.php
src/controllers/components/StoreComponent.php
src/css/search.css
src/executables/ArcTool.php
src/executables/Fetcher.php
src/executables/QueryTool.php
src/executables/QueueServer.php
src/library/CrawlConstants.php
src/library/IndexArchiveBundle.php
src/library/IndexDictionary.php
src/library/IndexManager.php
src/library/IndexShard.php
src/library/PhraseParser.php
src/library/Thesaurus.php
src/library/Utility.php
src/library/WebArchiveBundle.php
src/library/index_bundle_iterators/IndexBundleIterator.php
src/library/index_bundle_iterators/WordIterator.php
src/library/indexing_plugins/RecipePlugin.php
src/library/media_jobs/FeedsUpdateJob.php
src/locale/ar/configure.ini
src/locale/bn/configure.ini
src/locale/de/configure.ini
src/locale/en_US/configure.ini
src/locale/en_US/resources/Tokenizer.php
src/locale/es/configure.ini
src/locale/fa/configure.ini
src/locale/fr_FR/configure.ini
src/locale/he/configure.ini
src/locale/hi/configure.ini
src/locale/in_ID/configure.ini
src/locale/it/configure.ini
src/locale/ja/configure.ini
src/locale/kn/configure.ini
src/locale/ko/configure.ini
src/locale/nl/configure.ini
src/locale/pl/configure.ini
src/locale/pt/configure.ini
src/locale/ru/configure.ini
src/locale/te/configure.ini
src/locale/th/configure.ini
src/locale/tr/configure.ini
src/locale/vi_VN/configure.ini
src/locale/zh_CN/configure.ini
src/models/ParallelModel.php
src/models/PhraseModel.php
src/scripts/suggest.js
src/views/SearchView.php
tests/IndexShardTest.php
diff --git a/src/configs/Config.php b/src/configs/Config.php
index 47e137718..293cacc7f 100755
--- a/src/configs/Config.php
+++ b/src/configs/Config.php
@@ -710,6 +710,8 @@ nsconddefine('MAX_LINKS_PER_PAGE', 50);
 nsconddefine('AVG_LINKS_PER_PAGE', 24);
 /** maximum number of links to consider from a sitemap page */
 nsconddefine('MAX_LINKS_PER_SITEMAP', MEMORY_PROFILE * 80);
+/**  minimum char length of link text before a link gets its own document */
+nsconddefine('MIN_LINKS_TEXT_DOC', 6);
 /**  maximum number of words from links to consider on any given page */
 nsconddefine('MAX_LINKS_WORD_TEXT', 100);
 /**  maximum length of urls to try to queue, this is important for
diff --git a/src/configs/PublicHelpPages.php b/src/configs/PublicHelpPages.php
index ca3f33c2a..03444eaca 100644
--- a/src/configs/PublicHelpPages.php
+++ b/src/configs/PublicHelpPages.php
@@ -77,620 +77,620 @@ page_footer=

 sort=aname

-END_HEAD_VARS=Yioop Wiki Syntax=
-
-: Wiki syntax is a lightweight way to markup a text document so that
-it can be formatted and drawn nicely by Yioop.
-This page briefly describes the wiki syntax supported by Yioop.
-
-==Headings==
-: In wiki syntax headings of documents and sections are written as follows:
-
-<nowiki>
-=Level1=
-==Level2==
-===Level3===
-====Level4====
-=====Level5=====
-======Level6======
-</nowiki>
-
-and would look like:
-
-=Level1=
-==Level2==
-===Level3===
-====Level4====
-=====Level5=====
-======Level6======
-
-==Paragraphs==
-: In Yioop two new lines indicates a new paragraph. You can control
-the indent of a paragraph by putting colons followed by a space in front of it:
-
-<nowiki>
-: some indent
-
-:: a little more
-
-::: even more
-
-:::: that's sorta crazy
-</nowiki>
-
-which looks like:
-
-: some indent
-
-:: a little more
-
-::: even more
-
-:::: that's sorta crazy
-
-==Horizontal Rule==
-: Sometimes it is convenient to separate paragraphs or sections with a horizontal
-rule. This can be done by placing four hyphens on a line by themselves:
-<nowiki>
-----
-</nowiki>
-This results in a line that looks like:
-----
-
-==Text Formatting Within Paragraphs==
-: Within a paragraph it is often convenient to make some text bold, italics,
-underlined, etc. Below is a quick summary of how to do this:
-===Wiki Markup===
-{|
-|<nowiki>''italic''</nowiki>|''italic''
-|-
-|<nowiki>'''bold'''</nowiki>|'''bold'''
-|-
-|<nowiki>'''''bold and italic'''''</nowiki>|'''''bold and italic'''''
-|}
-
-===HTML Tags===
-: Yioop also supports several html tags such as:
-{|
-|<nowiki><del>delete</del></nowiki>|<del>delete</del>
-|-
-|<nowiki><ins>insert</ins></nowiki>|<ins>insert</ins>
-|-
-|<nowiki><s>strike through</s> or
-<strike>strike through</strike> </nowiki>|<s>strike through</s>
-|-
-|<nowiki><sup>superscript</sup> and
-<sub>subscript</sub></nowiki>|<sup>superscript</sup> and
-<sub>subscript</sub>
-|-
-|<nowiki><tt>typewriter</tt></nowiki>|<tt>typewriter</tt>
-|-
-|<nowiki><u>underline</u></nowiki>|<u>underline</u>
-|}
-
-===Spacing within Paragraphs===
-: The HTML entity
-<nowiki> </nowiki>
-can be used to create a non-breaking space. The tag
-<nowiki><br></nowiki>
-can be used to produce a line break.
-
-==Preformatted Text and Unformatted Text==
-: You can force text to be formatted as you typed it rather
-than using the layout mechanism of the browser using the
-<nowiki><pre>preformatted text tag.</pre></nowiki>
-Alternatively, a sequence of lines all beginning with a
-space character will also be treated as preformatted.
-
-: Wiki markup within pre tags is still parsed by Yioop.
-If you would like to add text that is not parsed, enclosed
-it in <tt><`mbox{nowiki}`> </`mbox{nowiki}`></tt> tags.
-
-==Styling Text Paragraphs==
-: Yioop wiki syntax offers a number of templates for
-control the styles, and alignment of text for
-a paragraph or group of paragraphs:<br />
-`{{`left| some text`}}`,<br /> `{{`right| some text`}}`,<br />
-and<br />
-`{{`center| some text`}}`<br /> can be used to left-justify,
-right-justify, and center a block of text. For example,
-the last command, would produce:
-{{center|
-some text
-}}
-If you know cascading style sheets (CSS), you can set
-a class or id selector for a block of text using:<br />
-`{{`class="my-class-selector" some text`}}`<br />and<br />
-`{{`id="my-id-selector" some text`}}`.<br />
-You can also apply inline styles to a block of text
-using the syntax:<br />
-`{{`style="inline styles" some text`}}`.<br />
-For example, `{{`style="color:red" some text`}}` looks
-like {{style="color:red" some text}}.
-
-==Lists==
-: The Yioop Wiki Syntax supported of ways of listing items:
-bulleted/unordered list, numbered/ordered lists, and
-definition lists. Below are some examples:
-
-===Unordered Lists===
-<nowiki>
-* Item1
-** SubItem1
-** SubItem2
-*** SubSubItem1
-* Item 2
-* Item 3
-</nowiki>
-would be drawn as:
-* Item1
-** SubItem1
-** SubItem2
-*** SubSubItem1
-* Item 2
-* Item 3
-
-===Ordered Lists===
-<nowiki>
-# Item1
-## SubItem1
-## SubItem2
-### SubSubItem1
-# Item 2
-# Item 3
-</nowiki>
-# Item1
-## SubItem1
-## SubItem2
-### SubSubItem1
-# Item 2
-# Item 3
-
-===Mixed Lists===
-<nowiki>
-# Item1
-#* SubItem1
-#* SubItem2
-#*# SubSubItem1
-# Item 2
-# Item 3
-</nowiki>
-# Item1
-#* SubItem1
-#* SubItem2
-#*# SubSubItem1
-# Item 2
-# Item 3
-
-===Definition Lists===
-<nowiki>
-;Term 1: Definition of Term 1
-;Term 2: Definition of Term 2
-</nowiki>
-;Term 1: Definition of Term 1
-;Term 2: Definition of Term 2
-
-==Tables==
-: A table begins with {`|`  and ends with `|`}. Cells are separated with | and
-rows are separated with |- as can be seen in the following
-example:
-<nowiki>
-{|
-|a||b
-|-
-|c||d
-|}
-</nowiki>
-{|
-|a||b
-|-
-|c||d
-|}
-Headings for columns and rows can be made by using an exclamation point, !,
-rather than a vertical bar |. For example,
-<nowiki>
-{|
-!a!!b
-|-
-|c||d
-|}
-</nowiki>
-{|
-!a!!b
-|-
-|c||d
-|}
-Captions can be added using the + symbol:
-<nowiki>
-{|
-|+ My Caption
-!a!!b
-|-
-|c||d
-|}
-</nowiki>
-{|
-|+ My Caption
-!a!!b
-|-
-|c||d
-|}
-Finally, you can put a CSS class or style attributes (or both) on the first line
-of the table to further control how it looks:
-<nowiki>
-{| class="wikitable"
-|+ My Caption
-!a!!b
-|-
-|c||d
-|}
-</nowiki>
-{| class="wikitable"
-|+ My Caption
-!a!!b
-|-
-|c||d
-|}
-Within a cell attributes like align, valign, styles, and class can be used. For
-example,
-<nowiki>
-{|
-| style="text-align:right;"| a| b
-|-
-| lalala | lalala
-|}
-</nowiki>
-{|
-| style="text-align:right;"| a| b
-|-
-| lalala | lalala
-|}
-
-==Math==
-
-: Math can be included into a wiki document by either using the math tag:
-<nowiki>
-<math>
-\sum_{i=1}^{n} i = frac{(n+1)(n)}{2}
-</math>
-</nowiki>
-
-<math>
-\sum_{i=1}^{n} i = frac{(n+1)(n)}{2}
-</math>
-
-or by enclosing the math in backticks:
-
-<pre>
-`[[1, -2],[3,4]]`
-</pre>
-
-`[[1, -2],[3,4]]`.
-
-Rendering of math is done using [[https://www.mathjax.org/|MathJax]], making us of the [[https://en.wikipedia.org/wiki/ASCIIMathML|ASCIImathml]] extensions.
-
-==Links and Relationships==
-: A hypertext link to another document can be inserted into a wiki page using
-the chain link icon in the GUI. Alternatively, there are several techniques
-for inserting a link into a page depending on whether the link is to a page
-within the same wiki group, is a link to a page on a different wiki
-group, or is a link to a different website. In addition to normal
-hypertext links, Yioop also supports relationship links.
-
-'''Intra-Group Wiki Links''' use the syntax:
-<nowiki>
-[[name_of_wiki_page]]
-or
-[[name_of_wiki_page|text for the link]]
-or
-[[name_of_wiki_page#heading_or_id_on_page|text for the link]]
-</nowiki>
-for example, to make a link to this Syntax page one could write,
-<nowiki>
-[[Syntax|Yioop Wiki Syntax Page]]
-</nowiki>
-which would look like,
-
-[[Syntax|Yioop Wiki Syntax Page]]
-
-'''Inter-Group Wiki Links''' use the syntax:
-<nowiki>
-[[name_of-group@name_of_wiki_page|text for the link]]
-</nowiki>
-
-'''Different Website Links''' use the syntax:
-<nowiki>
-[[website_url|text for the link]]
-</nowiki>
-
-: Relationships are a generalized form of link. They are used to express
-a more complicated linking between two wiki pages and have the syntax:
-
-<nowiki>
-[[relationship_type|wiki_page_name|text for the link]]
-</nowiki>
-
-: In the navigation dropdown for a Yioop wiki page there are items for
-what links to the current page and what relates to the current page
-based on the links and relationships a page belongs to.
-
-==Recent Places Dropdowns==
-: You can add a dropdown that can allow users to navigate to recently visited
-wiki pages using the syntax:
-
-<sub>`[`{recent_places}]</sub>
-
-This looks like:
-
-[{recent_places}]
-
-==Adding Resources to a Page==
-
-: Yioop wiki syntax supports adding search bars, audio, images, and video to a
-page. The magnifying class edit tool icon can be used to add a search bar via
-the GUI. This can also be added by hand with the syntax:
-<nowiki>
-{{search:default|size:small|placeholder:Search Placeholder Text}}
-</nowiki>
-This syntax is split into three parts each separated by a vertical bar |. The
-first part search:default means results from searches should come from the
-default search index. You can replace default with the timestamp of a specific
-index or mix if you do not want to use the default. The second group size:small
-indicates the size of the search bar to be drawn. Choices of size are small,
-medium, and large. Finally, placeholder:Search Placeholder Text indicates the
-grayed out background text in the search input before typing is done should
-read: Search Placeholder Text. Here is what the above code outputs:
-
-{{search:default|size:small|placeholder:Search Placeholder Text}}
-
-: Image, video and other media resources can be associated with a page by dragging
-and dropping them in the edit textarea or by clicking on the link click to select
-link in the gray box below the textarea. This would add wiki code such as
-
-<sub>((resource`:`myphoto.jpg|Resource Description))</sub>
-
-to the page. Only saving the page will save this code and upload the resource to
-the server. In the above ''myphoto.jpg'' is the resource that will be inserted and
-Resource Description is the alternative text to use in case the viewing browser
-cannot display jpg files. To add a resource
-from a different wiki page belonging to the same group to the current wiki
-page one can use a syntax like:
-
-<sub>((resource`:`Documentation:ConfigureScreenForm1.png|The work directory form))</sub>
-
-Here Documentation would be the page and ConfigureScreenForm1.png the resource.
-You can also insert resources from a data-string using ''resource-data'' rather than
-''resource''. For example:
-
-<sub>((resource-data`:`image/jpeg;base64,/9j/4AAQSkZJRg...rest of image data...|Seekquarry Logo))</sub>
-
-could be used to inline an image like:
-
-((resource-|The Seekquarry Logo))
-
-be aware though that the default maximum wiki page size is 512Kb (this can be set in src/configs/Config.php).
-
-: Sometimes it is useful to edit the basic resource link
-above to make a link which is a thumbnail of the resource which points to a
-separate page containing that resource. This can be done using the syntax:
-
-<sub>((resource-thumb`:`myphoto.jpg|Resource Description))</sub>
-
-: Similarly, by default for resources like PDFs, epub's, etc., the resource tag inlines
-the whole resource into the page, if instead one wants a clickable link to a page where
-the resource is displayed one can use the syntax:
-
-<sub>((resource-link`:`my_document.pdf|Resource Description))</sub>
-
-: Comma separated value files (.csv or CSV files) are inlined into a page as a table. Which rows and columns of the CSV to present in this table can be controlled by the resource line. The general format for including
-a CSV resource is:
-
-<sub> ((resource`:`resource_name.csv#config#top_left_cell#bottom_right_cell|Resource Description))</sub>
-
-For example,
-
-<sub>((resource`:`resource_name.csv##B2#C3|Resource Description))</sub>
-
-might output
-
-((resource-data:text/csv;base64,LCwsLAosLTIsMywsCiw1LDQsLAosLCwsCiwsLCwK##B2#C3|Example CSV with Headings))
-
-I.e., just the portion of the CSV given by the rectangle between the cells B2 and C3. Using a config directive we can omit the spreadsheet row and column headings as follows:
-
-<sub>((resource`:`resource_name.csv#noheadings#B2#C3|Resource Description)) </sub>
-
-which might output
-
-
-((resource-data:text/csv;base64,LCwsLAosLTIsMywsCiw1LDQsLAosLCwsCiwsLCwK#noheadings#B2#C3|Example CSV without Headings))
-
-CSV spreadsheet files can also be used to output a variety of charts. The general format for the command to insert a chart resource is:
-
-<sub>((resource-chart_type`:`resource_name.csv#char_config#x_start#x_end#y_start#y_end|Resource Description))</sub>
-
-Here ''chart_type'' can be one of ''bargraph'',  ''linegraph'', or ''pointgraph''. For example, one might have a line like:
-
-<sub>((resource-bargraph`:`resource_name.csv##B1#B4#C1#C4|Quadratic Function)) </sub>
-
-which could produce a chart like
-
-((resource-bargraph:##(1,1)#(2,4)#(3,9)#(4,16)|Quadratic Function))
-
-In the above example, the values for the `x` coordinates would come from the cells B1, B2, B3, B4 from
-''resource_name.csv '' and the values for the `y` coordinates would come from cells C1, C2, C3, C4 from
-''resource_name.csv ''. Alternatively, rather than use a CSV to get out data we can just list the points we want to plot with a command like:
-
-<sub>((resource-bargraph`:`##(1,1)#(2,4)#(3,9)#(4,16)|Quadratic Function))</sub>
-
-==Manipulating Page Resources==
-
-: A list of media that have already been associated with
-a page appears under the Page Resource heading below the textarea. This
-table allows the user to rename and delete resources as well as insert the
-same resource at multiple locations within the same document.
-
-: The resources section of the edit page can be thought of as similar to
-a folder in Windows or MacOS. One can have subfolders of the resource folder.
-
-: The '''Places''' dropdown at the top of the '''Page Resource''' section allows one to navigate
-these folders.
-
-: The '''Filter''' textfield lets you enter a search string.
-Clicking '''Go''' then shows only those resources
-which contain that search string in their title.
-
-: The '''Clip Folder''' dropdown is used to copy files between folders and pages.
-Its current value is the folder that the '''Clip Copy''' buttons next to resources
-will copy their resource to when clicked. You can set the '''Clip Folder''' to
-the current folder using the dropdown, then  move to the page and folder that
-you would like to copy stuff from and click the '''Clip Copy''' button of the
-desired resource.
-
-: The '''Name''', '''Size''', '''Modified''' header links above the resources list
-control the sort order for the resource list. If a page is a media list page,
-then even in read mode, the sort order selected is remembered when drawing the
-media list.
-
-: The '''Actions''' drop can be used to create new folders, new text files, and new csv
-text files within the current page resource folder. These are initial named beginning
-with ''untitled'' followed by some number, and if applicable a file extension.
-
-: Resources entries for the resources list consist first of an icon, followed by a textfield
-with a name for the resource, followed by buttons for actions that can be done to that resource
-(Rename, Add to Page, Clip Copy), followed by a link [X], which can be used to delete the resource.
-If a resource is editable the icon will look like a plus sign together with a pencil. Clicking
-on the icon will then let you edit the resource.
-
-===Text and CSV Resources===
-: For normal text files clicking edit will bring up a textarea with the context of the text to edit.
-For CSV (comma separated value) files this will present the file as an editable spreadsheet.
-Yioop spreadsheets can have equation much like Excel spreadsheets. Clicking on a cell lets one
-edit its contents. For example, if in the cell A3
-one entered the equation:
- = A1+A2
-then clicking out of the cell would cause it to refresh with the value of the sum of the contents of
-cells A1 and A2. In addition, to the standard arithmetic operators ['*', '/', '+', '-', '%'], the
-spreadsheet expressions can use float or integer literals, and can make use of the following table
-of built-in functions:
-
-{| class="wikitable"
-!Function Name!!Description
-|-
-!avg(x1,...,xn), avg(x1:xn)|| computes average of values of cells listed as arguments
-|-
-!ceil(x)|| rounds the value of x up to nearest integer
-|-
-!cell(i,j)|| returns the contents of the cell with column name of letter j, and row name i. For example, cell(2,'B') would return the contents of cell B2.
-|-
-!col(value, search_row, start_col, end_col)|| searches the row ''search_row'' between the columns
-''start_col'', ''end_col'' for ''value''. Returns the column name where this value was found or -1 if not found.
-For example, col(3, 2, "B", "D") might return C if the cell C2 had value 3.
-|-
-!exp(x)|| computes `e^x`
-|-
-!floor(x)|| rounds the value of x down to the nearest integer
-|-
-!log(x)|| computes `log x`
-|-
-!min(x1,...,xn), min(x1:xn)|| computes minimum value of cells listed as arguments
-|-
-!max(x1,...,xn), max(x1:xn)|| computes maximum value of cells listed as arguments
-|-
-!pow(x,y)|| computes `x^y`
-|-
-!row(value, search_col, start_row, end_row)|| searches the column ''search_col''
-between the rows ''start_row'', ''end_row'' for ''value''.
-Returns the row name where this value was found or -1 if not found.
-For example, row(3, "C", "1", "5") might return 2 if the cell C2 had value 3.
-|-
-!sqrt(x)|| computes `sqrt(x)`
-|-
-!sum(x1,...,xn), sum(x1:xn)|| computes sum of values of cells listed as arguments
-|-
-!username()|| returns username of the person using this CSV file
-|}
-
-===HTML, PDF and EPub Resources===
-: How HTML, PDF, EPub resources included on a page render depends on how the Yioop wiki software
-has been configured. If no special configuration has been done, then HTML and PDF documents
-will bbe rendered in an <iframe> tag within the current wiki page. In the EPub, case a link
-to download the resource will be given. If the wiki software detects the presence of the
-file APP_DIR/scripts/pdf.js ([[https://en.wikipedia.org/wiki/PDF.js|PDF.js]])
-or APP_DIR/scripts/epub.js ([[https://github.com/futurepress/epub.js|epub.js]]), the wiki
-system will render the resource in a Javascript viewer and will do things like remember reading
-position.
-
-
-===Video and Audio Resources===
-
-: Not all browsers support the same video and audio formats for playback. For this reason
-it sometimes is useful to have multiple video resources for the same video. For example,
-you might have a .ogv and .vp8 version of the same video recording. In read (non-edit)
-mode, the Yioop wiki system displays only one link for video or audio files that have
-the same name except for extension. It then includes the grouped file as separated <source>
-tags within either the <video> or <audio> html tag used to render the item in the browser.
-In this way, you can make your media take best advantages to whatever capabilities your
-client's browser has. If you don't feel like recoding your media in such a fancy way, a safe
-rule of thumb is that .mp3 audio will playback in all modern browser, and that .mp4 video
-will playback in all modern browser.
-
-: For video it is sometimes useful to add a subtitle or caption track. Yioop wiki supports
-[[https://en.wikipedia.org/wiki/WebVTT|WebVTT]] format subtitles and captions. To see how
-Yioop wiki makes use of these files, suppose you included a resource ''foo.mp4'' in your
-wiki pages, and you also had a file named ''foo-captions-en-US.vtt'' then when the HTML
-page is generated from your wiki page, a <track> tag for the caption file would be added
-to the <video> tag. A user seeing this page would then see in the video player a closed caption
-symbol and be able to turn on/off (defaults off) the English captions. If you wanted
-named the file ''foo-subtitles-en-US.vtt'' instead, then Yioop wiki would include it as a
-subtitles track (defaults on). You can add captions/subtitle files for as many languages as
-desired.
-
-: When viewing the page resources for a page in edit mode, one can see one file/resource and
-no grouping of resources by name is done. In this way you can keep track of exactly what
-resources are available for a page.
-
-==Page Settings, Page Type==
-
-: In edit mode for a wiki page, next to the page name, is a link [Settings].
-Clicking this link expands a form which can be used to control global settings
-for a wiki page.  This form contains a drop down for the page type, another
-drop down for the type of border for the page in non-logged in mode,
-a checkbox for whether a table of contents should be auto-generated from level 2
-and level three headings and then text
-fields or areas for the page title, author, meta robots, and page description.
-Beneath this one can specify another wiki page to be used as a header for this
-page and also specify another wiki page to be used as a footer for this page.
-
-: The contents of the page title is displayed in the browser title when the
-wiki page is accessed with the  Activity Panel collapsed or when not logged in.
-Similarly, in the collapsed or not logged in mode, if one looks as the HTML
-page source for the page,  in the head of document, <meta> tags for author,
-robots, and description are set according to these fields. These fields can
-be useful for search engine optimization. The robots meta tag can be
-used to control how search engine robots index the page. Wikipedia has more information on
-[[https://en.wikipedia.org/wiki/Meta_element|Meta Elements]].
-
-: The '''Standard''' page type treats the page as a usual wiki page.
-
-: '''Page Alias''' type redirects the current page to another page name. This can
-be used to handle things like different names for the same topic or to do localization
-of pages. For example, if you switch the locale from English to French and
-you were on the wiki page dental_floss when you switch to French the article
-dental_floss might redirect to the page dentrifice.
-
-: '''Media List''' type means that the page, when read, should display just the
-resources in the page as a list of thumbnails and links. These links for the
-resources go to a separate pages used to display these resources.
-This kind of page is useful for a gallery of
-images or a collection of audio or video files.
-
-: '''Presentation''' type is for a wiki page whose purpose is a slide presentation. In this mode,
-....
-on a line by itself is used to separate one slide. If presentation type is a selected a new
-slide icon appears in the wiki edit bar allowining one to easily add new slides.
-When the Activity panel is not collapsed and you are reading a presentation, it just
-displays as a single page with all slides visible. Collapsing the Activity panel presents
-the slides as a typical slide presentation using the
+END_HEAD_VARS=Yioop Wiki Syntax=
+
+: Wiki syntax is a lightweight way to markup a text document so that
+it can be formatted and drawn nicely by Yioop.
+This page briefly describes the wiki syntax supported by Yioop.
+
+==Headings==
+: In wiki syntax headings of documents and sections are written as follows:
+
+<nowiki>
+=Level1=
+==Level2==
+===Level3===
+====Level4====
+=====Level5=====
+======Level6======
+</nowiki>
+
+and would look like:
+
+=Level1=
+==Level2==
+===Level3===
+====Level4====
+=====Level5=====
+======Level6======
+
+==Paragraphs==
+: In Yioop two new lines indicate a new paragraph. You can control
+the indent of a paragraph by putting colons followed by a space in front of it:
+
+<nowiki>
+: some indent
+
+:: a little more
+
+::: even more
+
+:::: that's sorta crazy
+</nowiki>
+
+which looks like:
+
+: some indent
+
+:: a little more
+
+::: even more
+
+:::: that's sorta crazy
+
+==Horizontal Rule==
+: Sometimes it is convenient to separate paragraphs or sections with a horizontal
+rule. This can be done by placing four hyphens on a line by themselves:
+<nowiki>
+----
+</nowiki>
+This results in a line that looks like:
+----
+
+==Text Formatting Within Paragraphs==
+: Within a paragraph it is often convenient to make some text bold, italics,
+underlined, etc. Below is a quick summary of how to do this:
+===Wiki Markup===
+{|
+|<nowiki>''italic''</nowiki>|''italic''
+|-
+|<nowiki>'''bold'''</nowiki>|'''bold'''
+|-
+|<nowiki>'''''bold and italic'''''</nowiki>|'''''bold and italic'''''
+|}
+
+===HTML Tags===
+: Yioop also supports several html tags such as:
+{|
+|<nowiki><del>delete</del></nowiki>|<del>delete</del>
+|-
+|<nowiki><ins>insert</ins></nowiki>|<ins>insert</ins>
+|-
+|<nowiki><s>strike through</s> or
+<strike>strike through</strike> </nowiki>|<s>strike through</s>
+|-
+|<nowiki><sup>superscript</sup> and
+<sub>subscript</sub></nowiki>|<sup>superscript</sup> and
+<sub>subscript</sub>
+|-
+|<nowiki><tt>typewriter</tt></nowiki>|<tt>typewriter</tt>
+|-
+|<nowiki><u>underline</u></nowiki>|<u>underline</u>
+|}
+
+===Spacing within Paragraphs===
+: The HTML entity
+<nowiki>&nbsp;</nowiki>
+can be used to create a non-breaking space. The tag
+<nowiki><br></nowiki>
+can be used to produce a line break.
+
+==Preformatted Text and Unformatted Text==
+: You can force text to be formatted as you typed it rather
+than using the layout mechanism of the browser using the
+<nowiki><pre>preformatted text tag.</pre></nowiki>
+Alternatively, a sequence of lines all beginning with a
+space character will also be treated as preformatted.
+
+: Wiki markup within pre tags is still parsed by Yioop.
+If you would like to add text that is not parsed, enclose
+it in <tt><`mbox{nowiki}`> </`mbox{nowiki}`></tt> tags.
+
+==Styling Text Paragraphs==
+: Yioop wiki syntax offers a number of templates for
+controlling the styles and alignment of text for
+a paragraph or group of paragraphs:<br />
+`{{`left| some text`}}`,<br /> `{{`right| some text`}}`,<br />
+and<br />
+`{{`center| some text`}}`<br /> can be used to left-justify,
+right-justify, and center a block of text. For example,
+the last command, would produce:
+{{center|
+some text
+}}
+If you know cascading style sheets (CSS), you can set
+a class or id selector for a block of text using:<br />
+`{{`class="my-class-selector" some text`}}`<br />and<br />
+`{{`id="my-id-selector" some text`}}`.<br />
+You can also apply inline styles to a block of text
+using the syntax:<br />
+`{{`style="inline styles" some text`}}`.<br />
+For example, `{{`style="color:red" some text`}}` looks
+like {{style="color:red" some text}}.
+
+==Lists==
+: The Yioop Wiki Syntax supports several ways of listing items:
+bulleted/unordered list, numbered/ordered lists, and
+definition lists. Below are some examples:
+
+===Unordered Lists===
+<nowiki>
+* Item1
+** SubItem1
+** SubItem2
+*** SubSubItem1
+* Item 2
+* Item 3
+</nowiki>
+would be drawn as:
+* Item1
+** SubItem1
+** SubItem2
+*** SubSubItem1
+* Item 2
+* Item 3
+
+===Ordered Lists===
+<nowiki>
+# Item1
+## SubItem1
+## SubItem2
+### SubSubItem1
+# Item 2
+# Item 3
+</nowiki>
+# Item1
+## SubItem1
+## SubItem2
+### SubSubItem1
+# Item 2
+# Item 3
+
+===Mixed Lists===
+<nowiki>
+# Item1
+#* SubItem1
+#* SubItem2
+#*# SubSubItem1
+# Item 2
+# Item 3
+</nowiki>
+# Item1
+#* SubItem1
+#* SubItem2
+#*# SubSubItem1
+# Item 2
+# Item 3
+
+===Definition Lists===
+<nowiki>
+;Term 1: Definition of Term 1
+;Term 2: Definition of Term 2
+</nowiki>
+;Term 1: Definition of Term 1
+;Term 2: Definition of Term 2
+
+==Tables==
+: A table begins with {`|`  and ends with `|`}. Cells are separated with | and
+rows are separated with |- as can be seen in the following
+example:
+<nowiki>
+{|
+|a||b
+|-
+|c||d
+|}
+</nowiki>
+{|
+|a||b
+|-
+|c||d
+|}
+Headings for columns and rows can be made by using an exclamation point, !,
+rather than a vertical bar |. For example,
+<nowiki>
+{|
+!a!!b
+|-
+|c||d
+|}
+</nowiki>
+{|
+!a!!b
+|-
+|c||d
+|}
+Captions can be added using the + symbol:
+<nowiki>
+{|
+|+ My Caption
+!a!!b
+|-
+|c||d
+|}
+</nowiki>
+{|
+|+ My Caption
+!a!!b
+|-
+|c||d
+|}
+Finally, you can put a CSS class or style attributes (or both) on the first line
+of the table to further control how it looks:
+<nowiki>
+{| class="wikitable"
+|+ My Caption
+!a!!b
+|-
+|c||d
+|}
+</nowiki>
+{| class="wikitable"
+|+ My Caption
+!a!!b
+|-
+|c||d
+|}
+Within a cell attributes like align, valign, styles, and class can be used. For
+example,
+<nowiki>
+{|
+| style="text-align:right;"| a| b
+|-
+| lalala | lalala
+|}
+</nowiki>
+{|
+| style="text-align:right;"| a| b
+|-
+| lalala | lalala
+|}
+
+==Math==
+
+: Math can be included into a wiki document by either using the math tag:
+<nowiki>
+<math>
+\sum_{i=1}^{n} i = \frac{(n+1)(n)}{2}
+</math>
+</nowiki>
+
+<math>
+\sum_{i=1}^{n} i = \frac{(n+1)(n)}{2}
+</math>
+
+or by enclosing the math in backticks:
+
+<pre>
+`[[1, -2],[3,4]]`
+</pre>
+
+`[[1, -2],[3,4]]`.
+
+Rendering of math is done using [[https://www.mathjax.org/|MathJax]], making use of the [[https://en.wikipedia.org/wiki/ASCIIMathML|ASCIImathml]] extensions.
+
+==Links and Relationships==
+: A hypertext link to another document can be inserted into a wiki page using
+the chain link icon in the GUI. Alternatively, there are several techniques
+for inserting a link into a page depending on whether the link is to a page
+within the same wiki group, is a link to a page on a different wiki
+group, or is a link to a different website. In addition to normal
+hypertext links, Yioop also supports relationship links.
+
+'''Intra-Group Wiki Links''' use the syntax:
+<nowiki>
+[[name_of_wiki_page]]
+or
+[[name_of_wiki_page|text for the link]]
+or
+[[name_of_wiki_page#heading_or_id_on_page|text for the link]]
+</nowiki>
+for example, to make a link to this Syntax page one could write,
+<nowiki>
+[[Syntax|Yioop Wiki Syntax Page]]
+</nowiki>
+which would look like,
+
+[[Syntax|Yioop Wiki Syntax Page]]
+
+'''Inter-Group Wiki Links''' use the syntax:
+<nowiki>
+[[name_of_group@name_of_wiki_page|text for the link]]
+</nowiki>
+
+'''Different Website Links''' use the syntax:
+<nowiki>
+[[website_url|text for the link]]
+</nowiki>
+
+: Relationships are a generalized form of link. They are used to express
+a more complicated linking between two wiki pages and have the syntax:
+
+<nowiki>
+[[relationship_type|wiki_page_name|text for the link]]
+</nowiki>
+
+: In the navigation dropdown for a Yioop wiki page there are items for
+what links to the current page and what relates to the current page
+based on the links and relationships a page belongs to.
+
+==Recent Places Dropdowns==
+: You can add a dropdown that can allow users to navigate to recently visited
+wiki pages using the syntax:
+
+<sub>`[`{recent_places}]</sub>
+
+This looks like:
+
+[{recent_places}]
+
+==Adding Resources to a Page==
+
+: Yioop wiki syntax supports adding search bars, audio, images, and video to a
+page. The magnifying glass edit tool icon can be used to add a search bar via
+the GUI. This can also be added by hand with the syntax:
+<nowiki>
+{{search:default|size:small|placeholder:Search Placeholder Text}}
+</nowiki>
+This syntax is split into three parts each separated by a vertical bar |. The
+first part search:default means results from searches should come from the
+default search index. You can replace default with the timestamp of a specific
+index or mix if you do not want to use the default. The second group size:small
+indicates the size of the search bar to be drawn. Choices of size are small,
+medium, and large. Finally, placeholder:Search Placeholder Text indicates the
+grayed out background text in the search input before typing is done should
+read: Search Placeholder Text. Here is what the above code outputs:
+
+{{search:default|size:small|placeholder:Search Placeholder Text}}
+
+: Image, video and other media resources can be associated with a page by dragging
+and dropping them in the edit textarea or by clicking on the ''click to select''
+link in the gray box below the textarea. This would add wiki code such as
+
+<sub>((resource`:`myphoto.jpg|Resource Description))</sub>
+
+to the page. Only saving the page will save this code and upload the resource to
+the server. In the above ''myphoto.jpg'' is the resource that will be inserted and
+Resource Description is the alternative text to use in case the viewing browser
+cannot display jpg files. To add a resource
+from a different wiki page belonging to the same group to the current wiki
+page one can use a syntax like:
+
+<sub>((resource`:`Documentation:ConfigureScreenForm1.png|The work directory form))</sub>
+
+Here Documentation would be the page and ConfigureScreenForm1.png the resource.
+You can also insert resources from a data-string using ''resource-data'' rather than
+''resource''. For example:
+
+<sub>((resource-data`:`image/jpeg;base64,/9j/4AAQSkZJRg...rest of image data...|Seekquarry Logo))</sub>
+
+could be used to inline an image like:
+
+((resource-|The Seekquarry Logo))
+
+be aware though that the default maximum wiki page size is 512Kb (this can be set in src/configs/Config.php).
+
+: Sometimes it is useful to edit the basic resource link
+above to make a link which is a thumbnail of the resource which points to a
+separate page containing that resource. This can be done using the syntax:
+
+<sub>((resource-thumb`:`myphoto.jpg|Resource Description))</sub>
+
+: Similarly, by default for resources like PDFs, epub's, etc., the resource tag inlines
+the whole resource into the page, if instead one wants a clickable link to a page where
+the resource is displayed one can use the syntax:
+
+<sub>((resource-link`:`my_document.pdf|Resource Description))</sub>
+
+: Comma separated value files (.csv or CSV files) are inlined into a page as a table. Which rows and columns of the CSV to present in this table can be controlled by the resource line. The general format for including
+a CSV resource is:
+
+<sub> ((resource`:`resource_name.csv#config#top_left_cell#bottom_right_cell|Resource Description))</sub>
+
+For example,
+
+<sub>((resource`:`resource_name.csv##B2#C3|Resource Description))</sub>
+
+might output
+
+((resource-data:text/csv;base64,LCwsLAosLTIsMywsCiw1LDQsLAosLCwsCiwsLCwK##B2#C3|Example CSV with Headings))
+
+I.e., just the portion of the CSV given by the rectangle between the cells B2 and C3. Using a config directive we can omit the spreadsheet row and column headings as follows:
+
+<sub>((resource`:`resource_name.csv#noheadings#B2#C3|Resource Description)) </sub>
+
+which might output
+
+
+((resource-data:text/csv;base64,LCwsLAosLTIsMywsCiw1LDQsLAosLCwsCiwsLCwK#noheadings#B2#C3|Example CSV without Headings))
+
+CSV spreadsheet files can also be used to output a variety of charts. The general format for the command to insert a chart resource is:
+
+<sub>((resource-chart_type`:`resource_name.csv#chart_config#x_start#x_end#y_start#y_end|Resource Description))</sub>
+
+Here ''chart_type'' can be one of ''bargraph'',  ''linegraph'', or ''pointgraph''. For example, one might have a line like:
+
+<sub>((resource-bargraph`:`resource_name.csv##B1#B4#C1#C4|Quadratic Function)) </sub>
+
+which could produce a chart like
+
+((resource-bargraph:##(1,1)#(2,4)#(3,9)#(4,16)|Quadratic Function))
+
+In the above example, the values for the `x` coordinates would come from the cells B1, B2, B3, B4 from
+''resource_name.csv'' and the values for the `y` coordinates would come from cells C1, C2, C3, C4 from
+''resource_name.csv''. Alternatively, rather than use a CSV to get our data we can just list the points we want to plot with a command like:
+
+<sub>((resource-bargraph`:`##(1,1)#(2,4)#(3,9)#(4,16)|Quadratic Function))</sub>
+
+==Manipulating Page Resources==
+
+: A list of media that have already been associated with
+a page appears under the Page Resource heading below the textarea. This
+table allows the user to rename and delete resources as well as insert the
+same resource at multiple locations within the same document.
+
+: The resources section of the edit page can be thought of as similar to
+a folder in Windows or MacOS. One can have subfolders of the resource folder.
+
+: The '''Places''' dropdown at the top of the '''Page Resource''' section allows one to navigate
+these folders.
+
+: The '''Filter''' textfield lets you enter a search string.
+Clicking '''Go''' then shows only those resources
+which contain that search string in their title.
+
+: The '''Clip Folder''' dropdown is used to copy files between folders and pages.
+Its current value is the folder that the '''Clip Copy''' buttons next to resources
+will copy their resource to when clicked. You can set the '''Clip Folder''' to
+the current folder using the dropdown, then  move to the page and folder that
+you would like to copy stuff from and click the '''Clip Copy''' button of the
+desired resource.
+
+: The '''Name''', '''Size''', '''Modified''' header links above the resources list
+control the sort order for the resource list. If a page is a media list page,
+then even in read mode, the sort order selected is remembered when drawing the
+media list.
+
+: The '''Actions''' dropdown can be used to create new folders, new text files, and new csv
+text files within the current page resource folder. These are initially named beginning
+with ''untitled'' followed by some number, and if applicable a file extension.
+
+: Resource entries for the resources list consist first of an icon, followed by a textfield
+with a name for the resource, followed by buttons for actions that can be done to that resource
+(Rename, Add to Page, Clip Copy), followed by a link [X], which can be used to delete the resource.
+If a resource is editable the icon will look like a plus sign together with a pencil. Clicking
+on the icon will then let you edit the resource.
+
+===Text and CSV Resources===
+: For normal text files clicking edit will bring up a textarea with the contents of the text to edit.
+For CSV (comma separated value) files this will present the file as an editable spreadsheet.
+Yioop spreadsheets can have equations much like Excel spreadsheets. Clicking on a cell lets one
+edit its contents. For example, if in the cell A3
+one entered the equation:
+ = A1+A2
+then clicking out of the cell would cause it to refresh with the value of the sum of the contents of
+cells A1 and A2. In addition to the standard arithmetic operators ['*', '/', '+', '-', '%'], the
+spreadsheet expressions can use float or integer literals, and can make use of the following table
+of built-in functions:
+
+{| class="wikitable"
+!Function Name!!Description
+|-
+!avg(x1,...,xn), avg(x1:xn)|| computes average of values of cells listed as arguments
+|-
+!ceil(x)|| rounds the value of x up to nearest integer
+|-
+!cell(i,j)|| returns the contents of the cell with column name of letter j, and row name i. For example, cell(2,'B') would return the contents of cell B2.
+|-
+!col(value, search_row, start_col, end_col)|| searches the row ''search_row'' between the columns
+''start_col'', ''end_col'' for ''value''. Returns the column name where this value was found or -1 if not found.
+For example, col(3, 2, "B", "D") might return C if the cell C2 had value 3.
+|-
+!exp(x)|| computes `e^x`
+|-
+!floor(x)|| rounds the value of x down to the nearest integer
+|-
+!log(x)|| computes `log x`
+|-
+!min(x1,...,xn), min(x1:xn)|| computes minimum value of cells listed as arguments
+|-
+!max(x1,...,xn), max(x1:xn)|| computes maximum value of cells listed as arguments
+|-
+!pow(x,y)|| computes `x^y`
+|-
+!row(value, search_col, start_row, end_row)|| searches the column ''search_col''
+between the rows ''start_row'', ''end_row'' for ''value''.
+Returns the row name where this value was found or -1 if not found.
+For example, row(3, "C", "1", "5") might return 2 if the cell C2 had value 3.
+|-
+!sqrt(x)|| computes `sqrt(x)`
+|-
+!sum(x1,...,xn), sum(x1:xn)|| computes sum of values of cells listed as arguments
+|-
+!username()|| returns username of the person using this CSV file
+|}
+
+===HTML, PDF and EPub Resources===
+: How HTML, PDF, EPub resources included on a page render depends on how the Yioop wiki software
+has been configured. If no special configuration has been done, then HTML and PDF documents
+will be rendered in an <iframe> tag within the current wiki page. In the EPub case, a link
+to download the resource will be given. If the wiki software detects the presence of the
+file APP_DIR/scripts/pdf.js ([[https://en.wikipedia.org/wiki/PDF.js|PDF.js]])
+or APP_DIR/scripts/epub.js ([[https://github.com/futurepress/epub.js|epub.js]]), the wiki
+system will render the resource in a Javascript viewer and will do things like remember reading
+position.
+
+
+===Video and Audio Resources===
+
+: Not all browsers support the same video and audio formats for playback. For this reason
+it sometimes is useful to have multiple video resources for the same video. For example,
+you might have a .ogv and .vp8 version of the same video recording. In read (non-edit)
+mode, the Yioop wiki system displays only one link for video or audio files that have
+the same name except for extension. It then includes the grouped files as separate <source>
+tags within either the <video> or <audio> html tag used to render the item in the browser.
+In this way, you can make your media take best advantage of whatever capabilities your
+client's browser has. If you don't feel like recoding your media in such a fancy way, a safe
+rule of thumb is that .mp3 audio will playback in all modern browsers, and that .mp4 video
+will playback in all modern browsers.
+
+: For video it is sometimes useful to add a subtitle or caption track. Yioop wiki supports
+[[https://en.wikipedia.org/wiki/WebVTT|WebVTT]] format subtitles and captions. To see how
+Yioop wiki makes use of these files, suppose you included a resource ''foo.mp4'' in your
+wiki pages, and you also had a file named ''foo-captions-en-US.vtt'' then when the HTML
+page is generated from your wiki page, a <track> tag for the caption file would be added
+to the <video> tag. A user seeing this page would then see in the video player a closed caption
+symbol and be able to turn on/off (defaults off) the English captions. If you had
+named the file ''foo-subtitles-en-US.vtt'' instead, then Yioop wiki would include it as a
+subtitles track (defaults on). You can add captions/subtitle files for as many languages as
+desired.
+
+: When viewing the page resources for a page in edit mode, one can see each file/resource and
+no grouping of resources by name is done. In this way you can keep track of exactly what
+resources are available for a page.
+
+==Page Settings, Page Type==
+
+: In edit mode for a wiki page, next to the page name, is a link [Settings].
+Clicking this link expands a form which can be used to control global settings
+for a wiki page.  This form contains a drop down for the page type, another
+drop down for the type of border for the page in non-logged in mode,
+a checkbox for whether a table of contents should be auto-generated from level 2
+and level three headings and then text
+fields or areas for the page title, author, meta robots, and page description.
+Beneath this one can specify another wiki page to be used as a header for this
+page and also specify another wiki page to be used as a footer for this page.
+
+: The contents of the page title is displayed in the browser title when the
+wiki page is accessed with the  Activity Panel collapsed or when not logged in.
+Similarly, in the collapsed or not logged in mode, if one looks at the HTML
+page source for the page,  in the head of document, <meta> tags for author,
+robots, and description are set according to these fields. These fields can
+be useful for search engine optimization. The robots meta tag can be
+used to control how search engine robots index the page. Wikipedia has more information on
+[[https://en.wikipedia.org/wiki/Meta_element|Meta Elements]].
+
+: The '''Standard''' page type treats the page as a usual wiki page.
+
+: '''Page Alias''' type redirects the current page to another page name. This can
+be used to handle things like different names for the same topic or to do localization
+of pages. For example, if you switch the locale from English to French and
+you were on the wiki page dental_floss when you switch to French the article
+dental_floss might redirect to the page dentrifice.
+
+: '''Media List''' type means that the page, when read, should display just the
+resources in the page as a list of thumbnails and links. These links for the
+resources go to separate pages used to display these resources.
+This kind of page is useful for a gallery of
+images or a collection of audio or video files.
+
+: '''Presentation''' type is for a wiki page whose purpose is a slide presentation. In this mode,
+....
+on a line by itself is used to separate one slide from the next. If the presentation type is selected, a new
+slide icon appears in the wiki edit bar allowing one to easily add new slides.
+When the Activity panel is not collapsed and you are reading a presentation, it just
+displays as a single page with all slides visible. Collapsing the Activity panel presents
+the slides as a typical slide presentation using the
 [[www.w3.org/Talks/Tools/Slidy2/Overview.html|Slidy]] javascript.
 EOD;
 $public_pages["en-US"]["ad_program_terms"] = <<< 'EOD'
@@ -2366,19 +2366,23 @@ robots=

 description=

+alternative_path=
+
 page_header=

 page_footer=

-END_HEAD_VARSThese checkboxes control whether various links and drop downs on the search result and landing
-pages appear or not.
-
-; &#039;&#039;&#039;Word Suggest&#039;&#039;&#039;: Controls whether the suggested query drop down appear as a query is entered in the search bar and whether thesaurus results appear on search result pages.
-; &#039;&#039;&#039;Subsearch&#039;&#039;&#039; : Controls whether the links to subsearches such as Image, Video, and News search appear at the top of all search pages
-; &#039;&#039;&#039;Signin&#039;&#039;&#039; : Controls whether the &#039;&#039;&#039;Sign In&#039;&#039;&#039; link appears at the top of the Yioop landing and search result pages.
-; &#039;&#039;&#039;Cache&#039;&#039;&#039;, &#039;&#039;&#039;Similar&#039;&#039;&#039;, &#039;&#039;&#039;Inlinks&#039;&#039;&#039;, &#039;&#039;&#039;IP Address&#039;&#039;&#039;: Control whether the corresponding links appear after each search result item.
-
+sort=aname

+END_HEAD_VARSThese checkboxes control whether various links and drop downs on the search result and landing
+pages appear or not.
+
+; &#039;&#039;&#039;Word Suggest&#039;&#039;&#039;: Controls whether the suggested query drop down appears as a query is entered in the search bar.
+; &#039;&#039;&#039;Subsearch&#039;&#039;&#039; : Controls whether the links to subsearches such as Image, Video, and News search appear at the top of all search pages
+; &#039;&#039;&#039;Signin&#039;&#039;&#039; : Controls whether the &#039;&#039;&#039;Sign In&#039;&#039;&#039; link appears at the top of the Yioop landing and search result pages.
+; &#039;&#039;&#039;Cache&#039;&#039;&#039;, &#039;&#039;&#039;Similar&#039;&#039;&#039;, &#039;&#039;&#039;Inlinks&#039;&#039;&#039;, &#039;&#039;&#039;IP Address&#039;&#039;&#039;: Control whether the corresponding links appear after each search result item.
+
+

 EOD;
 $help_pages["en-US"]["Seed_Sites_and_URL_Suggestions"] = <<< EOD
diff --git a/src/controllers/SearchController.php b/src/controllers/SearchController.php
index febb9ab14..f5ee96c9e 100755
--- a/src/controllers/SearchController.php
+++ b/src/controllers/SearchController.php
@@ -965,12 +965,12 @@ class SearchController extends Controller implements CrawlConstants
                     $out_pages[$first_image]['IMAGES'] = [];
                 }
                 $out_pages[$first_image]['IMAGES'][] = $page;
-            } else if (!empty($page[self::IS_NEWS])) {
+            } else if (!empty($page[self::IS_FEED])) {
                 if ($first_feed_item == -1) {
                     $first_feed_item = count($out_pages);
-                    $out_pages[$first_feed_item]['NEWS'] = [];
+                    $out_pages[$first_feed_item]['FEED'] = [];
                 }
-                $out_pages[$first_feed_item]['NEWS'][] = $page;
+                $out_pages[$first_feed_item]['FEED'][] = $page;
             } else {
                 $out_pages[] = $page;
             }
diff --git a/src/controllers/components/StoreComponent.php b/src/controllers/components/StoreComponent.php
index 54f47d743..856f595a4 100644
--- a/src/controllers/components/StoreComponent.php
+++ b/src/controllers/components/StoreComponent.php
@@ -441,7 +441,7 @@ class StoreComponent extends Component
         $parent = $this->parent;
         $keywords = explode("," , strtoupper($data['KEYWORDS']));
         array_walk($keywords, [C\NS_COMPONENTS .
-            "AdvertisementComponent", "trim_value"]);
+            "StoreComponent", "trim_value"]);
         $min_bid_reqd = 0;
         $expensive_bid = 0;
         foreach ($keywords as $keyword) {
diff --git a/src/css/search.css b/src/css/search.css
index d2a8a41c0..e2c83a4db 100755
--- a/src/css/search.css
+++ b/src/css/search.css
@@ -1285,20 +1285,6 @@ ul.in-list li
     top: -0.8in;
     width: 8in;
 }
-.html-ltr .thesaurus-serp-results
-{
-    left: 2.2in;
-    position: relative;
-    top: -1.7in;
-    width: 8in;
-}
-.html-rtl .thesaurus-serp-results
-{
-    right: 2.2in;
-    position: relative;
-    top: -1.7in;
-    width: 8in;
-}
 .html-rtl .serp
 {
     position: relative;
@@ -1353,27 +1339,6 @@ ul.in-list li
     top:7px;
     width:100px;
 }
-
-.html-ltr .thesaurus
-{
-    color: #666;
-    font-size: 14pt;
-    font-weight: bold;
-    left: 0.2in;
-    position: relative;
-    top: -0.8in;
-    width: 8in;
-}
-.html-rtl .thesaurus
-{
-    color: #666;
-    font-size: 14pt;
-    font-weight: bold;
-    right: 0.2in;
-    position: relative;
-    top: -0.8in;
-    width: 8in;
-}
 .result
 {
     clear: both;
diff --git a/src/executables/ArcTool.php b/src/executables/ArcTool.php
index f098830af..f498bddde 100755
--- a/src/executables/ArcTool.php
+++ b/src/executables/ArcTool.php
@@ -256,49 +256,66 @@ class ArcTool implements CrawlConstants
         echo "\nBundle Name: $bundle_name\n";
         $archive_type = $this->getArchiveKind($archive_path);
         echo "Bundle Type: $archive_type\n";
-
         if (strcmp($archive_type,"IndexArchiveBundle") != 0) {
             $this->badFormatMessageAndExit($archive_path, "index");
         }
         $index_timestamp = substr($archive_path,
             strpos($archive_path, self::index_data_base_name) +
             strlen(self::index_data_base_name));
-        $mask = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
-        $hash_key = L\crawlHashWord($word, true, $mask) ;
-        $start_time = microtime(true);
-        $info = IndexManager::getWordInfo($index_timestamp, $hash_key, 0,
-            $mask, -1, $start_generation, $num_generations);
-        echo "Dictionary Lookup Time:" . L\changeInMicrotime($start_time)."\n";
-        if (!$info) {
+        $hash_paths = L\allCrawlHashPaths($word, true);
+        $found = false;
+        echo "!!Performing Looking up for phrase " .
+            "at each possible shift position. Outputting results for each ".
+            "possibility!!\n";
+        foreach ($hash_paths as $hash_shift) {
+            if (is_array($hash_shift)) {
+                list($hash_key, $shift) = $hash_shift;
+            } else {
+                $hash_key = $hash_shift;
+                $shift = 0;
+            }
+            $start_time = microtime(true);
+            echo "Looking up in dictionary:\n";
+            echo " Key: ". L\toHexString($hash_key) . "\n";
+            echo " Shift: ". $shift . "\n";
+            $info = IndexManager::getWordInfo($index_timestamp, $hash_key,
+                $shift, -1, $start_generation, $num_generations);
+            echo "Dictionary Lookup Time:" . L\changeInMicrotime($start_time)
+                . "\n";
+            if (!$info) {
+                echo " Key not found\n";
+                continue;
+            }
+            $found = true;
+            echo "Dictionary Tiers: ";
+            $index = IndexManager::getIndex($index_timestamp);
+            $tiers = $index->dictionary->active_tiers;
+            foreach ($tiers as $tier) {
+                echo " $tier";
+            }
+            echo "\nBundle Dictionary Entries for '$word':\n";
+            echo "====================================\n";
+            $i = 1;
+            foreach ($info as $record) {
+                echo "RECORD: $i\n";
+                echo "Hex ID: " . L\toHexString($record[4])."\n";
+                echo "GENERATION: {$record[0]}\n";
+                echo "FIRST WORD OFFSET: {$record[1]}\n";
+                echo "LAST WORD OFFSET: {$record[2]}\n";
+                echo "NUMBER OF POSTINGS: {$record[3]}\n\n";
+                $i++;
+            }
+        }
+        if (!$found) {
             //fallback to old word hashes
             $info = IndexManager::getWordInfo($index_timestamp,
-                L\crawlHash($word, true), 0, "", 1, $start_generation,
+                L\crawlHash($word, true), 0, 1, $start_generation,
                 $num_generations);
             if (!$info) {
-                echo "\n$word does not appear in bundle!\n\n";
+                echo "\n$word does not appear in bundle!\n";
                 exit();
             }
         }
-        echo "Dictionary Tiers: ";
-        $index = IndexManager::getIndex($index_timestamp);
-        $tiers = $index->dictionary->active_tiers;
-        foreach ($tiers as $tier) {
-            echo " $tier";
-        }
-        echo "\nBundle Dictionary Entries for '$word':\n";
-        echo "====================================\n";
-        $i = 1;
-        foreach ($info as $record) {
-            echo "RECORD: $i\n";
-            echo "Hex ID: ".L\toHexString($record[4])."\n";
-            echo "Media Type: " . PhraseParser::getMediaType($record[4]) . "\n";
-            echo "Safe: ". PhraseParser::getSafety($record[4]) . "\n";
-            echo "GENERATION: {$record[0]}\n";
-            echo "FIRST WORD OFFSET: {$record[1]}\n";
-            echo "LAST WORD OFFSET: {$record[2]}\n";
-            echo "NUMBER OF POSTINGS: {$record[3]}\n\n";
-            $i++;
-        }
     }
     /**
      * Prints information about the number of words and frequencies of words
@@ -568,11 +585,12 @@ class ArcTool implements CrawlConstants
                     $max_generation = max($max_generation, $generation);
                 }
                 for ($i = $start_shard; $i < $max_generation + 1; $i++) {
-                    $shard_name = $path."/posting_doc_shards/index$i";
+                    $shard_name = $path . "/posting_doc_shards/index$i";
                     echo "\nShard $i of $num_shards\n";
                     $shard = new IndexShard($shard_name, $i,
                         C\NUM_DOCS_PER_GENERATION, true);
                     if ($dictionary->addShardDictionary($shard)) {
+                        $shard->saveWithoutDictionary();
                         file_put_contents($shard_count_file, $i + 1);
                     } else {
                         echo "Problem adding shard $i";
@@ -929,7 +947,7 @@ class ArcTool implements CrawlConstants
                         $triplet_lists =
                             PhraseParser::extractPhrasesInLists($phrase_string,
                                 $lang);
-                        $word_lists = $triplet_list['WORD_LIST'];
+                        $word_lists = $triplet_lists['WORD_LIST'];
                         $len = strlen($phrase_string);
                         if (PhraseParser::computeSafeSearchScore($word_lists,
                             $len) < 0.012) {
@@ -952,8 +970,7 @@ class ArcTool implements CrawlConstants
                         $doc_keys .= $score_keys;
                     }
                     $shard->addDocumentWords($doc_keys, $offset,
-                        $word_lists, $meta_ids,
-                        PhraseParser::$materialized_metas, true, false);
+                        $word_lists, $meta_ids, true, false);
                     $offset = $object[0];
                 }
                 $seen_partition += $num_to_get;
@@ -963,7 +980,6 @@ class ArcTool implements CrawlConstants
         }
         $this->reindexIndexArchive($archive_path);
     }
-
     /**
      * Used to create an archive_bundle_iterator for a non-yioop archive
      * As these iterators sometimes make use of a folder to store savepoints
diff --git a/src/executables/Fetcher.php b/src/executables/Fetcher.php
index 3d92faa31..031baece0 100755
--- a/src/executables/Fetcher.php
+++ b/src/executables/Fetcher.php
@@ -2865,8 +2865,7 @@ class Fetcher implements CrawlConstants
             }
             $this->found_sites[self::INVERTED_INDEX][$this->current_server
                 ]->addDocumentWords($doc_keys, self::NEEDS_OFFSET_FLAG,
-                $word_lists, $meta_ids, PhraseParser::$materialized_metas,
-                true, $doc_rank);
+                $word_lists, $meta_ids, true, $doc_rank);
             if (isset($word_and_qa_lists['QUESTION_ANSWER_LIST'])) {
                 $site[self::QUESTION_ANSWERS] =
                     $word_and_qa_lists['QUESTION_ANSWER_LIST'];
@@ -2882,6 +2881,9 @@ class Fetcher implements CrawlConstants
              */
             if (!$this->no_process_links && !isset($site[self::JUST_METAS]) &&
                 !isset($this->programming_language_extension[$lang])) {
+                $tokenizer = PhraseParser::getTokenizer($lang);
+                $has_stopwords_remover =
+                    method_exists($tokenizer, "stopwordsRemover");
                 foreach ($site[self::LINKS] as $url => $link_text) {
                     /* this mysterious check means won't index links from
                       robots.txt. Sitemap will still be in TO_CRAWL, but that's
@@ -2900,17 +2902,25 @@ class Fetcher implements CrawlConstants
                     }
                     $elink_flag = ($link_host != $host) ? true : false;
                     $link_text = strip_tags($link_text);
+                    if ($has_stopwords_remover) {
+                        $useful_text = $tokenizer->stopwordsRemover($link_text);
+                    } else {
+                        $useful_text = $link_text;
+                    }
+                    if (mb_strlen($useful_text) < C\MIN_LINKS_TEXT_DOC) {
+                        continue;
+                    }
                     $ref = ($elink_flag) ? "eref" : "iref";
                     $url = str_replace('|', "%7C", $url);
                     $link_id =
-                        "url|".$url."|text|".urlencode($link_text).
-                        "|$ref|".$site_url;
+                        "url|" . $url . "|text|" . urlencode($link_text) .
+                        "|$ref|" . $site_url;
                     $elink_flag_string = ($elink_flag) ? "e" :
                         "i";
                     $link_keys = L\crawlHash($url, true) .
                         L\crawlHash($link_id, true) .
                         $elink_flag_string.
-                        substr(L\crawlHash($host."/", true), 1);
+                        substr(L\crawlHash($host . "/", true), 1);
                     $summary[self::URL] =  $link_id;
                     $summary[self::TITLE] = $url;
                         // stripping html to be on the safe side
@@ -2937,9 +2947,7 @@ class Fetcher implements CrawlConstants
                     $this->found_sites[self::INVERTED_INDEX][
                         $part_num]->addDocumentWords($link_keys,
                             self::NEEDS_OFFSET_FLAG, $link_word_lists,
-                                $link_meta_ids,
-                                PhraseParser::$materialized_metas, false,
-                                $link_rank);
+                            $link_meta_ids, false, $link_rank);
                 }
             }
             $interim_elapse = L\changeInMicrotime($interim_time);
diff --git a/src/executables/QueryTool.php b/src/executables/QueryTool.php
index 0cf09c645..bbc9a0847 100755
--- a/src/executables/QueryTool.php
+++ b/src/executables/QueryTool.php
@@ -78,7 +78,8 @@ class QueryTool implements CrawlConstants
             $this->usageMessageAndExit();
         }
         $query = $argv[1];
-        $results_per_page = (isset($argv[2])) ? $argv[2] : 10;
+        $results_per_page = (isset($argv[2])) ? (is_numeric($argv[2]) ?
+            $argv[2] : 0 ) : 10;
         $limit = (isset($argv[3])) ? $argv[3] : 0;
         L\setLocaleObject((isset($argv[4])) ? $argv[4] : C\DEFAULT_LOCALE);
         $start_time = microtime(true);
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 692b671cd..5adc90db4 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -344,7 +344,7 @@ class QueueServer implements CrawlConstants, Join
             in_array($argv[3], [self::INDEXER, self::SCHEDULER])) {
             $this->server_type = $argv[3];
             $this->server_name = $argv[3];
-            L\crawlLog($argv[3]." logging started.");
+            L\crawlLog($argv[3] . " logging started.");
         }
         $remove = false;
         $old_message_names = ["QueueServerMessages.txt",
@@ -524,9 +524,8 @@ class QueueServer implements CrawlConstants, Join
             $crawl_params[self::CRAWL_TIME] = $this->crawl_time;
             $crawl_params[self::CRAWL_TYPE] = $this->crawl_type;
             $info_string = serialize($crawl_params);
-            file_put_contents(
-                C\CRAWL_DIR."/schedules/". $this->process_name . "Messages.txt",
-                $info_string);
+            file_put_contents(C\CRAWL_DIR . "/schedules/" .
+                $this->process_name . "Messages.txt", $info_string);
             chmod(C\CRAWL_DIR."/schedules/". $this->process_name .
                 "Messages.txt", 0777);
         }
@@ -872,7 +871,6 @@ class QueueServer implements CrawlConstants, Join
         }
         $close_file = C\CRAWL_DIR.'/schedules/'.self::index_closed_name.
             $this->crawl_time.".txt";
-
         if (!file_exists($close_file) &&
             strcmp($this->server_type, self::BOTH) != 0) {
             file_put_contents($close_file, "2");
@@ -905,10 +903,9 @@ class QueueServer implements CrawlConstants, Join
         $crawl_status['CRAWL_TIME'] = $this->crawl_time;
         $crawl_status['COUNT'] = 0;
         $crawl_status['DESCRIPTION'] = $message;
-        file_put_contents(
-            C\CRAWL_DIR."/schedules/crawl_status.txt",
+        file_put_contents(C\CRAWL_DIR . "/schedules/crawl_status.txt",
             serialize($crawl_status));
-        chmod(C\CRAWL_DIR."/schedules/crawl_status.txt", 0777);
+        chmod(C\CRAWL_DIR . "/schedules/crawl_status.txt", 0777);
     }
     /**
      * When a crawl is being shutdown, this function is called to write
@@ -931,7 +928,7 @@ class QueueServer implements CrawlConstants, Join
             return;
         }
         L\crawlLog("Writing queue contents back to schedules...");
-        $dir = C\CRAWL_DIR."/schedules/".self::schedule_data_base_name.
+        $dir = C\CRAWL_DIR."/schedules/" . self::schedule_data_base_name .
             $this->crawl_time;
         if (!file_exists($dir)) {
             mkdir($dir);
@@ -989,8 +986,9 @@ class QueueServer implements CrawlConstants, Join
                     $data_string = L\webencode(
                         gzcompress(serialize($schedule_data)));
                     $data_hash = L\crawlHash($data_string);
-                    file_put_contents($dir."/At".$schedule_time."From127-0-0-1".
-                        $note_string. "WithHash$data_hash.txt", $data_string);
+                    file_put_contents($dir."/At" . $schedule_time .
+                        "From127-0-0-1". $note_string .
+                        "WithHash$data_hash.txt", $data_string);
                     $data_string = "";
                     $schedule_data[self::TO_CRAWL] = [];
                 }
@@ -1007,7 +1005,7 @@ class QueueServer implements CrawlConstants, Join
             } else {
                 $schedule_time = $time;
             }
-            file_put_contents($dir."/At".$schedule_time."From127-0-0-1".
+            file_put_contents($dir."/At" . $schedule_time . "From127-0-0-1".
                 $note_string . "WithHash$data_hash.txt", $data_string);
         }
         $this->db->setWorldPermissionsRecursive(
@@ -1026,7 +1024,7 @@ class QueueServer implements CrawlConstants, Join
             $this->
                 index_archive->forceSave();
             $this->
-                index_archive->addCurrentShardDictionary();
+                index_archive->addAdvanceGeneration();
             $this->index_archive->dictionary->mergeAllTiers();
         }
         $this->db->setWorldPermissionsRecursive(
diff --git a/src/library/CrawlConstants.php b/src/library/CrawlConstants.php
index 7a632d132..176fe2242 100755
--- a/src/library/CrawlConstants.php
+++ b/src/library/CrawlConstants.php
@@ -231,8 +231,7 @@ interface CrawlConstants
     const CENTROID_WEIGHTED_SUMMARIZER = 'dt';
     const SCRAPER_LABEL = 'du';
     const SCRAPERS = 'dv';
-    const IS_NEWS = "dw";
-    const QUESTION_ANSWERS = 'dx';
-    const CONTENT_SIZE = 'dy';
-    const NO_RANGE = 'dz';
+    const QUESTION_ANSWERS = 'dw';
+    const CONTENT_SIZE = 'dx';
+    const NO_RANGE = 'dy';
 }
diff --git a/src/library/IndexArchiveBundle.php b/src/library/IndexArchiveBundle.php
index d2343cce0..eba02a888 100644
--- a/src/library/IndexArchiveBundle.php
+++ b/src/library/IndexArchiveBundle.php
@@ -252,6 +252,8 @@ class IndexArchiveBundle implements CrawlConstants
     public function addAdvanceGeneration($callback = null)
     {
         $this->addCurrentShardDictionary($callback);
+        echo "Resaving active shard without prefix and dictionary etc\n";
+        $this->getActiveShard()->saveWithoutDictionary();
         //Set up new shard
         $this->generation_info['ACTIVE']++;
         $this->generation_info['CURRENT'] =
@@ -320,8 +322,7 @@ class IndexArchiveBundle implements CrawlConstants
             $current_index_shard_file = $this->dir_name .
                 "/posting_doc_shards/index". $this->generation_info['CURRENT'];
             if (file_exists($current_index_shard_file)) {
-                if (isset($this->generation_info['DISK_BASED']) &&
-                    $this->generation_info['DISK_BASED'] == true) {
+                if (!empty($this->generation_info['DISK_BASED'])) {
                     $this->current_shard = new IndexShard(
                         $current_index_shard_file,
                         $this->generation_info['CURRENT'],
diff --git a/src/library/IndexDictionary.php b/src/library/IndexDictionary.php
index af324a20b..2dd219f0e 100644
--- a/src/library/IndexDictionary.php
+++ b/src/library/IndexDictionary.php
@@ -250,7 +250,7 @@ class IndexDictionary implements CrawlConstants
                 }
             }
             // write prefixes
-            $fh = fopen($this->dir_name."/$i/0".$out_slot.".dic", "wb");
+            $fh = fopen($this->dir_name . "/$i/0" . $out_slot . ".dic", "wb");
             fwrite($fh, substr($prefix_string, $i * $prefix_header_size,
                 $prefix_header_size));
             $j = $num_prefix_letters;
@@ -301,7 +301,7 @@ class IndexDictionary implements CrawlConstants
         for ($i = 0; $i < self::NUM_PREFIX_LETTERS; $i++) {
             crawlTimeoutLog("..processing first index prefix $i of ".
                 self::NUM_PREFIX_LETTERS." in $tier.");
-            $this-> mergeTierFiles($i, $tier, $out_slot);
+            $this->mergeTierFiles($i, $tier, $out_slot);
         }
     }
     /**
@@ -322,7 +322,7 @@ class IndexDictionary implements CrawlConstants
         $prefix_header_size = self::PREFIX_HEADER_SIZE;
         $fh_a = fopen( $file_a, "rb");
         $fh_b = fopen( $file_b, "rb");
-        $fh_out = fopen( $this->dir_name."/$prefix/".($tier + 1).
+        $fh_out = fopen( $this->dir_name . "/$prefix/" . ($tier + 1) .
             "$out_slot.dic", "wb+");
         $prefix_bit = ($prefix & 128) ? 0 : 128;
         // Scan past prefix headers
@@ -530,8 +530,8 @@ class IndexDictionary implements CrawlConstants
      * @param string $record_b a dictionary record including auxiliary records
      *      from the 'b'th file of the give tier
      * @param int $prefix_bit either 0 or 32768. The first bit of an auxiliary
-     *      record should be ~higher order bit of the given prefix letter
-     *      used by the tier file.
+     *      record should be the negation of the high order bit of the given
+     *      prefix letter used by the tier file.
      * @return string a single record with merged strings making use of
      *      auxliary records as needed containing
      *      (generation, posting list offset, length) information.
@@ -563,7 +563,7 @@ class IndexDictionary implements CrawlConstants
             $aux_record_len);
         if (count($aux_records) == 3) {
             $record .=  chr($prefix_bit + ($num_aux_records >> 8)) .
-                chr($num_aux_records & 255). implode("", $aux_records);
+                chr($num_aux_records & 255) . implode("", $aux_records);
             $aux_records = [];
             $num_aux_records++;
         }
@@ -609,7 +609,7 @@ class IndexDictionary implements CrawlConstants
         $posting_info = str_split(substr($record_string, $offset + 2, 30), 10);
         if (!isset($posting_info[2]) ){
             crawlLog("Decode Aux Record failed...".
-                toHexString($record_string)."  ".$offset);
+                toHexString($record_string)."  " . $offset);
             crawlLog(print_r($posting_info, true));
             crawlLog(print_r(debug_backtrace(), true));
             exit();
@@ -716,9 +716,6 @@ class IndexDictionary implements CrawlConstants
      * @param bool $raw whether the id is our version of base64 encoded or not
      * @param int $shift how many low order bits to drop from $word_id's
      *    when checking for a match
-     * @param string $mask bit mask to be applied to bytes after the 8th
-     *     byte through 20th byte of word_id. In single word case these
-     *     bytes contain safe:, media:, and class: meta word info
      * @param int $threshold if greater than zero how many posting list
      *    results in dictionary info returned before stopping looking for
      *    more matches
@@ -728,7 +725,7 @@ class IndexDictionary implements CrawlConstants
      * @return mixed an array of entries of the form
      *     generation, first offset, last offset, count
      */
-     public function getWordInfo($word_id, $raw = false, $shift = 0, $mask = "",
+     public function getWordInfo($word_id, $raw = false, $shift = 0,
         $threshold = -1, $start_generation = -1, $num_distinct_generations = -1,
         $with_remaining_total = false)
      {
@@ -738,8 +735,7 @@ class IndexDictionary implements CrawlConstants
         $current_max_generation = -2;
         foreach ($this->active_tiers as $tier) {
             $tier_info = $this->getWordInfoTier($word_id, $raw, $tier, $shift,
-                $mask, $threshold, $start_generation,
-                $num_distinct_generations);
+                $threshold, $start_generation, $num_distinct_generations);
             if (is_array($tier_info) && isset($tier_info[2]) &&
                 is_array($tier_info[2])) {
                 list($found_count, $max_found_generation,
@@ -790,9 +786,6 @@ class IndexDictionary implements CrawlConstants
      * @param int $tier which tier to get word info from
      * @param int $shift how many low order bits to drop from $word_id's
      *    when checking for a match
-     * @param string $mask bit mask to be applied to bytes after the 8th
-     *      byte through 20th byte of word_id. In single word case these
-     *      bytes contain safe:, media:, and class: meta word info
      * @param int $threshold if greater than zero how many posting list
      *      results in dictionary info returned before stopping looking for
      *      more matches
@@ -807,8 +800,7 @@ class IndexDictionary implements CrawlConstants
      *      no data
      */
      public function getWordInfoTier($word_id, $raw, $tier, $shift = 0,
-        $mask = "", $threshold = -1, $start_generation = -1,
-        $num_distinct_generations = -1)
+        $threshold = -1, $start_generation = -1, $num_distinct_generations = -1)
      {
         $num_generations = 0;
         $max_retained_generation = -1;
@@ -827,17 +819,12 @@ class IndexDictionary implements CrawlConstants
         if (strlen($word_id) < 1) {
             return false;
         }
-        if ($mask != "") {
-            $mask_len = min(11, strlen($mask));
-        } else {
-            $mask_len = 0;
-        }
         $word_item_len = $word_key_len + IndexShard::WORD_DATA_LEN;
         $word_data_len = IndexShard::WORD_DATA_LEN;
         $file_num = ord($word_id[0]);
         /*
             Entries for a particular shard have postings for both
-            docs and links. If an entry has more than max_entry_len
+            docs and links. If an entry has more than max_entry_count
             we will assume entry somehow got corrupted and skip that
             generation for that word. Because we are including link have
             set threshold to 5 * number of docs that could be in a shard
@@ -912,7 +899,7 @@ class IndexDictionary implements CrawlConstants
         $id_info = [];
         $num_aux_records = (ord($word_string[$word_key_len]) << 8) +
             ord($word_string[$word_key_len + 1]);
-        $word_string = "\x00\x00".substr($word_string, $word_key_len + 2);
+        $word_string = "\x00\x00" . substr($word_string, $word_key_len + 2);
         $tmp = IndexShard::getWordInfoFromString($word_string, true);
         $check_and_auxes = 1;
         if ($tmp[3] < $max_entry_count) {
@@ -920,10 +907,11 @@ class IndexDictionary implements CrawlConstants
             $previous_id = $id;
             $remember_generation = $previous_generation;
             if ($start_generation <= $previous_generation) {
-                if ($this->checkMaskAndAdd($id, $word_id, $mask, $mask_len,
+                $this->addLookedUpEntry($id, $word_id,
                     $tmp, $info, $total_count, $previous_generation,
                     $previous_id, $num_generations, $num_distinct_generations,
-                    $max_retained_generation, $id_info) && $num_aux_records>0) {
+                    $max_retained_generation, $id_info);
+                if ($num_aux_records > 0) {
                     $this->addAuxInfoRecords($id ,$file_num, $num_aux_records,
                         $total_count, $threshold, $info, $previous_generation,
                         $num_generations, $start +
@@ -946,7 +934,7 @@ class IndexDictionary implements CrawlConstants
            single records get corrupted.
          */
         $break_count = 0;
-        /* we found one match so far (ignoring mask), we are now backing up
+        /* we found one match so far, we are now backing up
            to look for earlier matches
          */
         while ($test_loc >= $low) {
@@ -997,11 +985,12 @@ class IndexDictionary implements CrawlConstants
                     $num_generations < $num_distinct_generations ||
                     $current_generation <= $max_retained_generation
                     )) {
-                    if ($this->checkMaskAndAdd($id, $word_id, $mask, $mask_len,
+                    $this->addLookedUpEntry($id, $word_id,
                         $tmp, $info, $total_count, $previous_generation,
                         $previous_id, $num_generations,
                         $num_distinct_generations, $max_retained_generation,
-                        $id_info) && $num_aux_records > 0) {
+                        $id_info);
+                    if ($num_aux_records > 0) {
                         $this->addAuxInfoRecords($id, $file_num,
                             $num_aux_records, $total_count, $threshold, $info,
                             $previous_generation, $num_generations, $start +
@@ -1020,7 +1009,7 @@ class IndexDictionary implements CrawlConstants
         $test_loc = $check_loc + $check_and_auxes;
         $previous_generation = $remember_generation;
         $break_count = 0;
-        /* from the first match we found (ignoring mask), we are now looking
+        /* from the first match we found, we are now looking
            forward to find matches
          */
         while ($test_loc <= $high) {
@@ -1050,11 +1039,11 @@ class IndexDictionary implements CrawlConstants
                     $num_generations < $num_distinct_generations ||
                     $current_generation <= $max_retained_generation
                     )) {
-                    if ($this->checkMaskAndAdd($id, $word_id, $mask, $mask_len,
-                        $tmp, $info, $total_count, $previous_generation,
-                        $previous_id, $num_generations,
-                        $num_distinct_generations, $max_retained_generation,
-                        $id_info) && $num_aux_records > 0) {
+                    $this->addLookedUpEntry($id, $word_id, $tmp, $info,
+                        $total_count, $previous_generation, $previous_id,
+                        $num_generations, $num_distinct_generations,
+                        $max_retained_generation, $id_info);
+                    if ($num_aux_records > 0) {
                         $this->addAuxInfoRecords($id, $file_num,
                             $num_aux_records, $total_count, $threshold, $info,
                             $previous_generation, $num_generations, $start +
@@ -1078,7 +1067,8 @@ class IndexDictionary implements CrawlConstants
      * a given word id can't be stored in a single record
      *
      * @param string $id word id to add aux records for
-     * @param int $file_num
+     * @param int $file_num which prefix file to read from (always reads
+     *     a file at the max_tier level)
      * @param int $num_aux_records
      * @param int& $total_count
      * @param int $threshold
@@ -1129,7 +1119,9 @@ class IndexDictionary implements CrawlConstants
                     $id_info[$record[0]][] = count($info);
                     $info[] = $record;
                     $total_count += $record[3];
-                    if ($threshold > 0 && $total_count > $threshold) { return; }
+                    if ($threshold > 0 && $total_count > $threshold) {
+                        return;
+                    }
                     $previous_generation = $record[0];
                 }
             }
@@ -1155,18 +1147,14 @@ class IndexDictionary implements CrawlConstants
     /**
      * This method is used when computing the array of
      * (generation, posting_list_start, len, exact_word_id) quadruples when
-     * looking up a $word_id in an index dictionary. It checks
-     * if the $id of a dictionary row matches $word_id up to the $mask info.
-     * If so, it adds the word record to the quadruple array $info that has been
+     * looking up a $word_id in an index dictionary. It adds the
+     * word record to the quadruple array $info that has been
      * calculated so far. It also update $total_count, and as well as
      * $previous info for the previous matching record.
      *
      * @param string $id of a row to compare $word_id against
      * @param string $word_id the word id of a term or phrase we are computing
      *     the quadruple array for
-     * @param string $mask up to 9 byte wask used to say which materialized
-     *     meta words should be checked for when doing a match
-     * @param int $mask_len this should be strlen($mask)
      * @param array $record current record from dictionary that we may or may
      *     not add to info
      * @param array& $info quadruple array we are adding to
@@ -1177,61 +1165,40 @@ class IndexDictionary implements CrawlConstants
      * @param int $num_distinct_generations
      * @param int& $max_retained_generation
      * @param array& $id_info
-     * @return bool whether the record was added
      */
-    public function checkMaskAndAdd($id, $word_id, $mask, $mask_len, $record,
+    public function addLookedUpEntry($id, $word_id, $record,
         &$info, &$total_count, &$previous_generation, &$previous_id,
         &$num_generations, $num_distinct_generations,
         &$max_retained_generation, &$id_info)
     {
         $record[4] = $id;
-        $add_flag = true;
-        if ($mask != "" && strlen($id) > 9 && strlen($word_id) > 9 &&
-            substr_compare($id, $word_id, 9, $mask_len) != 0) {
-            $k = 0;
-            $old_k = 0;
-            while(($k = strpos($mask, "\xFF", $old_k)) !== false) {
-                $loc = $k + 8;
-                if (isset($id[$loc]) && $id[$loc] != $word_id[$loc]) {
-                    $add_flag = false;
-                    break;
-                }
-                if ($k == $old_k) {
-                    $k++;
-                }
-                $old_k = $k;
-            }
-        }
-        if ($add_flag) {
             //adding to the end is front is slower than tacking to end
-            if ($num_distinct_generations > 0) {
-                if (!isset($id_info[$record[0]])) {
-                    $id_info[$record[0]] = [];
-                    if ($num_generations >= $num_distinct_generations) {
-                        if (isset($id_info[$max_retained_generation])) {
-                            foreach ($id_info[$max_retained_generation] as
-                                $key) {
-                                $total_count -= $info[$key][3];
-                                $info[$key] = false;
-                            }
-                            unset($id_info[$max_retained_generation]);
-                        }
-                        $max_retained_generation = max(array_keys($id_info));
-                    } else {
-                        $num_generations++;
-                        if ($record[0] > $max_retained_generation) {
-                            $max_retained_generation = $record[0];
+        if ($num_distinct_generations > 0) {
+            if (!isset($id_info[$record[0]])) {
+                $id_info[$record[0]] = [];
+                if ($num_generations >= $num_distinct_generations) {
+                    if (isset($id_info[$max_retained_generation])) {
+                        foreach ($id_info[$max_retained_generation] as
+                            $key) {
+                            $total_count -= $info[$key][3];
+                            $info[$key] = false;
                         }
+                        unset($id_info[$max_retained_generation]);
+                    }
+                    $max_retained_generation = max(array_keys($id_info));
+                } else {
+                    $num_generations++;
+                    if ($record[0] > $max_retained_generation) {
+                        $max_retained_generation = $record[0];
                     }
                 }
-                $id_info[$record[0]][] = count($info);
             }
-            $info[] = $record;
-            $total_count += $record[3];
-            $previous_generation = $record[0];
-            $previous_id = $id;
         }
-        return $add_flag;
+        $id_info[$record[0]][] = count($info);
+        $info[] = $record;
+        $total_count += $record[3];
+        $previous_generation = $record[0];
+        $previous_id = $id;
     }
     /**
      * Gets from disk $len many bytes beginning at $offset from the
@@ -1302,4 +1269,4 @@ class IndexDictionary implements CrawlConstants
             $this->fhs[$file_num][$tier], self::DICT_BLOCK_SIZE);
         return $this->blocks[$file_num][$tier][$bytes];
     }
-}
\ No newline at end of file
+}
diff --git a/src/library/IndexManager.php b/src/library/IndexManager.php
index af6d718ae..83b710377 100644
--- a/src/library/IndexManager.php
+++ b/src/library/IndexManager.php
@@ -149,8 +149,6 @@ class IndexManager implements CrawlConstants
      *     dictionary
      * @param int $shift if $hash is for a phrase, how many low order
      *     bits of word id to discard
-     * @param string $mask if $hash is for a word, after the 9th byte what
-     *     meta word mask should be applied to the 20 byte hash
      * @param int $threshold after the number of results exceeds this amount
      *     stop looking for more dictionary entries.
      * @param int $start_generation
@@ -161,28 +159,18 @@ class IndexManager implements CrawlConstants
      *      that match $hash)
      */
     public static function getWordInfo($index_name, $hash, $shift = 0,
-        $mask = "", $threshold = -1, $start_generation = -1,
-        $num_distinct_generations = -1, $with_remaining_total = false)
+        $threshold = -1, $start_generation = -1, $num_distinct_generations = -1,
+        $with_remaining_total = false)
     {
         $id = "$index_name:$start_generation:$num_distinct_generations";
         $index = self::getIndex($index_name);
-        $len = strlen($mask);
-        if ($len > 0) {
-            $pre_hash = substr($hash, 0, 8) .
-                "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
-        } else {
-            $pre_hash = $hash;
-        }
         $tmp = [];
-        $test_mask = "";
         if ((!C\nsdefined('NO_FEEDS') || !C\NO_FEEDS) &&
             $start_generation < 0
             && file_exists(C\WORK_DIRECTORY . "/feeds/index")) {
-            //NO_FEEDS defined true in statistic_controller.php
             $use_feeds = true;
             $feed_shard = self::getIndex("feed");
-            $feed_info = $feed_shard->getWordInfo($hash, true, $shift,
-                $mask);
+            $feed_info = $feed_shard->getWordInfo($hash, true, $shift);
             if (is_array($feed_info)) {
                 $tmp[-1] = [-1, $feed_info[0],
                     $feed_info[1], $feed_info[2], $feed_info[3]];
@@ -191,7 +179,7 @@ class IndexManager implements CrawlConstants
         if (!empty($index->dictionary)) {
             $pre_info =
                 $index->dictionary->getWordInfo($hash, true, $shift,
-                $mask, $threshold, $start_generation,
+                $threshold, $start_generation,
                 $num_distinct_generations, true);
         }
         if (!empty($pre_info[1])) {
@@ -230,22 +218,12 @@ class IndexManager implements CrawlConstants
         }
         $pos = -1;
         $total_num_docs = 0;
-        $hashes = allCrawlHashPaths($term_or_phrase, [], [], true);
-        if (!is_array($hashes)) {
-            $hashes = [$hashes];
-        }
+        $hashes = allCrawlHashPaths($term_or_phrase, true);
         foreach ($hashes as $hash) {
-            if (is_array($hash)) {
-                list($num_docs, ) =
-                    self::getWordInfo($index_name, $hash[0],
-                        $hash[1], $hash[2], $threshold, $start_generation,
-                        $num_distinct_generations, true);
-            } else {
-                list($num_docs, ) =
-                    self::getWordInfo($index_name, $hash, 0, "",
-                    $threshold, $start_generation, $num_distinct_generations,
-                    true);
-            }
+            list($num_docs, ) =
+                self::getWordInfo($index_name, $hash[0],
+                    $hash[1], $threshold, $start_generation,
+                    $num_distinct_generations, true);
             $total_num_docs += $num_docs;
             if ($threshold > 0 && $total_num_docs > $threshold) {
                    return $total_num_docs;
diff --git a/src/library/IndexShard.php b/src/library/IndexShard.php
index 0b24c8df7..4dcedaa7b 100644
--- a/src/library/IndexShard.php
+++ b/src/library/IndexShard.php
@@ -323,7 +323,6 @@ class IndexShard extends PersistentStructure implements
      * @param array $word_lists (word => array of word positions in doc)
      * @param array $meta_ids meta words to be associated with the document
      *     an example meta word would be filetype:pdf for a PDF document.
-     * @param array $materialized_metas
      * @param bool $is_doc flag used to indicate if what is being sored is
      *     a document or a link to a document
      * @param mixed $rank either false if not used, or a 4 bit estimate of the
@@ -331,8 +330,7 @@ class IndexShard extends PersistentStructure implements
      * @return bool success or failure of performing the add
      */
     public function addDocumentWords($doc_keys, $summary_offset, $word_lists,
-        $meta_ids = [], $materialized_metas = [], $is_doc = false,
-        $rank = false)
+        $meta_ids = [], $is_doc = false, $rank = false)
     {
         if ($this->word_docs_packed == true) {
             $this->words = [];
@@ -343,7 +341,9 @@ class IndexShard extends PersistentStructure implements
         $link_doc_len = 0;
         $len_key = strlen($doc_keys);
         $num_keys = floor($len_key/self::DOC_KEY_LEN);
-        if ($num_keys * self::DOC_KEY_LEN != $len_key) { return false; }
+        if ($num_keys * self::DOC_KEY_LEN != $len_key) {
+            return false;
+        }
         if ($num_keys % 2 == 0 ) {
             $doc_keys .= self::BLANK; //want to keep docids_len divisible by 16
         }
@@ -358,17 +358,16 @@ class IndexShard extends PersistentStructure implements
         foreach ($meta_ids as $meta_id) {
             $word_lists[$meta_id] = [];
         }
-        $meta_string = encodeMaterialMetas($meta_ids, $materialized_metas);
         //using $this->docids_len divisible by 16
         $doc_offset = $this->docids_len >> 4;
         foreach ($word_lists as $word => $position_list) {
             $occurrences = count($position_list);
             if (isset($position_list["cond_max"])) { //for now
-                $word_id = crawlHashPath($word,
-                    $position_list["cond_max"], [], [], true);
+                $word_id = crawlHashPath($word, $position_list["cond_max"],
+                    true);
                 unset($position_list["cond_max"]);
             }  else {
-                $word_id = crawlHashWord($word, true, $meta_string);
+                $word_id = crawlHashWord($word, true);
             }
             $store = packPosting($doc_offset, $position_list);
             if (!isset($this->words[$word_id])) {
@@ -417,12 +416,10 @@ class IndexShard extends PersistentStructure implements
      * @param bool $raw whether the id is our version of base64 encoded or not
      * @param int $shift how many low order bits to drop from $word_id's
      *    when checking for a match
-     * @param string $mask if $hash is for a word, after the 9th byte what
-     *     meta word mask should be applied to the 20 byte hash
      * @return array first offset, last offset, count, exact matching id (
      *     recall match can ignore low order shift bits)
      */
-    public function getWordInfo($word_id, $raw = false, $shift = 0, $mask = "")
+    public function getWordInfo($word_id, $raw = false, $shift = 0)
     {
         if ($raw == false) {
             //get rid of out modified base64 encoding
@@ -431,7 +428,6 @@ class IndexShard extends PersistentStructure implements
         $is_disk = $this->read_only_from_disk;
         $word_item_len = self::WORD_KEY_LEN + self::WORD_DATA_LEN;
         $word_key_len = self::WORD_KEY_LEN;
-        $mask_len = strlen($mask);
         if ($is_disk) {
             $this->getShardHeader();
             if (!isset($word_id[1])) {
@@ -465,48 +461,6 @@ class IndexShard extends PersistentStructure implements
             $id = substr($word_string, 0, $word_key_len);
             $cmp = compareWordHashes($word_id, $id, $shift);
             if ($cmp === 0) {
-                $found = false;
-                $orig_id = $id;
-                $old_check_loc = $check_loc;
-                while (compareWordHashes($word_id, $id, $shift) == 0 &&
-                    $check_loc >= $low) {
-                    if ($check_loc != $old_check_loc) {
-                        $word_string = $this->getWordString($is_disk, $start,
-                            $check_loc, $word_item_len);
-                        if ($word_string == false) {
-                            break;
-                        }
-                        $id = substr($word_string, 0, $word_key_len);
-                    }
-                    if (matchingWordMetas($word_id, $id, $mask, $mask_len)) {
-                        $found = true;
-                        break;
-                    }
-                    $check_loc--;
-                }
-                $check_loc = $old_check_loc;
-                $id = $orig_id;
-                if (!$found) {
-                    while (compareWordHashes($word_id, $id, $shift) == 0 &&
-                        $check_loc <= $high) {
-                        if ($check_loc != $old_check_loc) {
-                            $word_string = $this->getWordString($is_disk,
-                                $start, $check_loc, $word_item_len);
-                            if ($word_string == false) {
-                                break;
-                            }
-                            $id = substr($word_string, 0, $word_key_len);
-                        }
-                        if (matchingWordMetas($word_id, $id, $mask,$mask_len)) {
-                            $found = true;
-                            break;
-                        }
-                        $check_loc++;
-                    }
-                }
-                if (!$found) {
-                    return false;
-                }
                 $tmp_info = $this->getWordInfoFromString(
                     substr($word_string, $word_key_len));
                 $tmp_info[] = $id;
@@ -668,7 +622,8 @@ class IndexShard extends PersistentStructure implements
         }
         $item[self::DOC_LEN] = $doc_len;
         $item[self::IS_DOC] = $is_doc;
-        $item[self::PROXIMITY]=$this->computeProximity($position_list, $is_doc);
+        $item[self::PROXIMITY] =
+            $this->computeProximity($position_list, $is_doc);
         $occurrences = $this->weightedCount($position_list, $is_doc);
         //override $occurrences if $occurs != 0
         if ($occurs != 0) {
@@ -776,6 +731,9 @@ class IndexShard extends PersistentStructure implements
             self::TITLE => 0,
             self::DESCRIPTION => 0,
             self::LINKS => 0];
+        if (!is_array($position_list)) {
+            return $count;
+        }
         foreach ($position_list as $position) {
             if ($is_doc) {
                 if ($position < C\AD_HOC_TITLE_LENGTH) {
@@ -1289,10 +1247,10 @@ class IndexShard extends PersistentStructure implements
             crawlLog("Saving index shard .. done merge postings to string");
         }
         $this->prepareWordsAndPrefixes($with_logging);
-        if ($with_logging) {
-            crawlLog("Saving index shard .. make prefixes");
-        }
-        $header =  pack("N*", $this->prefixes_len ,
+            if ($with_logging) {
+                crawlLog("Saving index shard .. make prefixes");
+            }
+        $header =  pack("N*", $this->prefixes_len,
             $this->words_len,
             $this->word_docs_len,
             $this->docids_len,
@@ -1337,6 +1295,31 @@ class IndexShard extends PersistentStructure implements
         $this->word_docs_packed = false;
         return $out;
     }
+    /**
+     * This method re-saves a saved shard without the prefixes and dictionary.
+     * It would typically be called after this information has been stored
+     * in an IndexDictionary object so that the data is not redundantly stored
+     */
+    public function saveWithoutDictionary()
+    {
+        $this->getShardHeader();
+        $header =  pack("N*", 0, 0,
+            $this->word_docs_len,
+            $this->docids_len,
+            $this->generation,
+            $this->num_docs_per_generation,
+            $this->num_docs,
+            $this->num_link_docs,
+            $this->len_all_docs,
+            $this->len_all_link_docs);
+        $word_docs = $this->getWordDocsSubstring();
+        $doc_infos = $this->getDocInfoSubstring();
+        $fh = fopen($this->filename, "wb");
+        fwrite($fh, $header);
+        fwrite($fh, $word_docs);
+        fwrite($fh, $doc_infos);
+        fclose($fh);
+    }
     /**
      * Computes the prefix string index for the current words array.
      * This index gives offsets of the first occurrences of the lead two char's
@@ -1394,7 +1377,7 @@ class IndexShard extends PersistentStructure implements
     /**
      * Posting lists are initially stored associated with a word as a key
      * value pair. The merge operation then merges them these to a string
-     * help by word_postings. packWords separates words from postings.
+     * held by word_postings. packWords separates words from postings.
      * After being applied words is a string consisting of
      * triples (as concatenated strings) word_id, start_offset, end_offset.
      * The offsets refer to integers offsets into a string $this->word_docs
@@ -1504,7 +1487,6 @@ class IndexShard extends PersistentStructure implements
             $postings = substr($this->word_postings,
                 $pos + $key_len + $posting_len, $len);
             $pos += $key_len + $posting_len + $len;
-
             if ($len != $two_doc_len ||
                 strncmp($postings,  self::HALF_BLANK, self::POSTING_LEN) != 0) {
                 if ($fh != null) {
@@ -1582,8 +1564,11 @@ class IndexShard extends PersistentStructure implements
      * @param $len number of bytes to get
      * @return desired string
      */
-    public function getWordDocsSubstring($offset, $len)
+    public function getWordDocsSubstring($offset = 0, $len = 0)
     {
+        if ($len <= 0) {
+            $len = $this->word_docs_len;
+        }
         if ($this->read_only_from_disk) {
             return $this->getShardSubstring($this->word_doc_offset + $offset,
                 $len);
@@ -1611,8 +1596,11 @@ class IndexShard extends PersistentStructure implements
      * @param $len number of bytes to get
      * @return desired string
      */
-    public function getDocInfoSubstring($offset, $len)
+    public function getDocInfoSubstring($offset = 0, $len = 0)
     {
+        if ($len <= 0) {
+            $len = $this->docids_len;
+        }
         if ($this->read_only_from_disk) {
             return $this->getShardSubstring(
                 $this->doc_info_offset + $offset, $len, false);
@@ -1870,4 +1858,4 @@ class IndexShard extends PersistentStructure implements
             substr($value, self::WORD_KEY_LEN,
                 self::WORD_DATA_LEN);
     }
-}
\ No newline at end of file
+}
diff --git a/src/library/PhraseParser.php b/src/library/PhraseParser.php
index 46efbdaa8..14eceb76a 100755
--- a/src/library/PhraseParser.php
+++ b/src/library/PhraseParser.php
@@ -60,11 +60,6 @@ class PhraseParser
         'path:', 'robot:', 'safe:', 'server:', 'site:', 'size:',
         'time:', 'u:', 'version:','weight:', 'w:'
         ];
-    /**
-     * Those meta words whose values will be encoded as part of word_ids
-     * @var array
-     */
-    public static $materialized_metas = ["class:", "media:", "safe:"];
     /**
      * A list of meta words that might be extracted from a query
      * @var array
@@ -1076,57 +1071,6 @@ class PhraseParser
         $link_meta_ids[] = "link:all";
         return $link_meta_ids;
     }
-    /**
-     * Given the word key of a term (a hash of the term string which may
-     * have materialized meta information such as media type encoded in it),
-     * compute the media type.
-     * @param string $word_key hash of term with encoded metas
-     * @return string what media type it is such as Text, Image, News, Video
-     *      if it can be determined and unknown otherwise.
-     */
-    public static function getMediaType($word_key)
-    {
-        if (strlen($word_key) < 10) {
-            return "unknown";
-        }
-        $media_char = $word_key[9];
-        $media_types = ["media:text" => "Text", "media:image" => "Image",
-            "media:video" => "Video", "media:news" => "News"];
-        foreach ($media_types as $type => $common_name) {
-            $material_meta_string = encodeMaterialMetas([$type],
-                PhraseParser::$materialized_metas);
-            if ($material_meta_string[0] == $media_char) {
-                return $common_name;
-            }
-        }
-        return "Unknown";
-    }
-    /**
-     * Given the word key of a term (a hash of the term string which may
-     * have materialized meta information such as safe (not X-rated) search
-     * info encoded in it), compute the safe value.
-     * @param string $word_key hash of term with encoded metas
-     * @return string whether the term is associated with a "safe" page
-     *      in which case the string "True" is returned; an "unsafe" page
-     *      in which case the string "False" is returned; or "Undefined"
-     *      if it cannot be determined from the word key
-     */
-    public static function getSafety($word_key)
-    {
-        if (strlen($word_key) < 11) {
-            return "unknown";
-        }
-        $safety_char = $word_key[10];
-        $safety_types = ["safe:true" => "True", "safe:false" => "False"];
-        foreach ($safety_types as $type => $common_name) {
-            $material_meta_string = encodeMaterialMetas([$type],
-                PhraseParser::$materialized_metas);
-            if ($material_meta_string[1] == $safety_char) {
-                return $common_name;
-            }
-        }
-        return "Undefined";
-    }
     /**
      * Computes the Cosine-similarity of two phrases
      *
diff --git a/src/library/Thesaurus.php b/src/library/Thesaurus.php
deleted file mode 100644
index 77dd37858..000000000
--- a/src/library/Thesaurus.php
+++ /dev/null
@@ -1,361 +0,0 @@
-<?php
-/**
- * SeekQuarry/Yioop --
- * Open Source Pure PHP Search Engine, Crawler, and Indexer
- *
- * Copyright (C) 2009 - 2018  Chris Pollett chris@pollett.org
- *
- * LICENSE:
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * END LICENSE
- *
- * @author Shailesh Padave shaileshpadave49@gmail.com
- * @license https://www.gnu.org/licenses/ GPL3
- * @link https://www.seekquarry.com/
- * @copyright 2009 - 2018
- * @filesource
- */
-namespace seekquarry\yioop\library;
-
-use seekquarry\yioop\configs as C;
-
-/** For Yioop global defines */
-require_once __DIR__."/../configs/Config.php";
-/**
- * Class used to reorder the last 10 links computed by PhraseModel based on
- * thesaurus semantic information. For English, thesaurus semantic information
- * can be provided by WordNet, a lexical English database
- * available at http://wordnet.princeton.edu/
- * To enable, you this have to define WORDNET_EXEC in your local_config file.
- * The idea behind thresaurus reordering is that given a query, it
- * is tagged for parts of speech. Each term is then looked up in thesaurus for
- * those parts of speech. Representative phrases for those term senses are
- * extracted from the ranked thesaurus output and a set of rewrites of the
- * original query are created. By looking up the number
- * of times these rewrites occur in the searched index the top two phrases
- * that represent the original query are computed.The BM25 similarity of these
- * phrases is then scored against each of the 10 output summaries of
- * PhraseModel and used to reorder the results.
- * To add thesaurus reordering for a different locale, two methods need to be
- * written in that locale tokenizer.php file
- * tagPartsOfSpeechPhrase($phrase) which on an input phrase return a string
- *     where each term_i in the phrase has been replace with term_i~pos
- *     where pos is a two character part of speech NN, VB, AJ, AV, or NA (if
- *     none of the previous apply)
- * scoredThesaurusMatches($term, $word_type, $whole_query) which takes
- *     a term from an original whole_query which has been tagged to be
- *     one of the types VB (for verb), NN (for noun), AJ (for adjective),
- *     AV (for adverb), or NA (for anything else), it outputs
- *     a sequence of  (score => array of thesaurus terms) associations.
- *     The score representing one word sense of term
- * Given that these methods have been implemented if the use_thesaurus field
- * of that language tokenizer is set to true, the thesaurus will be used.
- */
-class Thesaurus
-{
-    /**
-     * Extracts similar phrases to the input query using thesaurus results.
-     * Part of speech tagging is processed on input and the output is
-     * looked up in the thesaurus. USing this a ranked list of alternate
-     * query phrases is created.
-     * For those phrases, counts in the Yioop index are calculated
-     * and the top two phrases are selected.
-     * @param string $orig_query input query from user
-     * @param string $index_name selected index for search engine
-     * @param string $lang locale tag for the query
-     * @param integer $threshold once count in posting list for any word
-     *     reaches to threshold then return the number
-     * @return array of top two words
-     */
-    public static function getSimilarPhrases($orig_query, $index_name,
-        $lang, $threshold = 10)
-    {
-        $num_docs = [];
-        $scores = [];
-
-        $suggested_queries =
-            self::getInitialSuggestions($orig_query, $lang);
-        foreach ($suggested_queries as $suggestion) {
-            $num_docs[$suggestion] =
-                self::numDocsIndex($suggestion, $threshold, $index_name, $lang);
-        }
-        arsort($num_docs);
-        $result = [];
-        $i = 0;
-        foreach ($num_docs as $k => $v) {
-            $result[$i] = $k;
-            $i++;
-            if ($i >= 2) { break; }
-        }
-        return $result;
-    }
-    /**
-     * Gets array of BM25 scores for given input array of summaries
-     * and thesaurus generated queries
-     * @param array $similar_phrases an array of thesaurus generated queries
-     * @param array $summaries an array of summaries which is generated
-     *     during crawl time.
-     * @return array of BM25 score for each document based on the thesaurus
-     * simimar phrases
-     */
-    public static function scorePhrasesSummaries($similar_phrases, $summaries)
-    {
-        $score = [];
-        //if there are no similar words then
-        if (empty($similar_phrases)) {
-            return [];
-        } else {
-            $num_phrases = count($similar_phrases);
-            for ($i = 0; $i < $num_phrases; $i++) {
-                $phrase = $similar_phrases[$i];
-                $terms = explode(' ', $phrase);
-                $summaries = self::changeCaseOfStringArray($summaries);
-                $idf = self::calculateIDF($summaries, $terms);
-                $tf = self::calculateTFBM25($summaries, $terms);
-                $num_summaries = count($summaries);
-                $num_terms = count($terms);
-                $bm25_result[$i] =
-                    self::calculateBM25($idf, $tf, $num_terms, $num_summaries);
-            }
-            if (count($bm25_result) == 1) {
-                for ($i = 0; $i < $num_summaries; $i++) {
-                    $temp = 0;
-                    $temp = $bm25_result[0][$i];
-                    $score[$i] = $temp;
-                }
-            } else {
-                for ($i = 0; $i < $num_summaries; $i++) {
-                    $temp = 0;
-                    $temp = $bm25_result[0][$i] * (2/3) +
-                        $bm25_result[1][$i] * (1/3);
-                    $score[$i] = $temp;
-                }
-            }
-            return $score;
-        }
-    }
-    /**
-     * Computes suggested related phrases from thesaurus based on part of
-     * speech  done on each query term.
-     *
-     * @param string $query query entered by user
-     * @param string $lang locale tag for the query
-     * @return string array $suggestion consisting of phrases suggested to
-     *     be similar in meaning to some sens of the query
-     */
-    public static function getInitialSuggestions($query, $lang)
-    {
-        $tokenizer = PhraseParser::getTokenizer($lang);
-        $pos_query = $tokenizer->tagPartsOfSpeechPhrase($query);
-        $max_len = 25;
-        $replacement_phrases = [];
-        $suggestions = [];
-        $terms = preg_split("/\s+|\-/", trim($query));
-        $pos_terms = preg_split("/\s+/",
-            trim($pos_query), -1, PREG_SPLIT_NO_EMPTY);
-        $num_pos_terms = count($pos_terms);
-        $word_type = null;
-        $similar_words = [];
-        $known_word_types = ["NN", "VB", "AJ", "AV"];
-        for ($i = 0; $i < $num_pos_terms; $i++) {
-            $pos = strpos($pos_terms[$i], '~');
-            $word_type = trim(substr($pos_terms[$i], $pos + 1));
-            if (!in_array($word_type, $known_word_types)) {
-                $word_type = "NA";
-            }
-            $current_word = substr($pos_terms[$i], 0, $pos);
-            if ($word_type != "NA") {
-                $similar_phrases = $tokenizer->scoredThesaurusMatches(
-                    $current_word, $word_type, $query);
-                $highest_scoring_sense_phrases = ($similar_phrases) ?
-                    array_shift($similar_phrases): false;
-                if ($highest_scoring_sense_phrases) {
-                    $replacement_phrases[$current_word] =
-                        $highest_scoring_sense_phrases;
-                }
-            }
-        }
-        $i = 0;
-        foreach ($replacement_phrases as $words => $similar_phrases) {
-            foreach ($similar_phrases as $phrase) {
-                if (mb_strpos(trim($phrase), ' ') !== false) {
-                    $phrase = preg_replace('/~[\w]+/', '', $phrase);
-                }
-                $modified_query = preg_replace(
-                    '/' . $words . '/', trim($phrase), $query);
-                if (mb_strlen($modified_query) < $max_len &&
-                    mb_strpos($modified_query, $query) === false) {
-                    $suggestions[$i] = $modified_query;
-                    $i++;
-                }
-            }
-        }
-        return $suggestions;
-    }
-    /**
-     * Returns the number of documents in an index that a phrase occurs in.
-     * If it occurs in more than threshold documents then cut off search.
-     *
-     * @param string $phrase to look up in index
-     * @param int $threshold once count in posting list for any word
-     *     reaches to threshold then return the number
-     * @param string $index_name selected index for search engine
-     * @param string $lang locale tag for the query
-     * @return int number of documents phrase occurs in
-     */
-    public static function numDocsIndex($phrase, $threshold, $index_name, $lang)
-    {
-        PhraseParser::canonicalizePunctuatedTerms($phrase, $lang);
-        $terms = PhraseParser::stemCharGramSegment($phrase, $lang);
-        $num  = count($terms);
-        if ($index_name == null) {
-            return 0;
-        }
-        if (count($terms) > C\MAX_QUERY_TERMS) {
-            $terms  = array_slice($terms, 0, C\MAX_QUERY_TERMS);
-        }
-        $whole_phrase = implode(" ", $terms);
-        return IndexManager::numDocsTerm($whole_phrase, $index_name,
-            $threshold);
-    }
-    /**
-     * Lower cases an array of strings
-     *
-     * @param array $summaries strings to put into lower case
-     * @return array with strings converted to lower case
-     */
-    public static function changeCaseOfStringArray($summaries)
-    {
-        return explode("-!-", mb_strtolower(implode("-!-", $summaries)));
-    }
-    /**
-     * Computes the BM25 of an array of documents given that the idf and
-     * tf scores for these documents have already been computed
-     *
-     * @param array $idf inverse doc frequency for given query array
-     * @param array $tf term frequency for given query array
-     * @param $num_terms number of terms that make up input query
-     * @param $num_summaries count for input summaries
-     * @returns array consisting of BM25 scores for each document
-     */
-    public static function calculateBM25($idf, $tf, $num_terms, $num_summaries)
-    {
-        $scores = [];
-        for ($i = 0; $i < $num_terms; $i++) {
-            for ($j = 0; $j < $num_summaries; $j++) {
-                $bm25_score[$i][$j] = $idf[$i] * $tf[$i][$j];
-            }
-        }
-        for ($i = 0; $i < $num_summaries; $i++) {
-            $val = 0;
-            for ($j = 0; $j < $num_terms; $j++) {
-                $val += $bm25_score[$j][$i];
-            }
-            $scores[$i] = $val;
-        }
-        return $scores;
-    }
-    /**
-     * Calculates the BM25 normalized term frequency of a set of terms in
-     * a collection of text summaries
-     *
-     * @param array $summaries list of summary strings to compute BM25TF w.r.t
-     * @param array $terms we want the term frequency computation for
-     * @return array $tfbm25 a 2d array with rows being indexed by terms and
-     *     columns indexed by summaries and the values of an entry being
-     *     the tfbm25 score for that term in that document
-     */
-    public static function calculateTFBM25($summaries, $terms)
-    {
-        $k1 = 1.5;
-        $b = 0.75;
-        $tf_values = [];
-        $tfbm25 = [];
-        $doc_length = strlen(implode("", $summaries));
-        $num_summaries = count($summaries);
-        if ($num_summaries!= 0) {
-            $avg_length = $doc_length / $num_summaries;
-        } else {
-            $avg_length = 0;
-        }
-        $avg_length = max($avg_length, 1);
-        $tf_values = self::calculateTermFreq($summaries, $terms);
-        $num_terms =count($terms);
-        for ($i = 0; $i < $num_terms; $i++) {
-            for ($j = 0; $j < $num_summaries; $j++) {
-                $frequency = $tf_values[$i][$j];
-                $tfbm25[$i][$j] =
-                    ($frequency * ($k1 + 1))/($frequency + $k1 *
-                    ((1 - $b) + $b * ($doc_length/$avg_length)));
-            }
-        }
-        return $tfbm25;
-    }
-    /**
-     * Computes a 2D array of the number of occurences of term i in document j
-     *
-     * @param array $summaries documents to compute frequencies in
-     * @param array $terms terms to compute frequencies for
-     * @return array 2D array as described above
-     */
-    public static function calculateTermFreq($summaries, $terms)
-    {
-        $tf_values = [];
-        $num_terms = count($terms);
-        $num_summaries = count($summaries);
-        for ($i = 0; $i < $num_terms; $i++) {
-            for ($j = 0; $j < $num_summaries; $j++) {
-                if ($terms[$i] != "") {
-                    $frequency = substr_count($summaries[$j], $terms[$i]);
-                    $tf_values[$i][$j] = $frequency;
-                } else {
-                    $tf_values[$i][$j] = 0;
-                }
-            }
-        }
-        return $tf_values;
-    }
-    /**
-     * To get the inverse document frequencies for a collection of terms in
-     * a set of documents.
-     * IDF(term_i) = log_10(# of document / # docs term i in)
-     *
-     * @param array $summaries documents to use in calculating IDF score
-     * @param array $terms terms to compute IDF score for
-     * @return array $idf 1D-array saying the inverse document frequency for
-     * each term
-     */
-    public static function calculateIDF($summaries, $terms)
-    {
-        $N = count($summaries);
-        $Nt = [];
-        $term_count = 0;
-        $num_terms = count($terms);
-        for ($i = 0; $i < $num_terms; $i++) {
-            $cnt_Nt = 0;
-            $term_count++;
-            foreach ($summaries as $summary)
-            {
-                if (stripos($summary, $terms[$i]) !== false) {
-                    $cnt_Nt++;
-                }
-            }
-            $Nt[$i] = $cnt_Nt;
-            $idf[$i] = ($Nt[$i] != 0) ? log10($N / $Nt[$i]) : 0;
-        }
-        return $idf;
-    }
-}
diff --git a/src/library/Utility.php b/src/library/Utility.php
index aadb01782..2d7fe1807 100755
--- a/src/library/Utility.php
+++ b/src/library/Utility.php
@@ -36,7 +36,7 @@ namespace seekquarry\yioop\library;
 use seekquarry\yioop\configs as C;

 /** For Yioop global defines */
-require_once __DIR__."/../configs/Config.php";
+require_once __DIR__ . "/../configs/Config.php";
 /**
  * Adds delimiters to a regex that may or may not have them
  *
@@ -50,8 +50,8 @@ function addRegexDelimiters($expression)
     $last = $expression[$len - 1];
     if (($first != $last && $len > 1) || $len == 1) {
         $expression = ($first != '/' ) ?
-            "/".$expression."/"
-            : "@".$expression."@";
+            "/" . $expression . "/"
+            : "@" . $expression . "@";
     }
     return $expression;
 }
@@ -233,7 +233,6 @@ function vByteDecode(&$str, &$offset)
         $pos_int += (ord($str[$offset] & 127) << $shift);
         $shift += 7;
     }
-
     return $pos_int;
 }
 /**
@@ -285,7 +284,7 @@ function packPosting($doc_index, $position_list, $delta = true)
  */
 function unpackPosting($posting, &$offset, $dedelta = true)
 {
-    $delta_list = decodeModified9($posting, $offset);
+    $delta_list = (array) decodeModified9($posting, $offset);
     $doc_index = array_shift($delta_list);
     if (($doc_index & (2 << 26)) > 0) {
         $delta0 = ($doc_index & ((2 << 9) - 1));
@@ -946,17 +945,14 @@ function crawlHash($string, $raw = false)
  *
  * @param string $string word to hash
  * @param bool $raw whether to base64Hash the result
- * @param $meta_string the up to 11 byte string of meta information
  * @return string first 8 bytes of md5 of $string concatenated with \x00
  *     to indicate the hash is of a word not a phrase concatenated  with the
  *     padded to 11 byte $meta_string.
  */
-function crawlHashWord($string, $raw = false, $meta_string = "")
+function crawlHashWord($string, $raw = false)
 {
     $pre_hash = substr(md5($string, true), 0, 8) .
-        "\x00";
-    $meta_string = substr($meta_string, 0, 11);
-    $pre_hash .= $meta_string;
+        "\x00" . substr($string, 0, 11);
     $pre_hash = str_pad($pre_hash, 20, "\x00");
     /* low order bytes all 0 -- distinguishes it from a crawlHashPath */
     if (!$raw) {
@@ -973,24 +969,15 @@ function crawlHashWord($string, $raw = false, $meta_string = "")
  * maximal.
  *
  * @param string $string what to find hashes for
- * @param array $metas array of meta word values
- * @param array $encode_metas a list of meta word names to encode in word_ids
  * @param bool $raw whether to base64 the result
  * @return array of hashes with appropriates shifts if needed
  */
-function allCrawlHashPaths($string, $metas = [], $encode_metas = [],
-    $raw = false)
+function allCrawlHashPaths($string, $raw = false)
 {
-    $mask = "";
-    if ($encode_metas != []) {
-        $mask_num = min(11, count($encode_metas));
-        $found_materialized_metas = findMaterialMetas($metas, $encode_metas);
-        foreach ($encode_metas as $meta) {
-            $mask .= (isset($found_materialized_metas[$meta])) ? "\xFF": "\x00";
-        }
-    }
     $pos = -1;
     $hashes = [];
+    $last_entry = null;
+    $new_entry = null;
     $zero = "*";
     $shift = 0;
     $num_spaces = substr_count($string, " ");
@@ -1000,8 +987,7 @@ function allCrawlHashPaths($string, $metas = [], $encode_metas = [],
         $old_pos = $pos;
         $path_string = $string;
         for ($i = 0; $i < $num; $i++) {
-            $hash = crawlHashPath($path_string, $pos + 1, $metas,
-                $encode_metas, $raw);
+            $hash = crawlHashPath($path_string, $pos + 1, $raw);
             if ($i > 0 && $j > 0) {
                 $path_len = $num_spaces - $j + 1 + $i;
                 if ($path_len < 4) {
@@ -1075,97 +1061,23 @@ function allCrawlHashPaths($string, $metas = [], $encode_metas = [],
                         $shift = 64 + 29 * ($i - 12);
                     }
                 }
-                $hashes[] = [$hash, $shift, $mask];
-            } else if ($mask != "") {
-                $hashes[] = [$hash, $shift, $mask];
+                $new_entry = [$hash, $shift];
             } else {
-                $hashes[] = $hash;
+                $new_entry = [$hash, 0];
+            }
+            if ($new_entry != $last_entry) {
+                $hashes[] = $new_entry;
+            }
+            if ($j == 0) {
+                break;
             }
-            if ($j == 0) {break; }
             $path_string .= " " . $zero;
         }
         $pos = mb_strpos($string, " ", $pos + 1);
-        $encode_metas = [];
         $j++;
     } while($pos > 0 && $old_pos != $pos);
-    if (count($hashes) == 1) {
-        return $hashes[0];
-    }
     return $hashes;
 }
-/**
- * Give an array of values for meta words (for example, media:video, lang:en)
- * and an array of names of meta words to be encoded into word_id's
- * (for example, media:, safe:, class:) return an associative array of pairs
- * (meta word name =>array(value of that name)) which should be encoded
- * into word id's
- *
- * @param array $metas array of meta word values
- * @param array $encode_metas a list of meta word names to encode in word_ids
- * @return array $found_materialized_metas associative array of name =>
- *     values for that name
- */
-function findMaterialMetas($metas, $encode_metas)
-{
-    $found_materialized_metas = [];
-    foreach ($metas as $meta_id) {
-        if ($encode_metas != []) {
-            $match_kinds = explode(":", $meta_id);
-            $next_char = (isset($match_kinds[1][0])) ? $match_kinds[1][0] :
-                ord('a');
-            $is_class = ($match_kinds[0] == 'class');
-            if (count($match_kinds) > 1 &&
-                in_array($match_kinds[0].":", $encode_metas) &&
-                !in_array($match_kinds[1], ["all"]) &&
-                !isset($match_kinds[2])) {
-                    $found_materialized_metas[$match_kinds[0].":"][] =
-                        $meta_id;
-            }
-        }
-    }
-    return $found_materialized_metas;
-}
-/**
- * Give an array of values for meta words (for example, media:video)
- * and an array of names of meta words to be encoded into word_id's
- * (for example, media:, safe:, class:) returns a string mask for the
- * byte positions in a word_id after the 9th byte. The format of a word id
- * in the case of a single word is described in the documentation for
- * @see crawlHashPath
- *
- * @param array $metas a list of meta word values extracted from a query
- *      string or document.
- * @param array $encode_metas a list of meta word names that should be encoded
- *      in word id's For example, (media:, safe:, class:)
- * @return string a 9 byte string where encoded meta word values have been
- *     stored
- */
-function encodeMaterialMetas($metas, $encode_metas)
-{
-    if (!is_array($encode_metas) || empty($encode_metas)) {
-        return "";
-    }
-    $found_materialized_metas = findMaterialMetas($metas, $encode_metas);
-    $meta_string = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
-    foreach ($found_materialized_metas as $name => $values) {
-        foreach ($values as $value) {
-            if ($name == 'class:' && isset($value[6])) {
-                $pre_meta_pos = ord($value[6]);
-                /*
-                   positions for classifier classes start at 2
-                 */
-                $meta_pos = (($pre_meta_pos) % 9) + 2;
-            } else {
-                /* m is first char of media, s is first char of s
-                   offset will be 1 if safe, 0 if media
-                 */
-                $meta_pos = (ord($name[0]) > ord('m')) ? 1 : 0;
-            }
-            $meta_string[$meta_pos] = substr(crawlHash($value, true), 0, 1);
-        }
-    }
-    return $meta_string;
-}
 /**
  * Given a string makes an 20 byte hash path - where first 8 bytes is
  * a hash of the string before path start, last 12 bytes is the path
@@ -1185,36 +1097,22 @@ function encodeMaterialMetas($metas, $encode_metas)
  * If $path_start is 0 behaves like crawlHashWord(). The above encoding is
  * typically used to make word_ids for whole phrases, to make word id's
  * for single words, the format is
- * (64 bits for word, 1 byte null, remaining 11 bytes encode an materialized
- * meta words present in document or query string). Of this 11 bytes,
- * the first is used for the meta word media:, so if the document is of type
- * media:image, then a single byte hash of media:image gives the value of this
- * byte. The second byte encodes the meta word safe: in a similar fashion.
- * The remaining 9 bytes encode different values of the class: meta word.
- * To encode class:some_value., first class:some_value[0] is hashed to a value
- * j betwen 0 and 8. Then class:some_value is hash to a single byte b. Then
- * the jth value of the remaining bytes is set to b. Non affected bytes are
- * null.
+ * (64 bits for word, 1 byte null, then 11 ignored bytes).
  *
  * @param string $string what to hash
  * @param int $path_start what to use as the split between 5 byte front
  *     hash and the rest
- * @param array $metas meta word values from a document or query string
- * @param array $encode_metas a list of names of meta word values which should
- *     encoded into word ids. i.e., (media:, safe:, class:) or none.
  * @param bool $raw whether to modified base64 the result
  * @return string 8 bytes that results from this hash process
  */
-function crawlHashPath($string, $path_start = 0, $metas = [],
-    $encode_metas = [], $raw = false)
+function crawlHashPath($string, $path_start = 0, $raw = false)
 {
     if ($path_start > 0 ) {
         $string_parts = explode(" ", substr($string, $path_start));
         $num_parts = count($string_parts);
     }
     if ($path_start == 0 || $num_parts == 0) {
-        $meta_string = encodeMaterialMetas($metas, $encode_metas);
-        $hash = crawlHashWord($string, true, $meta_string);
+        $hash = crawlHashWord($string, true);
         if (!$raw) {
             $hash = base64Hash($hash);
         }
@@ -1227,7 +1125,6 @@ function crawlHashPath($string, $path_start = 0, $metas = [],
     $path_ints = [];
     $modes = [3, 3, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13];
     $mode_nums = [1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6];
-
     foreach ($string_parts as $part) {
         if ($part == "*") {
             $path_ints[] = 0;
@@ -1236,7 +1133,9 @@ function crawlHashPath($string, $path_start = 0, $metas = [],
         }
     }
     $num_parts = count($path_ints);
-    if ($num_parts > 13) {$num_parts = 13; }
+    if ($num_parts > 13) {
+        $num_parts = 13;
+    }
     $mode = $modes[$num_parts];
     $mode_num = $mode_nums[$num_parts];
     switch ($mode) {
@@ -1268,7 +1167,6 @@ function crawlHashPath($string, $path_start = 0, $metas = [],
                 + ($path_ints[4] & $mask)) << $shift)
                 + ($path_ints[5] & $mask)) << $shift)
                 + ($path_ints[6] & $mask);
-
         break;
         case 9:
             $path_ints[8] = isset($path_ints[8]) ? $path_ints[8] : 0;
@@ -1366,11 +1264,7 @@ function crawlHashPath($string, $path_start = 0, $metas = [],
  */
 function compareWordHashes($id1, $id2, $shift = 0)
 {
-    if (!isset($id1[8]) || !isset($id2[8])) {
-        return strncmp($id1, $id2, 8);
-    } else if ($id1[8] == "\x00") {
-        return strncmp($id1, $id2, 9);
-    } else if ($shift < 32) {
+    if ($shift < 32) {
         $cmp = strncmp($id1, $id2, 16);
     } else if ($shift < 64) {
         $cmp = strncmp($id1, $id2, 12);
@@ -1393,35 +1287,6 @@ function compareWordHashes($id1, $id2, $shift = 0)
     $id2 = packInt(unpackInt(substr($id2, $pos, 4)) >> $shift);
     return strcmp($id1, $id2);
 }
-/**
- * Check if two word id's match according to a mask of the last 12 bytes.
- *
- * @param string $word_id 20 byte word id to compare
- * @param string $id 20 byte word id to compare
- * @param string $mask what mask to use
- * @param string $mask_len the length of the mask
- * @return bool true if match; false otherwise
- */
-function matchingWordMetas($word_id, $id, $mask = "", $mask_len = 0)
-{
-    if ($mask != "" && strlen($id) > 9 && strlen($word_id) > 9 &&
-        substr_compare($id, $word_id, 9, $mask_len) != 0) {
-        $k = 0;
-        $old_k = 0;
-        while(($k = strpos($mask, "\xFF", $old_k)) !== false) {
-            $loc = $k + 8;
-            if (isset($id[$loc]) && $id[$loc] != $word_id[$loc]) {
-                return false;
-                break;
-            }
-            if ($k == $old_k) {
-                $k++;
-            }
-            $old_k = $k;
-        }
-    }
-    return true;
-}
 /**
  * Converts a crawl hash number to something closer to base64 coded but
  * so doesn't get confused in urls or DBs
@@ -1983,28 +1848,6 @@ function generalIsA($class_1, $class_2)
     }
     return (is_a($class_1, $class_2) || is_subclass_of($class_1, $class_2));
 }
-/**
- * Given an array of arrays acting much like a database table, this
- * returns a sequence of key value pairs, where the keys are the distinct
- * entries in $key_column and the values are the counts of numbers in
- * $count_column for each particular key;
- *
- * @param array $arr an array of arrays
- * @param mixed $key_column (string or int) field name of key column
- * @param mixed $count_column (string or int) field name of count column
- * @return array key => values pairs of counts
- */
-function arrayColumnCount($arr, $key_column, $count_column)
-{
-    $out_arr = [];
-    foreach ($arr as $row) {
-        if (!isset($out_arr[$row[$key_column]])) {
-            $out_arr[$row[$key_column]] = 0;
-        }
-        $out_arr[$row[$key_column]] += $row[$count_column];
-    }
-    return $out_arr;
-}
 /**
  * Given the contents of a start XML/HMTL tag strips out all the attributes
  * non listed in $safe_attribute_list
diff --git a/src/library/WebArchiveBundle.php b/src/library/WebArchiveBundle.php
index 65ab1e202..47e3bc8fb 100755
--- a/src/library/WebArchiveBundle.php
+++ b/src/library/WebArchiveBundle.php
@@ -118,7 +118,7 @@ class WebArchiveBundle
             $info = unserialize(
                 file_get_contents($this->dir_name."/description.txt"));
         } else {
-            $this->version = 1;
+            $this->version = C\DEFAULT_CRAWL_FORMAT;
         }
         if (isset($info['NUM_DOCS_PER_PARTITION'])) {
             $this->num_docs_per_partition = $info['NUM_DOCS_PER_PARTITION'];
diff --git a/src/library/index_bundle_iterators/IndexBundleIterator.php b/src/library/index_bundle_iterators/IndexBundleIterator.php
index 16041c1df..af078e5a4 100644
--- a/src/library/index_bundle_iterators/IndexBundleIterator.php
+++ b/src/library/index_bundle_iterators/IndexBundleIterator.php
@@ -132,10 +132,6 @@ abstract class IndexBundleIterator implements CrawlConstants
         if (isset($this->word_key)) {
             $out .= "Word Key: " . L\toHexString($this->word_key)."\n";
             $out .= "Index Name: ".$this->index_name."\n";
-            $out .= "Media Type: ".PhraseParser::getMediaType(
-                $this->word_key) . "\n";
-            $out .= "Safe: ". PhraseParser::getSafety($this->word_key) . "\n";
-
         }
         $out .= "Number of Docs: ".$this->num_docs;
         if (isset($this->index_bundle_iterator)) {
diff --git a/src/library/index_bundle_iterators/WordIterator.php b/src/library/index_bundle_iterators/WordIterator.php
index 1fa4d0cab..5742d9a1f 100644
--- a/src/library/index_bundle_iterators/WordIterator.php
+++ b/src/library/index_bundle_iterators/WordIterator.php
@@ -49,20 +49,21 @@ use seekquarry\yioop\library\IndexManager;
 class WordIterator extends IndexBundleIterator
 {
     /**
-     * hash of word that the iterator iterates over
+     * hash of word or phrase that the iterator iterates over
      * @var string
      */
     public $word_key;
     /**
-     * The timestamp of the index is associated with this iterator
-     * @var string
+     * Position from end of key that doesn't have to be an exact match
+     * (needed for phrases, as these use a suffix tree)
+     * @var int
      */
-    public $index_name;
+    public $shift;
     /**
-     * The byte mask to apply against the word id
+     * The timestamp of the index associated with this iterator
      * @var string
      */
-    public $mask;
+    public $index_name;
     /**
      * First shard generation that word info was obtained for
      * @var int
@@ -155,6 +156,8 @@ class WordIterator extends IndexBundleIterator
      * Creates a word iterator with the given parameters.
      *
      * @param string $word_key hash of word or phrase to iterate docs of
+     * @param int $shift up to what point in key should be a match
+     *      when doing a dictionary look up (phrases use a suffix tree)
      * @param string $index_name time_stamp of the to use
      * @param bool $raw whether the $word_key is our variant of base64 encoded
      * @param array $filter an array of hashes of domains to filter from
@@ -165,34 +168,33 @@ class WordIterator extends IndexBundleIterator
      *      gotten out of this iterator (may be reordered later). This flag
      *      controls whether an upper bound of self::LIMIT_FEEDS_COUNT is
      *      imposed on the number of feed results returned
-     * @param string $mask byte mask to apply against word id, default is for
-     *     exact match
      */
-    public function __construct($word_key, $index_name, $raw = false,
+    public function __construct($word_key, $shift, $index_name, $raw = false,
         &$filter = null,
         $results_per_block = IndexBundleIterator::RESULTS_PER_BLOCK,
-        $limit_feeds = false,
-        $mask = "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF")
+        $limit_feeds = false)
     {
         if ($raw == false) {
             //get rid of out modified base64 encoding
             $word_key = L\unbase64Hash($word_key);
         }
+        if (L\crawlHashWord("media:news", true) == $word_key) {
+            $this->is_news = true;
+        }
         if ($filter != null) {
             $this->filter = & $filter;
         } else {
             $this->filter = null;
         }
         $this->word_key = $word_key;
+        $this->shift = $shift;
         $this->index_name =  $index_name;
-        $this->mask = $mask;
         list($estimated_total, $this->dictionary_info) =
-            IndexManager::getWordInfo($index_name, $word_key, 0,
-            $mask, -1, -1, C\NUM_DISTINCT_GENERATIONS, true);
+            IndexManager::getWordInfo($index_name, $word_key, $shift,
+            -1, -1, C\NUM_DISTINCT_GENERATIONS, true);
         $this->feed_shard_name = C\WORK_DIRECTORY."/feeds/index";
         if ((!C\nsdefined('NO_FEEDS') || !C\NO_FEEDS)
             && file_exists($this->feed_shard_name)) {
-            //NO_FEEDS defined true in statistic_controller.php
             $this->use_feeds = true;
         } else {
             $this->use_feeds = false;
@@ -306,8 +308,8 @@ class WordIterator extends IndexBundleIterator
             if ($this->start_generation > 0) {
                 list($estimated_total, $this->dictionary_info) =
                     IndexManager::getWordInfo($this->index_name,
-                    $this->word_key, 0, $this->mask, -1, 0,
-                    C\NUM_DISTINCT_GENERATIONS, true);
+                    $this->word_key, 0, -1, 0, C\NUM_DISTINCT_GENERATIONS,
+                    true);
                 $this->num_docs = $this->feed_count + $estimated_total;
                 ksort($this->dictionary_info);
                 $this->dictionary_info = array_values($this->dictionary_info);
@@ -362,13 +364,8 @@ class WordIterator extends IndexBundleIterator
                     $this->next_offset, $this->feed_end,
                     $this->results_per_block);
                 $time = time();
-                // C1 is the materialized meta for media:news
-                $is_news = ($this->word_key[9] == "\xC1") ? true : false;
                 foreach ($pre_results as $keys => $pre_result) {
                     $pre_results[$keys][self::IS_FEED] = true;
-                    if ($is_news) {
-                        $pre_results[$keys][self::IS_NEWS] = true;
-                    }
                     $delta = $time - $pre_result[self::SUMMARY_OFFSET];
                     $pre_results[$keys][self::DOC_RANK] = 720000 /
                         max($delta, 1);
@@ -545,8 +542,7 @@ class WordIterator extends IndexBundleIterator
                 $this->generation_pointer >= $this->num_generations) {
                 list($estimated_remaining_total, $info) =
                     IndexManager::getWordInfo($this->index_name,
-                    $this->word_key, 0,
-                    $this->mask, -1, $this->num_generations,
+                    $this->word_key, 0, -1, $this->num_generations,
                     C\NUM_DISTINCT_GENERATIONS, true);
                 if (count($info) > 0) {
                     $this->num_docs = $this->seen_docs +
diff --git a/src/library/indexing_plugins/RecipePlugin.php b/src/library/indexing_plugins/RecipePlugin.php
index b351be772..65c08b5dc 100644
--- a/src/library/indexing_plugins/RecipePlugin.php
+++ b/src/library/indexing_plugins/RecipePlugin.php
@@ -429,7 +429,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
                 $index_archive->dictionary->mergeAllTiers();
                 $this->db->setWorldPermissionsRecursive(
                     C\CRAWL_DIR.'/cache/'.
-                    self::index_data_base_name.$index_name);
+                    self::index_data_base_name . $index_name);
             }
             L\crawlLog("...Recipe plugin finished.");
         }
diff --git a/src/library/media_jobs/FeedsUpdateJob.php b/src/library/media_jobs/FeedsUpdateJob.php
index 0f664d1d1..1c9d2137a 100644
--- a/src/library/media_jobs/FeedsUpdateJob.php
+++ b/src/library/media_jobs/FeedsUpdateJob.php
@@ -591,8 +591,7 @@ class FeedsUpdateJob extends MediaJob
                 $meta_ids = $this->calculateMetas($lang, $item['PUBDATE'],
                     $source_name, $item["GUID"], $media_category);
                 $prune_shard->addDocumentWords($doc_keys, $item['PUBDATE'],
-                    $word_and_qa_lists["WORD_LIST"], $meta_ids,
-                    PhraseParser::$materialized_metas, true, false);
+                    $word_and_qa_lists["WORD_LIST"], $meta_ids, true, false);
             }
         }
         $prune_shard->save();
diff --git a/src/locale/ar/configure.ini b/src/locale/ar/configure.ini
index 7afb07372..8b3c5a6e4 100755
--- a/src/locale/ar/configure.ini
+++ b/src/locale/ar/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "البحث"
 search_view_no_index_set = ""
 search_view_calculated = "%s ثوان."
 search_view_results = "عرض  %s- %s من  %s"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = "مؤقتاً"
@@ -810,7 +809,6 @@ search_view_inlink = "Inlinks"
 search_view_rank = "رتبة: %s"
 search_view_relevancy = "ق Rel:%"
 search_view_proximity = "ق Prox:%"
-search_view_thesaurus_score = ""
 search_view_score = "نقاط: %s"
 ;
 ; /src/views/elements
diff --git a/src/locale/bn/configure.ini b/src/locale/bn/configure.ini
index 22371535a..2e6aca5ee 100755
--- a/src/locale/bn/configure.ini
+++ b/src/locale/bn/configure.ini
@@ -800,7 +800,6 @@ search_view_search = ""
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ;
 ; /src/views/elements
diff --git a/src/locale/de/configure.ini b/src/locale/de/configure.ini
index 44abbd47d..1b5fc1df9 100755
--- a/src/locale/de/configure.ini
+++ b/src/locale/de/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Suche"
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ;
 ; /src/views/elements
diff --git a/src/locale/en_US/configure.ini b/src/locale/en_US/configure.ini
index 4d8ecbda9..e1ea2704d 100644
--- a/src/locale/en_US/configure.ini
+++ b/src/locale/en_US/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Search"
 search_view_no_index_set = "No Default Index Set"
 search_view_calculated = "%s seconds."
 search_view_results = "Showing %s - %s of %s"
-search_view_thesaurus_results = "Thesaurus Results"
 search_view_possible_answer = "Possible Answer:"
 search_view_word_cloud = "Words:"
 search_view_cache = "Cached"
@@ -810,7 +809,6 @@ search_view_inlink = "Inlinks"
 search_view_rank = "Rank:%s "
 search_view_relevancy = "Rel:%s "
 search_view_proximity = "Prox:%s"
-search_view_thesaurus_score = "Thesaurus: %s"
 search_view_score = "Score:%s"
 ;
 ; /src/views/elements
diff --git a/src/locale/en_US/resources/Tokenizer.php b/src/locale/en_US/resources/Tokenizer.php
index 037134aa1..c1a18ede7 100755
--- a/src/locale/en_US/resources/Tokenizer.php
+++ b/src/locale/en_US/resources/Tokenizer.php
@@ -32,18 +32,11 @@ use seekquarry\yioop\configs as C;
 use seekquarry\yioop\library\PhraseParser;
 use seekquarry\yioop\library as L;

-/* If you would like to use wordnet for thesaurus reordering of query results
-   define the following variable in your configs/local_config.php file with
-   the path to the WordNet executable.
- */
-if (!C\nsdefined("WORDNET_EXEC")) {
-    C\nsdefine("WORDNET_EXEC", "");
-}
 /**
  * This class has a collection of methods for English locale specific
  * tokenization. In particular, it has a stemmer, a stop word remover (for
- * use mainly in word cloud creation), and a part of speech tagger (if
- * thesaurus reordering used). The stemmer is my stab at implementing the
+ * use mainly in word cloud creation), and a part of speech tagger (for
+ * question answering). The stemmer is my stab at implementing the
  * Porter Stemmer algorithm
  * presented http://tartarus.org/~martin/PorterStemmer/def.txt
  * The code is based on the non-thread safe C version given by Martin Porter.
@@ -115,16 +108,10 @@ class Tokenizer
      */
     private static $j;
     /**
-     * The constructor for a tokenizer can be used to say that a thesaurus
-     * for final query reordering is present. For english we do this if
-     * the WORDNET_EXEC variable is set. In which case we use WordNet for
-     * our reordering
+     * Do any global set up for tokenizer (none in the case of en-US)
      */
     public function __construct()
     {
-        if (C\WORDNET_EXEC != "") {
-            $this->use_thesaurus = true;
-        }
     }
     /**
      * Stub function which could be used for a word segmenter.
@@ -139,91 +126,6 @@ class Tokenizer
     {
         return $pre_segment;
     }
-    /**
-     * Computes similar words and scores from WordNet output based on word
-     * type.
-     *
-     * @param string $term term to find related thesaurus terms
-     * @param string $word_type is the type of word such as "NN" (noun),
-     *     "VB" (verb), "AJ" (adjective), or "AV" (adverb)
-     *     (all other types will be ignored)
-     * @param string $whole_query the original query $term came from
-     * @return array a sequence of
-     *     (score => array of thesaurus terms) associations. The score
-     *     representing one word sense of term
-     */
-    public static function scoredThesaurusMatches($term, $word_type,
-        $whole_query)
-    {
-        $word_map = ["VB" => "verb", "NN" => "noun", "AJ" => "adj",
-            "AV" => "adv"];
-        //Gets overview of senses of term[$i] into data
-        exec(C\WORDNET_EXEC . " $term -over", $data);
-        if (!$data || ! isset($word_map[$word_type])) { return null; }
-        $full_name = $word_map[$word_type];
-        $lexicon_output = implode("\n", $data);
-        $sense_parts = preg_split("/\bThe\s$full_name".'[^\n]*\n\n/',
-            $lexicon_output);
-        if (!isset($sense_parts[1])) {return null; }
-        list($sense, ) = preg_split("/\bOverview\sof\s/", $sense_parts[1]);
-        $definitions_for_sense = preg_split("/\d+\.\s/", $sense, -1,
-            PREG_SPLIT_NO_EMPTY);
-        $num_definitions = count($definitions_for_sense);
-        $sentence = [];
-        $similar_phrases = [];
-        $avg_scores = [];
-        for ($i = 0; $i < $num_definitions; $i++) {
-            //get sentence fragments examples of using that definition
-            preg_match_all('/\"(.*?)\"/', $definitions_for_sense[$i],
-                $matches);
-            // to separate out the words
-            preg_match('/[\w+\s\,\.\']+\s\-+/', $definitions_for_sense[$i],
-                $match_word);
-            $thesaurus_phrases = preg_split("/\s*\,\s*/",
-                strtolower(rtrim(trim($match_word[0]), "-")));
-            //remove ori ginal term from thesaurus phrases if present
-            $m = 0;
-            foreach ($thesaurus_phrases as $thesaurus_phrase) {
-                $tphrase = trim($thesaurus_phrase);
-                if ($tphrase == trim($term)) {
-                    unset($thesaurus_phrases[$m]);
-                }
-                $m++;
-            }
-            $thesaurus_phrases = array_filter($thesaurus_phrases);
-            if ($thesaurus_phrases == []) {continue;}
-            $num_example_sentences = count($matches[1]);
-            $score = [];
-            for ($j = 0; $j < $num_example_sentences; $j++) {
-                $query_parts = explode(' ', strtolower($whole_query));
-                $example_sentence_parts = explode(' ',
-                    strtolower($matches[1][$j]));
-                $score[$j] = PhraseParser::getCosineRank($query_parts,
-                    $example_sentence_parts);
-                /*  If Cosine similarity is zero then go for
-                 * intersection similarity ranking
-                 */
-                if ($score[$j] == 0) {
-                    $score[$j] = PhraseParser::getIntersection($query_parts,
-                        $example_sentence_parts);
-                }
-            }
-            /*  We use the rounded average of the above times 100 as a score
-                score for a definition. To avoid ties we store in the low
-                order digits 99 - the definition it was
-             */
-            if ($num_example_sentences > 0) {
-                $definition_score = 100 * round(
-                    100 * (array_sum($score) / $num_example_sentences))
-                    + (99 - $i);
-            } else {
-                $definition_score = 99 - $i;
-            }
-            $similar_phrases[$definition_score] = $thesaurus_phrases;
-        }
-        krsort($similar_phrases);
-        return $similar_phrases;
-    }
     /**
      * Removes the stop words from the page (used for Word Cloud generation)
      *
@@ -320,7 +222,7 @@ class Tokenizer
         'theyve','think','this','those',
         'thou','though','thoughh','thousand','throug',
         'through','throughout','thru',
-        'thus','til','tip','to','together','too',
+        'thus','til', 'till','tip','to','together','too',
         'took','toward','towards','tried',
         'tries','truly','try','trying','ts','twice','two','u','un','under',
         'unfortunately','unless','unlike','unlikely','until','unto','up',
diff --git a/src/locale/es/configure.ini b/src/locale/es/configure.ini
index 57efd1d02..450e51af0 100755
--- a/src/locale/es/configure.ini
+++ b/src/locale/es/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Buscar"
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ;
 ; /src/views/elements
diff --git a/src/locale/fa/configure.ini b/src/locale/fa/configure.ini
index c73c7a95b..6c9aacf88 100755
--- a/src/locale/fa/configure.ini
+++ b/src/locale/fa/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "جستجو"
 search_view_no_index_set = ""
 search_view_calculated = "%s ثانیه"
 search_view_results = "در حال نمایش %s - %s از %s"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = "کش شده"
@@ -810,7 +809,6 @@ search_view_inlink = "پیوندهای داخلی"
 search_view_rank = "رتبه: %s"
 search_view_relevancy = "ارتباط: %s"
 search_view_proximity = "نزدیکی: %s"
-search_view_thesaurus_score = ""
 search_view_score = "امتیاز: %s"
 ;
 ; /src/views/elements
diff --git a/src/locale/fr_FR/configure.ini b/src/locale/fr_FR/configure.ini
index 09574c152..84278455a 100755
--- a/src/locale/fr_FR/configure.ini
+++ b/src/locale/fr_FR/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Rechercher"
 search_view_no_index_set = ""
 search_view_calculated = "%s secondes."
 search_view_results = "Affichage de %s - %s sur %s r&eacute;sultats"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = "En&nbsp;Cache"
@@ -810,7 +809,6 @@ search_view_inlink = "Liens retour"
 search_view_rank = "Rang: %s"
 search_view_relevancy = "Pertinence: %s"
 search_view_proximity = "Proximit&eacute;: %s"
-search_view_thesaurus_score = ""
 search_view_score = "Total: %s"
 ;
 ; /src/views/elements
diff --git a/src/locale/he/configure.ini b/src/locale/he/configure.ini
index c8fa6b01b..2a9cbfffd 100755
--- a/src/locale/he/configure.ini
+++ b/src/locale/he/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "חפש"
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ;
 ; /src/views/elements
diff --git a/src/locale/hi/configure.ini b/src/locale/hi/configure.ini
index 5d9b1dba4..26f554ce0 100755
--- a/src/locale/hi/configure.ini
+++ b/src/locale/hi/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "खोज"
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ;
 ; /src/views/elements
diff --git a/src/locale/in_ID/configure.ini b/src/locale/in_ID/configure.ini
index 2acc2dd67..9af7f978f 100755
--- a/src/locale/in_ID/configure.ini
+++ b/src/locale/in_ID/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Cari"
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = "Hasil"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = "Urutan"
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ;
 ; /src/views/elements
diff --git a/src/locale/it/configure.ini b/src/locale/it/configure.ini
index 2004095e2..c8e0324b3 100755
--- a/src/locale/it/configure.ini
+++ b/src/locale/it/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Cerca"
 search_view_no_index_set = ""
 search_view_calculated = "Calccolati in %s secondi."
 search_view_results = "Mostra risultati %s - %s di %s"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = "Archivio"
@@ -810,7 +809,6 @@ search_view_inlink = "Inlink"
 search_view_rank = "Pos.: %s "
 search_view_relevancy = "Rel: %s "
 search_view_proximity = "Pros: %s"
-search_view_thesaurus_score = ""
 search_view_score = "Punteggio %s"
 ;
 ; /src/views/elements
diff --git a/src/locale/ja/configure.ini b/src/locale/ja/configure.ini
index c07c80463..91ecd6c6d 100755
--- a/src/locale/ja/configure.ini
+++ b/src/locale/ja/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "検索"
 search_view_no_index_set = ""
 search_view_calculated = "%s分で計算しました。"
 search_view_results = "結果表示%s ー %s の %s"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = "キャッシューしました。"
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = "ランク:%s"
 search_view_relevancy = "関連:%s"
 search_view_proximity = "近さ: %s"
-search_view_thesaurus_score = ""
 search_view_score = "スコア %s"
 ;
 ; /src/views/elements
diff --git a/src/locale/kn/configure.ini b/src/locale/kn/configure.ini
index 66abefcb7..de6e899c9 100755
--- a/src/locale/kn/configure.ini
+++ b/src/locale/kn/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "ಹುಡುಕು"
 search_view_no_index_set = ""
 search_view_calculated = "ಲೆಕ್ಕಾಚಾರದ ಸಮಯ %s ಸೆಕೆಂಡು"
 search_view_results = "ತೋರಿಸುತ್ತಿರುವ ಫಲಿತಾಂಶಗಳು %s - %s ಆಫ್ %s"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = "ಸಿದ್ಧ ಸ್ಮೃತಿಕೋಶದಿಂದ ನೋಡಿ"
@@ -810,7 +809,6 @@ search_view_inlink = "ಒಳ ಕೊಂಡಿ"
 search_view_rank = "ಸ್ಥಾನ: %s"
 search_view_relevancy = "ಪ್ರಾಸ್ತಾವಿಕ: %s"
 search_view_proximity = "ಸಾನಿಧ್ಯ: %s"
-search_view_thesaurus_score = ""
 search_view_score = "ಅಂಕ: %s "
 ;
 ; /src/views/elements
diff --git a/src/locale/ko/configure.ini b/src/locale/ko/configure.ini
index 1734b5a6b..ef554515d 100755
--- a/src/locale/ko/configure.ini
+++ b/src/locale/ko/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "검색"
 search_view_no_index_set = ""
 search_view_calculated = "%s 초 결과 완료"
 search_view_results = "결과 %s - %s 의 %s"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = "캐시 됀것"
@@ -810,7 +809,6 @@ search_view_inlink = "인링크"
 search_view_rank = "랭크: %s"
 search_view_relevancy = "관련성: %s "
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = "점수 %s"
 ;
 ; /src/views/elements
diff --git a/src/locale/nl/configure.ini b/src/locale/nl/configure.ini
index bf4a694da..79f67927c 100644
--- a/src/locale/nl/configure.ini
+++ b/src/locale/nl/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "zoeken"
 search_view_no_index_set = "Geen Standaard Index Set"
 search_view_calculated = " %s seconden."
 search_view_results = "Toont %s - %s van %s"
-search_view_thesaurus_results = "thesaurus Resultaten"
 search_view_possible_answer = ""
 search_view_word_cloud = "woorden:"
 search_view_cache = "gecached"
@@ -810,7 +809,6 @@ search_view_inlink = "inlinks"
 search_view_rank = "Rang: %s"
 search_view_relevancy = "Rel: %s"
 search_view_proximity = "Prox: %s"
-search_view_thesaurus_score = "Thesaurus: %s"
 search_view_score = "Score: %s"
 ;
 ; /src/views/elements
diff --git a/src/locale/pl/configure.ini b/src/locale/pl/configure.ini
index 42144e947..e632fa466 100755
--- a/src/locale/pl/configure.ini
+++ b/src/locale/pl/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Szukaj"
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ;
 ; /src/views/elements
diff --git a/src/locale/pt/configure.ini b/src/locale/pt/configure.ini
index a703975b6..52ae0b154 100755
--- a/src/locale/pt/configure.ini
+++ b/src/locale/pt/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Pesquisa"
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ;
 ; /src/views/elements
diff --git a/src/locale/ru/configure.ini b/src/locale/ru/configure.ini
index 345a881a0..cdacf6e7d 100755
--- a/src/locale/ru/configure.ini
+++ b/src/locale/ru/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Поиск"
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ;
 ; /src/views/elements
diff --git a/src/locale/te/configure.ini b/src/locale/te/configure.ini
index f7f7d2c51..b4ec09493 100644
--- a/src/locale/te/configure.ini
+++ b/src/locale/te/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "అన్వేషించు"
 search_view_no_index_set = "డిఫాల్ట్ సూచిక సెట్ చేసి లేదు"
 search_view_calculated = "%s సెకన్లు"
 search_view_results = "చూపించేది %s - %s of %s"
-search_view_thesaurus_results = "థెసారస్ ఫలితాలు"
 search_view_possible_answer = ""
 search_view_word_cloud = "వర్డ్స్:"
 search_view_cache = "కేష్ చేయబడినవి"
@@ -810,7 +809,6 @@ search_view_inlink = "ఇన్ లింక్స్"
 search_view_rank = "రేంక్:%s"
 search_view_relevancy = "సంబంధిత:%s"
 search_view_proximity = "సామీప్యత:%s"
-search_view_thesaurus_score = "థెసారస్: %s"
 search_view_score = "స్కోర్:%s"
 ;
 ; /src/views/elements
diff --git a/src/locale/th/configure.ini b/src/locale/th/configure.ini
index bb94f8fbb..bfeaa253c 100755
--- a/src/locale/th/configure.ini
+++ b/src/locale/th/configure.ini
@@ -800,7 +800,6 @@ search_view_search = ""
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ;
 ; /src/views/elements
diff --git a/src/locale/tr/configure.ini b/src/locale/tr/configure.ini
index 7488d07d0..bfc4b6699 100755
--- a/src/locale/tr/configure.ini
+++ b/src/locale/tr/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Ara"
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ;
 ; /src/views/elements
diff --git a/src/locale/vi_VN/configure.ini b/src/locale/vi_VN/configure.ini
index 5e46a5366..453814424 100755
--- a/src/locale/vi_VN/configure.ini
+++ b/src/locale/vi_VN/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "T&igrave;m Kiếm"
 search_view_no_index_set = ""
 search_view_calculated = "%s gi&acirc;y."
 search_view_results = "Cho kết quả tứ %s - %s của %s"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = "Trang&nbsp;gốc"
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = "Thứ Tự: %s"
 search_view_relevancy = "Th&iacute;ch hợp: %s"
 search_view_proximity = "Gần: %s"
-search_view_thesaurus_score = ""
 search_view_score = "Điểm: %s"
 ;
 ; /src/views/elements
diff --git a/src/locale/zh_CN/configure.ini b/src/locale/zh_CN/configure.ini
index 1bf771e54..c26ccdc1a 100755
--- a/src/locale/zh_CN/configure.ini
+++ b/src/locale/zh_CN/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "搜尋"
 search_view_no_index_set = ""
 search_view_calculated = "總計: %s 秒"
 search_view_results = "結果"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = "排名: %s 名"
 search_view_relevancy = "關聯度:  %s 趴"
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = "分數"
 ;
 ; /src/views/elements
diff --git a/src/models/ParallelModel.php b/src/models/ParallelModel.php
index 86a47b0bb..aa9ef9480 100755
--- a/src/models/ParallelModel.php
+++ b/src/models/ParallelModel.php
@@ -369,15 +369,14 @@ class ParallelModel extends Model
         if (!isset($index_archive->generation_info['ACTIVE'])) {
             return false;
         }
-        $mask = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
         $num_generations = $index_archive->generation_info['ACTIVE'];
-        $hash_key = ($is_key) ? L\crawlHashWord($url_or_key, true, $mask) :
-            L\crawlHashWord("info:$url_or_key", true, $mask);
-        $info = IndexManager::getWordInfo($index_name, $hash_key, 0, $mask, 1);
+        $hash_key = ($is_key) ? L\crawlHashWord($url_or_key, true) :
+            L\crawlHashWord("info:$url_or_key", true);
+        $info = IndexManager::getWordInfo($index_name, $hash_key, 0, 1);
         if (!isset($info[0][4])) {
             return false;
         }
-        $word_iterator = new WordIterator($info[0][4], $index_name, true);
+        $word_iterator = new WordIterator($info[0][4], 0, $index_name, true);
         if (is_array($next_docs = $word_iterator->nextDocsWithWord())) {
             $doc_info = current($next_docs);
             if (!$doc_info) {
diff --git a/src/models/PhraseModel.php b/src/models/PhraseModel.php
index 17dd9532a..63e457a83 100755
--- a/src/models/PhraseModel.php
+++ b/src/models/PhraseModel.php
@@ -35,7 +35,6 @@ use seekquarry\yioop\library as L;
 use seekquarry\yioop\library\AnalyticsManager;
 use seekquarry\yioop\library\IndexManager;
 use seekquarry\yioop\library\PhraseParser;
-use seekquarry\yioop\library\Thesaurus;
 use seekquarry\yioop\library\index_bundle_iterators as I;

 /**
@@ -494,10 +493,6 @@ class PhraseModel extends ParallelModel
             $results['TOTAL_ROWS'] > 0) {
             $output = $this->formatPageResults($results, $format_words,
                 $description_length);
-            if (isset($out_results['THESAURUS_VARIANTS'])) {
-                $output['THESAURUS_VARIANTS'] =
-                    $out_results['THESAURUS_VARIANTS'];
-            }
             if (!empty($answer_score_map)) {
                 arsort($answer_score_map);
                 reset($answer_score_map);
@@ -534,10 +529,10 @@ class PhraseModel extends ParallelModel
         $in2 = $indent . $indent;
         $in3 = $in2 . $indent;
         $in4 = $in2. $in2;
-        $phrase = " ".$phrase;
+        $phrase = " " . $phrase;
         $phrase = $this->parseIfConditions($phrase);
         $phrase_string = $phrase;
-        list($found_metas, $found_materialized_metas, $disallow_phrases,
+        list($found_metas, $disallow_phrases,
             $phrase_string, $query_string, $index_name, $weight) =
             $this->extractMetaWordInfo($phrase);
         /*
@@ -585,8 +580,7 @@ class PhraseModel extends ParallelModel
                 $new_words =
                     PhraseParser::extractPhrases($phrase_part, $locale_tag,
                          $index_name);
-                if (isset($new_words[0]) && strpos($new_words[0], " ") > 0 &&
-                    $found_materialized_metas == []) {
+                if (isset($new_words[0]) && strpos($new_words[0], " ") > 0) {
                     array_pop($new_words);
                 }
                 $base_words = array_merge($base_words, $new_words);
@@ -597,11 +591,6 @@ class PhraseModel extends ParallelModel
         //stemmed, if have stemmer
         $index_version = IndexManager::getVersion($index_name);
         $add_metas = $found_metas;
-        $immaterial_metas = array_diff(
-            $found_metas, $found_materialized_metas);
-        if (count($immaterial_metas) > 0 && $index_version > 0) {
-            $add_metas = $immaterial_metas;
-        }
         $words = array_merge($base_words, $add_metas);
         if (count($words) == 0 && count($disallow_phrases) > 0) {
             $words[] = "site:any";
@@ -637,20 +626,7 @@ class PhraseModel extends ParallelModel
         if (isset($words) && count($words) == 1 &&
             count($disallow_phrases) < 1 && !strpos($words[0], " ")) {
             $phrase_string = $words[0];
-            if ($index_version == 0) {
-                $tmp_hash = L\allCrawlHashPaths($phrase_string);
-                $tmp_hash = (is_array($tmp_hash)) ? $tmp_hash : [$tmp_hash];
-                $phrase_hash = array_merge([$tmp_hash],
-                    [L\crawlHash($phrase_string)]);
-            } else {
-                if ($found_materialized_metas == []) {
-                    $phrase_hash = L\allCrawlHashPaths($phrase_string);
-                } else {
-                    $phrase_hash = L\allCrawlHashPaths($phrase_string,
-                        $found_materialized_metas,
-                        PhraseParser::$materialized_metas);
-                }
-            }
+            $phrase_hash = L\allCrawlHashPaths($phrase_string);
             $word_struct = ["KEYS" => [$phrase_hash],
                 "QUOTE_POSITIONS" => null, "DISALLOW_KEYS" => [],
                 "WEIGHT" => $weight, "INDEX_NAME" => $index_name,
@@ -658,32 +634,9 @@ class PhraseModel extends ParallelModel
         } else {
             //get a raw list of words and their hashes
             $hashes = [];
-            $metas_accounted = false;
-            $materialized_metas = [];
-            $meta_keys = [];
             $word_keys = [];
             foreach ($words as $word) {
-                if (!$metas_accounted && substr_count($word, " ") == 0
-                    && !in_array($word, $found_metas)) {
-                    $metas_accounted = true;
-                    $materialized_metas = $found_materialized_metas;
-                }
-                $tmp_hash = L\allCrawlHashPaths($word, $materialized_metas,
-                    PhraseParser::$materialized_metas);
-                if ($index_version == 0) {
-                    $tmp_hash = (is_array($tmp_hash)) ? $tmp_hash : [$tmp_hash];
-                    $test =  array_merge($tmp_hash, [L\crawlHash($word)]);
-                } else {
-                    if (in_array($word, $found_materialized_metas) &&
-                        !$metas_accounted) {
-                        $meta_keys[] = $tmp_hash;
-                    } else {
-                        $word_keys[] = $tmp_hash;
-                    }
-                }
-            }
-            if (!$metas_accounted) {
-                $word_keys = array_merge($word_keys, $meta_keys);
+                $word_keys[] = L\allCrawlHashPaths($word);
             }
             if (count($word_keys) == 0) {
                 $word_keys = null;
@@ -774,7 +727,6 @@ class PhraseModel extends ParallelModel
         $index_name = $this->index_name;
         $weight = 1;
         $found_metas = [];
-        $found_materialized_metas = [];
         $disallow_phrases = [];
         $phrase_string = $phrase;
         $phrase_string = str_replace("&", "&amp;", $phrase_string);
@@ -791,24 +743,6 @@ class PhraseModel extends ParallelModel
                 ['i:', 'index:', 'w:', 'weight:', '\-'])) {
                 $matches = $matches[2];
                 $found_metas = array_merge($found_metas, $matches);
-                if (in_array($meta_word, PhraseParser::$materialized_metas)) {
-                    $seen_matches = [];
-                    $seen_match_count = 0;
-                    foreach ($matches as $pre_material_match) {
-                        $match_kinds = explode(":", $pre_material_match);
-                        if (!in_array($match_kinds[1], ["all"]) &&
-                            !isset($match_kinds[2])) {
-                            $found_materialized_metas[] = $pre_material_match;
-                            if ($seen_match_count > 0 &&
-                                !isset($seen_matches[$pre_material_match])) {
-                                $materialized_match_conflict = true;
-                                break 2;
-                            }
-                            $seen_matches[$pre_material_match] = true;
-                            $seen_match_count++;
-                        }
-                    }
-                }
             } elseif ($meta_word == '\-') {
                 if (count($matches[0]) > 0) {
                     foreach ($matches[2] as $disallowed) {
@@ -828,28 +762,18 @@ class PhraseModel extends ParallelModel
         }
         if ($materialized_match_conflict) {
             $found_metas = [];
-            $found_materialized_metas = [];
             $disallow_phrases = [];
             $phrase_string = "";
         }
         $found_metas = array_unique($found_metas);
-        $found_materialized_metas = array_unique($found_materialized_metas);
-        if (empty(trim($phrase_string)) && count($found_metas) == 2
-            && (in_array("site:doc", $found_metas)
-            || in_array("site:any", $found_metas))) {
-            /*site:doc and site:any doesn't work with materialized metas by
-              themselves */
-            array_pop($found_materialized_metas);
-        }
         $disallow_phrases = array_unique($disallow_phrases);
         $phrase_string = mb_ereg_replace("&amp;", "_and_", $phrase_string);
         $query_string = mb_ereg_replace(C\PUNCT, " ", $phrase_string);
         $query_string = preg_replace("/(\s)+/", " ", $query_string);
         $query_string = mb_ereg_replace('_and_', '&', $query_string);
         $phrase_string = mb_ereg_replace('_and_', '&', $phrase_string);
-        return [$found_metas, $found_materialized_metas,
-            $disallow_phrases, $phrase_string, $query_string, $index_name,
-            $weight];
+        return [$found_metas, $disallow_phrases, $phrase_string, $query_string,
+            $index_name, $weight];
     }
     /**
      * Ideally, this function tries to guess from the query what the
@@ -1149,7 +1073,7 @@ class PhraseModel extends ParallelModel
             $save_timestamp_name == "") {
             $mem_tmp = serialize($raw).serialize($word_structs).
                 $original_query . $this->index_name;
-            $summary_hash = L\crawlHash($mem_tmp.":".$limit.":".$num);
+            $summary_hash = L\crawlHash($mem_tmp . ":" . $limit . ":" . $num);
             if ($use_cache_if_allowed) {
                 $cache_success = true;
                 $results = self::$cache->get($summary_hash);
@@ -1405,62 +1329,12 @@ class PhraseModel extends ParallelModel
         }
         $results['PAGES'] = $out_pages;
         $results['TIME'] = time();
-        $lang = L\guessLocaleFromString($original_query);
-        $tokenizer = PhraseParser::getTokenizer($lang);
-        //only use tokenizer if no meta word or disjuncts in query
-        if (!preg_match('/(\||\:)/u', $original_query) &&
-            $tokenizer && method_exists($tokenizer, "scoredThesaurusMatches")
-            && method_exists($tokenizer, "tagPartsOfSpeechPhrase")
-            && isset($tokenizer->use_thesaurus)) {
-            $results = $this->sortByThesaurusScore($results, $original_query,
-                $lang);
-            if (!$out_pages) {
-                $results['PAGES'] = $out_pages;
-            }
-        }
         if (!empty($_SERVER["USE_CACHE"]) &&
             $save_timestamp_name == "") {
             self::$cache->set($summary_hash, $results);
         }
         return $results;
     }
-    /**
-     * If user selects Wordnet feature in page options then only
-     * do WordNet processing. Also user has to specify the WordNet directory
-     *
-     * @param array $results document summaries
-     * @param string $original_query the original query that we are computing
-     *      results for
-     * @param string $lang locale tag of query
-     * @return array results document summaries sorted by wordnet score
-     */
-    public function sortByThesaurusScore($results, $original_query, $lang)
-    {
-        $summaries = [];
-        $pages = $results['PAGES'];
-        foreach ($pages as $page) {
-            $summaries[] = $page[self::DESCRIPTION];
-        }
-        $index_name = $this->index_name;
-        $phrases = Thesaurus::getSimilarPhrases($original_query, $index_name,
-            $lang);
-        $results['THESAURUS_VARIANTS'] = $phrases;
-        if (!empty($phrases)) {
-            $thesaurus_scores = Thesaurus::scorePhrasesSummaries($phrases,
-                $summaries);
-            //Store the BM25 score for each page in result array
-            $num_scores = count($thesaurus_scores);
-            for ($i = 0; $i < $num_scores; $i++) {
-                $pages[$i][self::THESAURUS_SCORE] = $thesaurus_scores[$i];
-                L\orderCallback($pages[$i], $pages[$i], self::THESAURUS_SCORE);
-            }
-            if (array_sum($thesaurus_scores) != 0) {
-                usort($pages, C\NS_LIB . "orderCallback");
-            }
-            $results['PAGES'] = $pages;
-        }
-        return $results;
-    }
     /**
      * Used to lookup summary info for the pages provided (using their)
      * self::SUMMARY_OFFSET field. If any of the lookup-ed summaries
@@ -1601,7 +1475,7 @@ class PhraseModel extends ParallelModel
      */
     public function getQueryIterator($word_structs, &$filter, $raw,
         &$to_retrieve, $queue_servers = [], $original_query = "",
-        $save_timestamp_name="", $limit_feeds = true)
+        $save_timestamp_name = "", $limit_feeds = true)
     {
         $iterators = [];
         $total_iterators = 0;
@@ -1671,6 +1545,7 @@ class PhraseModel extends ParallelModel
                     continue;
                 }
                 $sum = 0;
+                $lookup_cutoff = max(C\MIN_RESULTS_TO_GROUP, $to_retrieve);
                 for ($i = 0; $i < $total_iterators; $i++) {
                     $current_key = (is_string($distinct_word_keys[$i])) ?
                         $distinct_word_keys[$i] : (is_string(
@@ -1686,74 +1561,25 @@ class PhraseModel extends ParallelModel
                         $min_group_override = true;
                     } else {
                         //can happen if exact phrase search suffix approach used
-                        if (isset($distinct_word_keys[$i][0][0]) &&
-                            is_array($distinct_word_keys[$i][0][0])) {
-                            $distinct_keys = [
-                                $distinct_word_keys[$i][0][1]];
-                        } elseif (isset($distinct_word_keys[$i][0]) &&
+                        if (isset($distinct_word_keys[$i][0]) &&
                             is_array($distinct_word_keys[$i][0])) {
                             $distinct_keys = $distinct_word_keys[$i];
                         } else {
                             $distinct_keys = [$distinct_word_keys[$i]];
                         }
-                        $out_keys = [];
-                        $old_distinct_key_id = "";
-                        foreach ($distinct_keys as $distinct_key) {
-                            if (is_array($distinct_key)) {
-                                if (!isset($distinct_key[2]) &&
-                                    isset($distinct_key[1])) {
-                                    $distinct_keys[] = $distinct_key[1];
-                                }
-                                $shift = (isset($distinct_key[1])) ?
-                                    $distinct_key[1] : 0;
-                                $mask = (isset($distinct_key[2])) ?
-                                    $distinct_key[2] : "\x00\x00\x00\x00\x00" .
-                                    "\x00\x00\x00\x00\x00\x00";
-                                if (isset($distinct_key[3])) {
-                                    $old_distinct_key_id =
-                                        L\unbase64Hash($distinct_key[3]);
-                                }
-                                $distinct_key_id = L\unbase64Hash(
-                                    $distinct_key[0]);
-                            } else {
-                                $shift = 0;
-                                $mask = "\x00\x00\x00\x00\x00" .
-                                    "\x00\x00\x00\x00\x00\x00";
-                                $distinct_key_id =
-                                    L\unbase64Hash($distinct_key);
-                            }
-                            $lookup_cutoff = max(C\MIN_RESULTS_TO_GROUP,
-                                $to_retrieve);
-                            $info = IndexManager::getWordInfo($index_name,
-                                $distinct_key_id, $shift, $mask, -1, -1,
-                                C\NUM_DISTINCT_GENERATIONS);
-                            if ($old_distinct_key_id != "") {
-                                $old_info = IndexManager::getWordInfo(
-                                    $index_name, $old_distinct_key_id, $shift,
-                                    $mask, -1, -1, C\NUM_DISTINCT_GENERATIONS);
-                                if ($info !== false && $old_info !== false) {
-                                    $info = array_merge($info, $old_info);
-                                } elseif ($old_info !== false) {
-                                    $info = $old_info;
-                                }
-                            }
-                            if ($info != []) {
-                                $tmp_keys = L\arrayColumnCount($info, 4, 3);
-                                $sum += array_sum($tmp_keys);
-                                $out_keys = array_merge($out_keys, $tmp_keys);
-                            }
-                            if ($sum > $lookup_cutoff) {
-                                break;
-                            }
-                        }
-                        $out_keys = array_keys(array_slice($out_keys, 0, 50));
+                        $sum = 0;
                         $tmp_word_iterators =[];
                         $m = 0;
-                        foreach ($out_keys as $distinct_key) {
+                        foreach ($distinct_keys as $distinct_key) {
+                            $shift = (isset($distinct_key[1])) ?
+                                $distinct_key[1] : 0;
+                            $distinct_key_id = L\unbase64Hash(
+                                $distinct_key[0]);
                             $tmp_word_iterators[$m] =
-                                new I\WordIterator($distinct_key,
+                                new I\WordIterator($distinct_key_id, $shift,
                                 $index_name, true, $filter, $to_retrieve,
                                 $limit_feeds);
+                            $sum += $tmp_word_iterators[$m]->num_docs;
                             if ($tmp_word_iterators[$m]->dictionary_info !=
                                 [] ||
                                 $tmp_word_iterators[$m]->feed_count > 0) {
@@ -1762,6 +1588,9 @@ class PhraseModel extends ParallelModel
                             } else {
                                 unset($tmp_word_iterators[$m]);
                             }
+                            if ($sum > $lookup_cutoff) {
+                                break;
+                            }
                         }
                         if ($m == 1) {
                             $word_iterators[$i] = $tmp_word_iterators[0];
@@ -1780,9 +1609,11 @@ class PhraseModel extends ParallelModel
                 $num_disallow_keys = count($disallow_keys);
                 if ($num_disallow_keys > 0) {
                     for ($i = 0; $i < $num_disallow_keys; $i++) {
+                        /* notice for now shift always 0 - you can't disallow
+                           phrases */
                         $disallow_iterator =
-                            new I\WordIterator($disallow_keys[$i], $index_name,
-                                false, $filter);
+                            new I\WordIterator($disallow_keys[$i], 0,
+                                $index_name, false, $filter);
                         $word_iterators[$num_word_keys + $i] =
                             new I\NegationIterator($disallow_iterator);
                     }
diff --git a/src/scripts/suggest.js b/src/scripts/suggest.js
index 6c19dd848..9fa3abcbe 100644
--- a/src/scripts/suggest.js
+++ b/src/scripts/suggest.js
@@ -693,12 +693,9 @@ function spellCheck()
     }
     if (referenceNode) {
         var corrected_spell = elt("spell-check");
-        var thesaurus_results = elt("thesaurus-results");
         /* corrected_spell might not be present if WORD_SUGGEST off
-           If there are already thesaurus results we don't want to
-           clutter the top area so also don't suggest
          */
-        if (!corrected_spell || thesaurus_results) {return; }
+        if (!corrected_spell) {return; }
         var logged_in = elt("csrf-token");
         if (logged_in) {
             var csrf_token = elt("csrf-token").value;
diff --git a/src/views/SearchView.php b/src/views/SearchView.php
index c443c9f5a..b0ea7cd49 100755
--- a/src/views/SearchView.php
+++ b/src/views/SearchView.php
@@ -211,39 +211,15 @@ class SearchView extends View implements CrawlConstants
         <?php
         } ?>
         <div class="serp-body" >
-        <?php
-        $similar_words = $data['THESAURUS_VARIANTS'];
-        $use_thesaurus = C\WORD_SUGGEST && count($similar_words) > 0 &&
-            !$_SERVER["MOBILE"];
-        if ($use_thesaurus) { ?>
-            <div id="thesaurus-results" class="thesaurus">
-            <?php
-                e(tl('search_view_thesaurus_results'));
-                foreach ($similar_words as $word) {
-                    e("<br />");
-                    ?><span><a href="?<?= $token_string_amp
-                    ?>its=<?= $data['its'] ?>&amp;q=<?=$word ?>"><?=
-                    $word ?></a></span>
-                    <?php
-                }
-            ?>
-            </div>
-        <?php
-        }
-        if ($use_thesaurus) { ?>
-            <div class="thesaurus-serp-results"> <?php
-        } else { ?>
-            <div class="serp-results">
-        <?php
-        }
+        <div class="serp-results"><?php
         if (!$is_landing) {
             $this->element("displayadvertisement")->render($data);
         }
-        if (!empty($data['BEST_ANSWER'])) { ?>
-            <div id="best-answer" class="echo-link">
+        if (!empty($data['BEST_ANSWER'])) {
+            ?><div id="best-answer" class="echo-link">
                  <?= $data['BEST_ANSWER'] ?>
-            </div>
-        <?php }
+            </div><?php
+        }
         foreach ($data['PAGES'] as $page) {
             if (isset($page[self::URL])) {
                 if (substr($page[self::URL], 0, 4) == "url|") {
@@ -292,8 +268,8 @@ class SearchView extends View implements CrawlConstants
                     $image_subsearch);
                 e( "</div>");
                 continue;
-            } else if (isset($page['NEWS'])) {
-                $this->helper("feeds")->render($page['NEWS'],
+            } else if (isset($page['FEED'])) {
+                $this->helper("feeds")->render($page['FEED'],
                     $token, $data['QUERY'],  $subsearch,
                     $data['OPEN_IN_TABS']);
                 e( "</div>");
@@ -437,12 +413,6 @@ class SearchView extends View implements CrawlConstants
                     number_format($page[self::RELEVANCE], 2) )."\n");
                 e(tl('search_view_proximity',
                     number_format($page[self::PROXIMITY], 2) )."\n");
-                if (isset($page[self::THESAURUS_SCORE]) &&
-                    $page[self::THESAURUS_SCORE] > 0) {
-                    e(tl('search_view_thesaurus_score',
-                        number_format($page[self::THESAURUS_SCORE], 2)) .
-                        "\n");
-                }
                 if (isset($page[self::USER_RANKS])) {
                     foreach ($page[self::USER_RANKS] as $label => $score) {
                         e($label.":".number_format($score/6553.6, 2)."\n");
diff --git a/tests/IndexShardTest.php b/tests/IndexShardTest.php
index a1bc2abbc..22d6f758e 100644
--- a/tests/IndexShardTest.php
+++ b/tests/IndexShardTest.php
@@ -334,7 +334,7 @@ class IndexShardTest extends UnitTest
         $meta_ids = ["EEEEEEEE", "FFFFFFFF"];
         //test saving and loading to a file
         $this->test_objects['shard']->addDocumentWords($docid,
-            $offset, $word_counts, $meta_ids, [], true);
+            $offset, $word_counts, $meta_ids, true);
         $this->test_objects['shard']->save();
         $this->test_objects['shard2'] = IndexShard::load(C\WORK_DIRECTORY.
             "/shard.txt");
ViewGit