Chris Pollett [2018-06-18 15:Jun:th]
diff --git a/src/configs/Config.php b/src/configs/Config.php
index 47e137718..293cacc7f 100755
--- a/src/configs/Config.php
+++ b/src/configs/Config.php
@@ -710,6 +710,8 @@ nsconddefine('MAX_LINKS_PER_PAGE', 50);
 nsconddefine('AVG_LINKS_PER_PAGE', 24);
 /** maximum number of links to consider from a sitemap page */
+/**  minimum char length of link text before it gets its own document */
+nsconddefine('MIN_LINKS_TEXT_DOC', 6);
 /**  maximum number of words from links to consider on any given page */
 nsconddefine('MAX_LINKS_WORD_TEXT', 100);
 /**  maximum length of urls to try to queue, this is important for
diff --git a/src/configs/PublicHelpPages.php b/src/configs/PublicHelpPages.php
index ca3f33c2a..03444eaca 100644
--- a/src/configs/PublicHelpPages.php
+++ b/src/configs/PublicHelpPages.php
@@ -77,620 +77,620 @@ page_footer=


-END_HEAD_VARS=Yioop Wiki Syntax=
-: Wiki syntax is a lightweight way to markup a text document so that
-it can be formatted and drawn nicely by Yioop.
-This page briefly describes the wiki syntax supported by Yioop.
-: In wiki syntax headings of documents and sections are written as follows:
-and would look like:
-: In Yioop two new lines indicates a new paragraph. You can control
-the indent of a paragraph by putting colons followed by a space in front of it:
-: some indent
-:: a little more
-::: even more
-:::: that's sorta crazy
-which looks like:
-: some indent
-:: a little more
-::: even more
-:::: that's sorta crazy
-==Horizontal Rule==
-: Sometimes it is convenient to separate paragraphs or sections with a horizontal
-rule. This can be done by placing four hyphens on a line by themselves:
-This results in a line that looks like:
-==Text Formatting Within Paragraphs==
-: Within a paragraph it is often convenient to make some text bold, italics,
-underlined, etc. Below is a quick summary of how to do this:
-===Wiki Markup===
-|<nowiki>'''''bold and italic'''''</nowiki>|'''''bold and italic'''''
-===HTML Tags===
-: Yioop also supports several html tags such as:
-|<nowiki><s>strike through</s> or
-<strike>strike through</strike> </nowiki>|<s>strike through</s>
-|<nowiki><sup>superscript</sup> and
-<sub>subscript</sub></nowiki>|<sup>superscript</sup> and
-===Spacing within Paragraphs===
-: The HTML entity
-<nowiki> </nowiki>
-can be used to create a non-breaking space. The tag
-can be used to produce a line break.
-==Preformatted Text and Unformatted Text==
-: You can force text to be formatted as you typed it rather
-than using the layout mechanism of the browser using the
-<nowiki><pre>preformatted text tag.</pre></nowiki>
-Alternatively, a sequence of lines all beginning with a
-space character will also be treated as preformatted.
-: Wiki markup within pre tags is still parsed by Yioop.
-If you would like to add text that is not parsed, enclosed
-it in <tt><`mbox{nowiki}`> </`mbox{nowiki}`></tt> tags.
-==Styling Text Paragraphs==
-: Yioop wiki syntax offers a number of templates for
-control the styles, and alignment of text for
-a paragraph or group of paragraphs:<br />
-`{{`left| some text`}}`,<br /> `{{`right| some text`}}`,<br />
-and<br />
-`{{`center| some text`}}`<br /> can be used to left-justify,
-right-justify, and center a block of text. For example,
-the last command, would produce:
-some text
-If you know cascading style sheets (CSS), you can set
-a class or id selector for a block of text using:<br />
-`{{`class="my-class-selector" some text`}}`<br />and<br />
-`{{`id="my-id-selector" some text`}}`.<br />
-You can also apply inline styles to a block of text
-using the syntax:<br />
-`{{`style="inline styles" some text`}}`.<br />
-For example, `{{`style="color:red" some text`}}` looks
-like {{style="color:red" some text}}.
-: The Yioop Wiki Syntax supported of ways of listing items:
-bulleted/unordered list, numbered/ordered lists, and
-definition lists. Below are some examples:
-===Unordered Lists===
-* Item1
-** SubItem1
-** SubItem2
-*** SubSubItem1
-* Item 2
-* Item 3
-would be drawn as:
-* Item1
-** SubItem1
-** SubItem2
-*** SubSubItem1
-* Item 2
-* Item 3
-===Ordered Lists===
-# Item1
-## SubItem1
-## SubItem2
-### SubSubItem1
-# Item 2
-# Item 3
-# Item1
-## SubItem1
-## SubItem2
-### SubSubItem1
-# Item 2
-# Item 3
-===Mixed Lists===
-# Item1
-#* SubItem1
-#* SubItem2
-#*# SubSubItem1
-# Item 2
-# Item 3
-# Item1
-#* SubItem1
-#* SubItem2
-#*# SubSubItem1
-# Item 2
-# Item 3
-===Definition Lists===
-;Term 1: Definition of Term 1
-;Term 2: Definition of Term 2
-;Term 1: Definition of Term 1
-;Term 2: Definition of Term 2
-: A table begins with {`|`  and ends with `|`}. Cells are separated with | and
-rows are separated with |- as can be seen in the following
-Headings for columns and rows can be made by using an exclamation point, !,
-rather than a vertical bar |. For example,
-Captions can be added using the + symbol:
-|+ My Caption
-|+ My Caption
-Finally, you can put a CSS class or style attributes (or both) on the first line
-of the table to further control how it looks:
-{| class="wikitable"
-|+ My Caption
-{| class="wikitable"
-|+ My Caption
-Within a cell attributes like align, valign, styles, and class can be used. For
-| style="text-align:right;"| a| b
-| lalala | lalala
-| style="text-align:right;"| a| b
-| lalala | lalala
-: Math can be included into a wiki document by either using the math tag:
-\sum_{i=1}^{n} i = frac{(n+1)(n)}{2}
-\sum_{i=1}^{n} i = frac{(n+1)(n)}{2}
-or by enclosing the math in backticks:
-`[[1, -2],[3,4]]`
-`[[1, -2],[3,4]]`.
-Rendering of math is done using [[|MathJax]], making us of the [[|ASCIImathml]] extensions.
-==Links and Relationships==
-: A hypertext link to another document can be inserted into a wiki page using
-the chain link icon in the GUI. Alternatively, there are several techniques
-for inserting a link into a page depending on whether the link is to a page
-within the same wiki group, is a link to a page on a different wiki
-group, or is a link to a different website. In addition to normal
-hypertext links, Yioop also supports relationship links.
-'''Intra-Group Wiki Links''' use the syntax:
-[[name_of_wiki_page|text for the link]]
-[[name_of_wiki_page#heading_or_id_on_page|text for the link]]
-for example, to make a link to this Syntax page one could write,
-[[Syntax|Yioop Wiki Syntax Page]]
-which would look like,
-[[Syntax|Yioop Wiki Syntax Page]]
-'''Inter-Group Wiki Links''' use the syntax:
-[[name_of-group@name_of_wiki_page|text for the link]]
-'''Different Website Links''' use the syntax:
-[[website_url|text for the link]]
-: Relationships are a generalized form of link. They are used to express
-a more complicated linking between two wiki pages and have the syntax:
-[[relationship_type|wiki_page_name|text for the link]]
-: In the navigation dropdown for a Yioop wiki page there are items for
-what links to the current page and what relates to the current page
-based on the links and relationships a page belongs to.
-==Recent Places Dropdowns==
-: You can add a dropdown that can allow users to navigate to recently visited
-wiki pages using the syntax:
-This looks like:
-==Adding Resources to a Page==
-: Yioop wiki syntax supports adding search bars, audio, images, and video to a
-page. The magnifying class edit tool icon can be used to add a search bar via
-the GUI. This can also be added by hand with the syntax:
-{{search:default|size:small|placeholder:Search Placeholder Text}}
-This syntax is split into three parts each separated by a vertical bar |. The
-first part search:default means results from searches should come from the
-default search index. You can replace default with the timestamp of a specific
-index or mix if you do not want to use the default. The second group size:small
-indicates the size of the search bar to be drawn. Choices of size are small,
-medium, and large. Finally, placeholder:Search Placeholder Text indicates the
-grayed out background text in the search input before typing is done should
-read: Search Placeholder Text. Here is what the above code outputs:
-{{search:default|size:small|placeholder:Search Placeholder Text}}
-: Image, video and other media resources can be associated with a page by dragging
-and dropping them in the edit textarea or by clicking on the link click to select
-link in the gray box below the textarea. This would add wiki code such as
-<sub>((resource`:`myphoto.jpg|Resource Description))</sub>
-to the page. Only saving the page will save this code and upload the resource to
-the server. In the above ''myphoto.jpg'' is the resource that will be inserted and
-Resource Description is the alternative text to use in case the viewing browser
-cannot display jpg files. To add a resource
-from a different wiki page belonging to the same group to the current wiki
-page one can use a syntax like:
-<sub>((resource`:`Documentation:ConfigureScreenForm1.png|The work directory form))</sub>
-Here Documentation would be the page and ConfigureScreenForm1.png the resource.
-You can also insert resources from a data-string using ''resource-data'' rather than
-''resource''. For example:
-<sub>((resource-data`:`image/jpeg;base64,/9j/ of image data...|Seekquarry Logo))</sub>
-could be used to inline an image like:
-((resource-|The Seekquarry Logo))
-be aware though that the default maximum wiki page size is 512Kb (this can be set in src/configs/Config.php).
-: Sometimes it is useful to edit the basic resource link
-above to make a link which is a thumbnail of the resource which points to a
-separate page containing that resource. This can be done using the syntax:
-<sub>((resource-thumb`:`myphoto.jpg|Resource Description))</sub>
-: Similarly, by default for resources like PDFs, epub's, etc., the resource tag inlines
-the whole resource into the page, if instead one wants a clickable link to a page where
-the resource is displayed one can use the syntax:
-<sub>((resource-link`:`my_document.pdf|Resource Description))</sub>
-: Comma separated value files (.csv or CSV files) are inlined into a page as a table. Which rows and columns of the CSV to present in this table can be controlled by the resource line. The general format for including
-a CSV resource is:
-<sub> ((resource`:`resource_name.csv#config#top_left_cell#bottom_right_cell|Resource Description))</sub>
-For example,
-<sub>((resource`:`resource_name.csv##B2#C3|Resource Description))</sub>
-might output
-((resource-data:text/csv;base64,LCwsLAosLTIsMywsCiw1LDQsLAosLCwsCiwsLCwK##B2#C3|Example CSV with Headings))
-I.e., just the portion of the CSV given by the rectangle between the cells B2 and C3. Using a config directive we can omit the spreadsheet row and column headings as follows:
-<sub>((resource`:`resource_name.csv#noheadings#B2#C3|Resource Description)) </sub>
-which might output
-((resource-data:text/csv;base64,LCwsLAosLTIsMywsCiw1LDQsLAosLCwsCiwsLCwK#noheadings#B2#C3|Example CSV without Headings))
-CSV spreadsheet files can also be used to output a variety of charts. The general format for the command to insert a chart resource is:
-<sub>((resource-chart_type`:`resource_name.csv#char_config#x_start#x_end#y_start#y_end|Resource Description))</sub>
-Here ''chart_type'' can be one of ''bargraph'',  ''linegraph'', or ''pointgraph''. For example, one might have a line like:
-<sub>((resource-bargraph`:`resource_name.csv##B1#B4#C1#C4|Quadratic Function)) </sub>
-which could produce a chart like
-((resource-bargraph:##(1,1)#(2,4)#(3,9)#(4,16)|Quadratic Function))
-In the above example, the values for the `x` coordinates would come from the cells B1, B2, B3, B4 from
-''resource_name.csv '' and the values for the `y` coordinates would come from cells C1, C2, C3, C4 from
-''resource_name.csv ''. Alternatively, rather than use a CSV to get out data we can just list the points we want to plot with a command like:
-<sub>((resource-bargraph`:`##(1,1)#(2,4)#(3,9)#(4,16)|Quadratic Function))</sub>
-==Manipulating Page Resources==
-: A list of media that have already been associated with
-a page appears under the Page Resource heading below the textarea. This
-table allows the user to rename and delete resources as well as insert the
-same resource at multiple locations within the same document.
-: The resources section of the edit page can be thought of as similar to
-a folder in Windows or MacOS. One can have subfolders of the resource folder.
-: The '''Places''' dropdown at the top of the '''Page Resource''' section allows one to navigate
-these folders.
-: The '''Filter''' textfield lets you enter a search string.
-Clicking '''Go''' then shows only those resources
-which contain that search string in their title.
-: The '''Clip Folder''' dropdown is used to copy files between folders and pages.
-Its current value is the folder that the '''Clip Copy''' buttons next to resources
-will copy their resource to when clicked. You can set the '''Clip Folder''' to
-the current folder using the dropdown, then  move to the page and folder that
-you would like to copy stuff from and click the '''Clip Copy''' button of the
-desired resource.
-: The '''Name''', '''Size''', '''Modified''' header links above the resources list
-control the sort order for the resource list. If a page is a media list page,
-then even in read mode, the sort order selected is remembered when drawing the
-media list.
-: The '''Actions''' drop can be used to create new folders, new text files, and new csv
-text files within the current page resource folder. These are initial named beginning
-with ''untitled'' followed by some number, and if applicable a file extension.
-: Resources entries for the resources list consist first of an icon, followed by a textfield
-with a name for the resource, followed by buttons for actions that can be done to that resource
-(Rename, Add to Page, Clip Copy), followed by a link [X], which can be used to delete the resource.
-If a resource is editable the icon will look like a plus sign together with a pencil. Clicking
-on the icon will then let you edit the resource.
-===Text and CSV Resources===
-: For normal text files clicking edit will bring up a textarea with the context of the text to edit.
-For CSV (comma separated value) files this will present the file as an editable spreadsheet.
-Yioop spreadsheets can have equation much like Excel spreadsheets. Clicking on a cell lets one
-edit its contents. For example, if in the cell A3
-one entered the equation:
- = A1+A2
-then clicking out of the cell would cause it to refresh with the value of the sum of the contents of
-cells A1 and A2. In addition, to the standard arithmetic operators ['*', '/', '+', '-', '%'], the
-spreadsheet expressions can use float or integer literals, and can make use of the following table
-of built-in functions:
-{| class="wikitable"
-!Function Name!!Description
-!avg(x1,...,xn), avg(x1:xn)|| computes average of values of cells listed as arguments
-!ceil(x)|| rounds the value of x up to nearest integer
-!cell(i,j)|| returns the contents of the cell with column name of letter j, and row name i. For example, cell(2,'B') would return the contents of cell B2.
-!col(value, search_row, start_col, end_col)|| searches the row ''search_row'' between the columns
-''start_col'', ''end_col'' for ''value''. Returns the column name where this value was found or -1 if not found.
-For example, col(3, 2, "B", "D") might return C if the cell C2 had value 3.
-!exp(x)|| computes `e^x`
-!floor(x)|| rounds the value of x down to the nearest integer
-!log(x)|| computes `log x`
-!min(x1,...,xn), min(x1:xn)|| computes minimum value of cells listed as arguments
-!max(x1,...,xn), max(x1:xn)|| computes maximum value of cells listed as arguments
-!pow(x,y)|| computes `x^y`
-!row(value, search_col, start_row, end_row)|| searches the column ''search_col''
-between the rows ''start_row'', ''end_row'' for ''value''.
-Returns the row name where this value was found or -1 if not found.
-For example, row(3, "C", "1", "5") might return 2 if the cell C2 had value 3.
-!sqrt(x)|| computes `sqrt(x)`
-!sum(x1,...,xn), sum(x1:xn)|| computes sum of values of cells listed as arguments
-!username()|| returns username of the person using this CSV file
-===HTML, PDF and EPub Resources===
-: How HTML, PDF, EPub resources included on a page render depends on how the Yioop wiki software
-has been configured. If no special configuration has been done, then HTML and PDF documents
-will bbe rendered in an <iframe> tag within the current wiki page. In the EPub, case a link
-to download the resource will be given. If the wiki software detects the presence of the
-file APP_DIR/scripts/pdf.js ([[|PDF.js]])
-or APP_DIR/scripts/epub.js ([[|epub.js]]), the wiki
-system will render the resource in a Javascript viewer and will do things like remember reading
-===Video and Audio Resources===
-: Not all browsers support the same video and audio formats for playback. For this reason
-it sometimes is useful to have multiple video resources for the same video. For example,
-you might have a .ogv and .vp8 version of the same video recording. In read (non-edit)
-mode, the Yioop wiki system displays only one link for video or audio files that have
-the same name except for extension. It then includes the grouped file as separated <source>
-tags within either the <video> or <audio> html tag used to render the item in the browser.
-In this way, you can make your media take best advantages to whatever capabilities your
-client's browser has. If you don't feel like recoding your media in such a fancy way, a safe
-rule of thumb is that .mp3 audio will playback in all modern browser, and that .mp4 video
-will playback in all modern browser.
-: For video it is sometimes useful to add a subtitle or caption track. Yioop wiki supports
-[[|WebVTT]] format subtitles and captions. To see how
-Yioop wiki makes use of these files, suppose you included a resource ''foo.mp4'' in your
-wiki pages, and you also had a file named ''foo-captions-en-US.vtt'' then when the HTML
-page is generated from your wiki page, a <track> tag for the caption file would be added
-to the <video> tag. A user seeing this page would then see in the video player a closed caption
-symbol and be able to turn on/off (defaults off) the English captions. If you wanted
-named the file ''foo-subtitles-en-US.vtt'' instead, then Yioop wiki would include it as a
-subtitles track (defaults on). You can add captions/subtitle files for as many languages as
-: When viewing the page resources for a page in edit mode, one can see one file/resource and
-no grouping of resources by name is done. In this way you can keep track of exactly what
-resources are available for a page.
-==Page Settings, Page Type==
-: In edit mode for a wiki page, next to the page name, is a link [Settings].
-Clicking this link expands a form which can be used to control global settings
-for a wiki page.  This form contains a drop down for the page type, another
-drop down for the type of border for the page in non-logged in mode,
-a checkbox for whether a table of contents should be auto-generated from level 2
-and level three headings and then text
-fields or areas for the page title, author, meta robots, and page description.
-Beneath this one can specify another wiki page to be used as a header for this
-page and also specify another wiki page to be used as a footer for this page.
-: The contents of the page title is displayed in the browser title when the
-wiki page is accessed with the  Activity Panel collapsed or when not logged in.
-Similarly, in the collapsed or not logged in mode, if one looks as the HTML
-page source for the page,  in the head of document, <meta> tags for author,
-robots, and description are set according to these fields. These fields can
-be useful for search engine optimization. The robots meta tag can be
-used to control how search engine robots index the page. Wikipedia has more information on
-[[|Meta Elements]].
-: The '''Standard''' page type treats the page as a usual wiki page.
-: '''Page Alias''' type redirects the current page to another page name. This can
-be used to handle things like different names for the same topic or to do localization
-of pages. For example, if you switch the locale from English to French and
-you were on the wiki page dental_floss when you switch to French the article
-dental_floss might redirect to the page dentrifice.
-: '''Media List''' type means that the page, when read, should display just the
-resources in the page as a list of thumbnails and links. These links for the
-resources go to a separate pages used to display these resources.
-This kind of page is useful for a gallery of
-images or a collection of audio or video files.
-: '''Presentation''' type is for a wiki page whose purpose is a slide presentation. In this mode,
-on a line by itself is used to separate one slide. If presentation type is a selected a new
-slide icon appears in the wiki edit bar allowining one to easily add new slides.
-When the Activity panel is not collapsed and you are reading a presentation, it just
-displays as a single page with all slides visible. Collapsing the Activity panel presents
-the slides as a typical slide presentation using the
+END_HEAD_VARS=Yioop Wiki Syntax=
+: Wiki syntax is a lightweight way to markup a text document so that
+it can be formatted and drawn nicely by Yioop.
+This page briefly describes the wiki syntax supported by Yioop.
+: In wiki syntax headings of documents and sections are written as follows:
+and would look like:
+: In Yioop two new lines indicate a new paragraph. You can control
+the indent of a paragraph by putting colons followed by a space in front of it:
+: some indent
+:: a little more
+::: even more
+:::: that's sorta crazy
+which looks like:
+: some indent
+:: a little more
+::: even more
+:::: that's sorta crazy
+==Horizontal Rule==
+: Sometimes it is convenient to separate paragraphs or sections with a horizontal
+rule. This can be done by placing four hyphens on a line by themselves:
+This results in a line that looks like:
+==Text Formatting Within Paragraphs==
+: Within a paragraph it is often convenient to make some text bold, italics,
+underlined, etc. Below is a quick summary of how to do this:
+===Wiki Markup===
+|<nowiki>'''''bold and italic'''''</nowiki>|'''''bold and italic'''''
+===HTML Tags===
+: Yioop also supports several html tags such as:
+|<nowiki><s>strike through</s> or
+<strike>strike through</strike> </nowiki>|<s>strike through</s>
+|<nowiki><sup>superscript</sup> and
+<sub>subscript</sub></nowiki>|<sup>superscript</sup> and
+===Spacing within Paragraphs===
+: The HTML entity
+<nowiki> </nowiki>
+can be used to create a non-breaking space. The tag
+can be used to produce a line break.
+==Preformatted Text and Unformatted Text==
+: You can force text to be formatted as you typed it rather
+than using the layout mechanism of the browser using the
+<nowiki><pre>preformatted text tag.</pre></nowiki>
+Alternatively, a sequence of lines all beginning with a
+space character will also be treated as preformatted.
+: Wiki markup within pre tags is still parsed by Yioop.
+If you would like to add text that is not parsed, enclose
+it in <tt><`mbox{nowiki}`> </`mbox{nowiki}`></tt> tags.
+==Styling Text Paragraphs==
+: Yioop wiki syntax offers a number of templates for
+controlling the styles and alignment of text for
+a paragraph or group of paragraphs:<br />
+`{{`left| some text`}}`,<br /> `{{`right| some text`}}`,<br />
+and<br />
+`{{`center| some text`}}`<br /> can be used to left-justify,
+right-justify, and center a block of text. For example,
+the last command, would produce:
+some text
+If you know cascading style sheets (CSS), you can set
+a class or id selector for a block of text using:<br />
+`{{`class="my-class-selector" some text`}}`<br />and<br />
+`{{`id="my-id-selector" some text`}}`.<br />
+You can also apply inline styles to a block of text
+using the syntax:<br />
+`{{`style="inline styles" some text`}}`.<br />
+For example, `{{`style="color:red" some text`}}` looks
+like {{style="color:red" some text}}.
+: The Yioop Wiki Syntax supports several ways of listing items:
+bulleted/unordered list, numbered/ordered lists, and
+definition lists. Below are some examples:
+===Unordered Lists===
+* Item1
+** SubItem1
+** SubItem2
+*** SubSubItem1
+* Item 2
+* Item 3
+would be drawn as:
+* Item1
+** SubItem1
+** SubItem2
+*** SubSubItem1
+* Item 2
+* Item 3
+===Ordered Lists===
+# Item1
+## SubItem1
+## SubItem2
+### SubSubItem1
+# Item 2
+# Item 3
+# Item1
+## SubItem1
+## SubItem2
+### SubSubItem1
+# Item 2
+# Item 3
+===Mixed Lists===
+# Item1
+#* SubItem1
+#* SubItem2
+#*# SubSubItem1
+# Item 2
+# Item 3
+# Item1
+#* SubItem1
+#* SubItem2
+#*# SubSubItem1
+# Item 2
+# Item 3
+===Definition Lists===
+;Term 1: Definition of Term 1
+;Term 2: Definition of Term 2
+;Term 1: Definition of Term 1
+;Term 2: Definition of Term 2
+: A table begins with {`|`  and ends with `|`}. Cells are separated with | and
+rows are separated with |- as can be seen in the following
+Headings for columns and rows can be made by using an exclamation point, !,
+rather than a vertical bar |. For example,
+Captions can be added using the + symbol:
+|+ My Caption
+|+ My Caption
+Finally, you can put a CSS class or style attributes (or both) on the first line
+of the table to further control how it looks:
+{| class="wikitable"
+|+ My Caption
+{| class="wikitable"
+|+ My Caption
+Within a cell attributes like align, valign, styles, and class can be used. For
+| style="text-align:right;"| a| b
+| lalala | lalala
+| style="text-align:right;"| a| b
+| lalala | lalala
+: Math can be included into a wiki document by either using the math tag:
+\sum_{i=1}^{n} i = frac{(n+1)(n)}{2}
+\sum_{i=1}^{n} i = frac{(n+1)(n)}{2}
+or by enclosing the math in backticks:
+`[[1, -2],[3,4]]`
+`[[1, -2],[3,4]]`.
+Rendering of math is done using [[|MathJax]], making use of the [[|ASCIImathml]] extensions.
+==Links and Relationships==
+: A hypertext link to another document can be inserted into a wiki page using
+the chain link icon in the GUI. Alternatively, there are several techniques
+for inserting a link into a page depending on whether the link is to a page
+within the same wiki group, is a link to a page on a different wiki
+group, or is a link to a different website. In addition to normal
+hypertext links, Yioop also supports relationship links.
+'''Intra-Group Wiki Links''' use the syntax:
+[[name_of_wiki_page|text for the link]]
+[[name_of_wiki_page#heading_or_id_on_page|text for the link]]
+for example, to make a link to this Syntax page one could write,
+[[Syntax|Yioop Wiki Syntax Page]]
+which would look like,
+[[Syntax|Yioop Wiki Syntax Page]]
+'''Inter-Group Wiki Links''' use the syntax:
+[[name_of-group@name_of_wiki_page|text for the link]]
+'''Different Website Links''' use the syntax:
+[[website_url|text for the link]]
+: Relationships are a generalized form of link. They are used to express
+a more complicated linking between two wiki pages and have the syntax:
+[[relationship_type|wiki_page_name|text for the link]]
+: In the navigation dropdown for a Yioop wiki page there are items for
+what links to the current page and what relates to the current page
+based on the links and relationships a page belongs to.
+==Recent Places Dropdowns==
+: You can add a dropdown that can allow users to navigate to recently visited
+wiki pages using the syntax:
+This looks like:
+==Adding Resources to a Page==
+: Yioop wiki syntax supports adding search bars, audio, images, and video to a
+page. The magnifying glass edit tool icon can be used to add a search bar via
+the GUI. This can also be added by hand with the syntax:
+{{search:default|size:small|placeholder:Search Placeholder Text}}
+This syntax is split into three parts each separated by a vertical bar |. The
+first part search:default means results from searches should come from the
+default search index. You can replace default with the timestamp of a specific
+index or mix if you do not want to use the default. The second group size:small
+indicates the size of the search bar to be drawn. Choices of size are small,
+medium, and large. Finally, placeholder:Search Placeholder Text indicates the
+grayed out background text in the search input before typing is done should
+read: Search Placeholder Text. Here is what the above code outputs:
+{{search:default|size:small|placeholder:Search Placeholder Text}}
+: Image, video and other media resources can be associated with a page by dragging
+and dropping them in the edit textarea or by clicking on the link click to select
+link in the gray box below the textarea. This would add wiki code such as
+<sub>((resource`:`myphoto.jpg|Resource Description))</sub>
+to the page. Only saving the page will save this code and upload the resource to
+the server. In the above ''myphoto.jpg'' is the resource that will be inserted and
+Resource Description is the alternative text to use in case the viewing browser
+cannot display jpg files. To add a resource
+from a different wiki page belonging to the same group to the current wiki
+page one can use a syntax like:
+<sub>((resource`:`Documentation:ConfigureScreenForm1.png|The work directory form))</sub>
+Here Documentation would be the page and ConfigureScreenForm1.png the resource.
+You can also insert resources from a data-string using ''resource-data'' rather than
+''resource''. For example:
+<sub>((resource-data`:`image/jpeg;base64,/9j/ of image data...|Seekquarry Logo))</sub>
+could be used to inline an image like:
+((resource-|The Seekquarry Logo))
+be aware though that the default maximum wiki page size is 512Kb (this can be set in src/configs/Config.php).
+: Sometimes it is useful to edit the basic resource link
+above to make a link which is a thumbnail of the resource which points to a
+separate page containing that resource. This can be done using the syntax:
+<sub>((resource-thumb`:`myphoto.jpg|Resource Description))</sub>
+: Similarly, by default for resources like PDFs, epub's, etc., the resource tag inlines
+the whole resource into the page, if instead one wants a clickable link to a page where
+the resource is displayed one can use the syntax:
+<sub>((resource-link`:`my_document.pdf|Resource Description))</sub>
+: Comma separated value files (.csv or CSV files) are inlined into a page as a table. Which rows and columns of the CSV to present in this table can be controlled by the resource line. The general format for including
+a CSV resource is:
+<sub> ((resource`:`resource_name.csv#config#top_left_cell#bottom_right_cell|Resource Description))</sub>
+For example,
+<sub>((resource`:`resource_name.csv##B2#C3|Resource Description))</sub>
+might output
+((resource-data:text/csv;base64,LCwsLAosLTIsMywsCiw1LDQsLAosLCwsCiwsLCwK##B2#C3|Example CSV with Headings))
+I.e., just the portion of the CSV given by the rectangle between the cells B2 and C3. Using a config directive we can omit the spreadsheet row and column headings as follows:
+<sub>((resource`:`resource_name.csv#noheadings#B2#C3|Resource Description)) </sub>
+which might output
+((resource-data:text/csv;base64,LCwsLAosLTIsMywsCiw1LDQsLAosLCwsCiwsLCwK#noheadings#B2#C3|Example CSV without Headings))
+CSV spreadsheet files can also be used to output a variety of charts. The general format for the command to insert a chart resource is:
+<sub>((resource-chart_type`:`resource_name.csv#char_config#x_start#x_end#y_start#y_end|Resource Description))</sub>
+Here ''chart_type'' can be one of ''bargraph'',  ''linegraph'', or ''pointgraph''. For example, one might have a line like:
+<sub>((resource-bargraph`:`resource_name.csv##B1#B4#C1#C4|Quadratic Function)) </sub>
+which could produce a chart like
+((resource-bargraph:##(1,1)#(2,4)#(3,9)#(4,16)|Quadratic Function))
+In the above example, the values for the `x` coordinates would come from the cells B1, B2, B3, B4 from
+''resource_name.csv '' and the values for the `y` coordinates would come from cells C1, C2, C3, C4 from
+''resource_name.csv ''. Alternatively, rather than use a CSV to get out data we can just list the points we want to plot with a command like:
+<sub>((resource-bargraph`:`##(1,1)#(2,4)#(3,9)#(4,16)|Quadratic Function))</sub>
+==Manipulating Page Resources==
+: A list of media that have already been associated with
+a page appears under the Page Resource heading below the textarea. This
+table allows the user to rename and delete resources as well as insert the
+same resource at multiple locations within the same document.
+: The resources section of the edit page can be thought of as similar to
+a folder in Windows or MacOS. One can have subfolders of the resource folder.
+: The '''Places''' dropdown at the top of the '''Page Resource''' section allows one to navigate
+these folders.
+: The '''Filter''' textfield lets you enter a search string.
+Clicking '''Go''' then shows only those resources
+which contain that search string in their title.
+: The '''Clip Folder''' dropdown is used to copy files between folders and pages.
+Its current value is the folder that the '''Clip Copy''' buttons next to resources
+will copy their resource to when clicked. You can set the '''Clip Folder''' to
+the current folder using the dropdown, then  move to the page and folder that
+you would like to copy stuff from and click the '''Clip Copy''' button of the
+desired resource.
+: The '''Name''', '''Size''', '''Modified''' header links above the resources list
+control the sort order for the resource list. If a page is a media list page,
+then even in read mode, the sort order selected is remembered when drawing the
+media list.
+: The '''Actions''' dropdown can be used to create new folders, new text files, and new csv
+text files within the current page resource folder. These are initially named beginning
+with ''untitled'' followed by some number, and if applicable a file extension.
+: Resource entries for the resources list consist first of an icon, followed by a textfield
+with a name for the resource, followed by buttons for actions that can be done to that resource
+(Rename, Add to Page, Clip Copy), followed by a link [X], which can be used to delete the resource.
+If a resource is editable the icon will look like a plus sign together with a pencil. Clicking
+on the icon will then let you edit the resource.
+===Text and CSV Resources===
+: For normal text files clicking edit will bring up a textarea with the contents of the text to edit.
+For CSV (comma separated value) files this will present the file as an editable spreadsheet.
+Yioop spreadsheets can have equations much like Excel spreadsheets. Clicking on a cell lets one
+edit its contents. For example, if in the cell A3
+one entered the equation:
+ = A1+A2
+then clicking out of the cell would cause it to refresh with the value of the sum of the contents of
+cells A1 and A2. In addition to the standard arithmetic operators ['*', '/', '+', '-', '%'], the
+spreadsheet expressions can use float or integer literals, and can make use of the following table
+of built-in functions:
+{| class="wikitable"
+!Function Name!!Description
+!avg(x1,...,xn), avg(x1:xn)|| computes average of values of cells listed as arguments
+!ceil(x)|| rounds the value of x up to nearest integer
+!cell(i,j)|| returns the contents of the cell with column name of letter j, and row name i. For example, cell(2,'B') would return the contents of cell B2.
+!col(value, search_row, start_col, end_col)|| searches the row ''search_row'' between the columns
+''start_col'', ''end_col'' for ''value''. Returns the column name where this value was found or -1 if not found.
+For example, col(3, 2, "B", "D") might return C if the cell C2 had value 3.
+!exp(x)|| computes `e^x`
+!floor(x)|| rounds the value of x down to the nearest integer
+!log(x)|| computes `log x`
+!min(x1,...,xn), min(x1:xn)|| computes minimum value of cells listed as arguments
+!max(x1,...,xn), max(x1:xn)|| computes maximum value of cells listed as arguments
+!pow(x,y)|| computes `x^y`
+!row(value, search_col, start_row, end_row)|| searches the column ''search_col''
+between the rows ''start_row'', ''end_row'' for ''value''.
+Returns the row name where this value was found or -1 if not found.
+For example, row(3, "C", "1", "5") might return 2 if the cell C2 had value 3.
+!sqrt(x)|| computes `sqrt(x)`
+!sum(x1,...,xn), sum(x1:xn)|| computes sum of values of cells listed as arguments
+!username()|| returns username of the person using this CSV file
+===HTML, PDF and EPub Resources===
+: How HTML, PDF, EPub resources included on a page render depends on how the Yioop wiki software
+has been configured. If no special configuration has been done, then HTML and PDF documents
+will be rendered in an <iframe> tag within the current wiki page. In the EPub case, a link
+to download the resource will be given. If the wiki software detects the presence of the
+file APP_DIR/scripts/pdf.js ([[|PDF.js]])
+or APP_DIR/scripts/epub.js ([[|epub.js]]), the wiki
+system will render the resource in a Javascript viewer and will do things like remember reading
+===Video and Audio Resources===
+: Not all browsers support the same video and audio formats for playback. For this reason
+it sometimes is useful to have multiple video resources for the same video. For example,
+you might have a .ogv and .vp8 version of the same video recording. In read (non-edit)
+mode, the Yioop wiki system displays only one link for video or audio files that have
+the same name except for extension. It then includes the grouped files as separate <source>
+tags within either the <video> or <audio> html tag used to render the item in the browser.
+In this way, you can make your media take best advantage of whatever capabilities your
+client's browser has. If you don't feel like recoding your media in such a fancy way, a safe
+rule of thumb is that .mp3 audio will play back in all modern browsers, and that .mp4 video
+will play back in all modern browsers.
+: For video it is sometimes useful to add a subtitle or caption track. Yioop wiki supports
+[[|WebVTT]] format subtitles and captions. To see how
+Yioop wiki makes use of these files, suppose you included a resource ''foo.mp4'' in your
+wiki pages, and you also had a file named ''foo-captions-en-US.vtt'' then when the HTML
+page is generated from your wiki page, a <track> tag for the caption file would be added
+to the <video> tag. A user seeing this page would then see in the video player a closed caption
+symbol and be able to turn on/off (defaults off) the English captions. If you had
+named the file ''foo-subtitles-en-US.vtt'' instead, then Yioop wiki would include it as a
+subtitles track (defaults on). You can add captions/subtitle files for as many languages as
+: When viewing the page resources for a page in edit mode, one can see one file/resource and
+no grouping of resources by name is done. In this way you can keep track of exactly what
+resources are available for a page.
+==Page Settings, Page Type==
+: In edit mode for a wiki page, next to the page name, is a link [Settings].
+Clicking this link expands a form which can be used to control global settings
+for a wiki page.  This form contains a drop down for the page type, another
+drop down for the type of border for the page in non-logged in mode,
+a checkbox for whether a table of contents should be auto-generated from level 2
+and level 3 headings and then text
+fields or areas for the page title, author, meta robots, and page description.
+Beneath this one can specify another wiki page to be used as a header for this
+page and also specify another wiki page to be used as a footer for this page.
+: The contents of the page title is displayed in the browser title when the
+wiki page is accessed with the  Activity Panel collapsed or when not logged in.
+Similarly, in the collapsed or not logged in mode, if one looks at the HTML
+page source for the page,  in the head of document, <meta> tags for author,
+robots, and description are set according to these fields. These fields can
+be useful for search engine optimization. The robots meta tag can be
+used to control how search engine robots index the page. Wikipedia has more information on
+[[|Meta Elements]].
+: The '''Standard''' page type treats the page as a usual wiki page.
+: '''Page Alias''' type redirects the current page to another page name. This can
+be used to handle things like different names for the same topic or to do localization
+of pages. For example, if you switch the locale from English to French and
+you were on the wiki page dental_floss when you switch to French the article
+dental_floss might redirect to the page dentrifice.
+: '''Media List''' type means that the page, when read, should display just the
+resources in the page as a list of thumbnails and links. These links for the
+resources go to separate pages used to display these resources.
+This kind of page is useful for a gallery of
+images or a collection of audio or video files.
+: '''Presentation''' type is for a wiki page whose purpose is a slide presentation. In this mode,
+on a line by itself is used to separate one slide. If presentation type is selected, a new
+slide icon appears in the wiki edit bar allowing one to easily add new slides.
+When the Activity panel is not collapsed and you are reading a presentation, it just
+displays as a single page with all slides visible. Collapsing the Activity panel presents
+the slides as a typical slide presentation using the
 [[|Slidy]] javascript.
 $public_pages["en-US"]["ad_program_terms"] = <<< 'EOD'
@@ -2366,19 +2366,23 @@ robots=




-END_HEAD_VARSThese checkboxes control whether various links and drop downs on the search result and landing
-pages appear or not.
-; &#039;&#039;&#039;Word Suggest&#039;&#039;&#039;: Controls whether the suggested query drop down appear as a query is entered in the search bar and whether thesaurus results appear on search result pages.
-; &#039;&#039;&#039;Subsearch&#039;&#039;&#039; : Controls whether the links to subsearches such as Image, Video, and News search appear at the top of all search pages
-; &#039;&#039;&#039;Signin&#039;&#039;&#039; : Controls whether the &#039;&#039;&#039;Sign In&#039;&#039;&#039; link appears at the top of the Yioop landing and search result pages.
-; &#039;&#039;&#039;Cache&#039;&#039;&#039;, &#039;&#039;&#039;Similar&#039;&#039;&#039;, &#039;&#039;&#039;Inlinks&#039;&#039;&#039;, &#039;&#039;&#039;IP Address&#039;&#039;&#039;: Control whether the corresponding links appear after each search result item.

+END_HEAD_VARSThese checkboxes control whether various links and drop downs on the search result and landing
+pages appear or not.
+; &#039;&#039;&#039;Word Suggest&#039;&#039;&#039;: Controls whether the suggested query drop down appears as a query is entered in the search bar.
+; &#039;&#039;&#039;Subsearch&#039;&#039;&#039; : Controls whether the links to subsearches such as Image, Video, and News search appear at the top of all search pages
+; &#039;&#039;&#039;Signin&#039;&#039;&#039; : Controls whether the &#039;&#039;&#039;Sign In&#039;&#039;&#039; link appears at the top of the Yioop landing and search result pages.
+; &#039;&#039;&#039;Cache&#039;&#039;&#039;, &#039;&#039;&#039;Similar&#039;&#039;&#039;, &#039;&#039;&#039;Inlinks&#039;&#039;&#039;, &#039;&#039;&#039;IP Address&#039;&#039;&#039;: Control whether the corresponding links appear after each search result item.

 $help_pages["en-US"]["Seed_Sites_and_URL_Suggestions"] = <<< EOD
diff --git a/src/controllers/SearchController.php b/src/controllers/SearchController.php
index febb9ab14..f5ee96c9e 100755
--- a/src/controllers/SearchController.php
+++ b/src/controllers/SearchController.php
@@ -965,12 +965,12 @@ class SearchController extends Controller implements CrawlConstants
                     $out_pages[$first_image]['IMAGES'] = [];
                 $out_pages[$first_image]['IMAGES'][] = $page;
-            } else if (!empty($page[self::IS_NEWS])) {
+            } else if (!empty($page[self::IS_FEED])) {
                 if ($first_feed_item == -1) {
                     $first_feed_item = count($out_pages);
-                    $out_pages[$first_feed_item]['NEWS'] = [];
+                    $out_pages[$first_feed_item]['FEED'] = [];
-                $out_pages[$first_feed_item]['NEWS'][] = $page;
+                $out_pages[$first_feed_item]['FEED'][] = $page;
             } else {
                 $out_pages[] = $page;
diff --git a/src/controllers/components/StoreComponent.php b/src/controllers/components/StoreComponent.php
index 54f47d743..856f595a4 100644
--- a/src/controllers/components/StoreComponent.php
+++ b/src/controllers/components/StoreComponent.php
@@ -441,7 +441,7 @@ class StoreComponent extends Component
         $parent = $this->parent;
         $keywords = explode("," , strtoupper($data['KEYWORDS']));
         array_walk($keywords, [C\NS_COMPONENTS .
-            "AdvertisementComponent", "trim_value"]);
+            "StoreComponent", "trim_value"]);
         $min_bid_reqd = 0;
         $expensive_bid = 0;
         foreach ($keywords as $keyword) {
diff --git a/src/css/search.css b/src/css/search.css
index d2a8a41c0..e2c83a4db 100755
--- a/src/css/search.css
+++ b/src/css/search.css
@@ -1285,20 +1285,6 @@ li
     top: -0.8in;
     width: 8in;
-.html-ltr .thesaurus-serp-results
-    left: 2.2in;
-    position: relative;
-    top: -1.7in;
-    width: 8in;
-.html-rtl .thesaurus-serp-results
-    right: 2.2in;
-    position: relative;
-    top: -1.7in;
-    width: 8in;
 .html-rtl .serp
     position: relative;
@@ -1353,27 +1339,6 @@ li
-.html-ltr .thesaurus
-    color: #666;
-    font-size: 14pt;
-    font-weight: bold;
-    left: 0.2in;
-    position: relative;
-    top: -0.8in;
-    width: 8in;
-.html-rtl .thesaurus
-    color: #666;
-    font-size: 14pt;
-    font-weight: bold;
-    right: 0.2in;
-    position: relative;
-    top: -0.8in;
-    width: 8in;
     clear: both;
diff --git a/src/executables/ArcTool.php b/src/executables/ArcTool.php
index f098830af..f498bddde 100755
--- a/src/executables/ArcTool.php
+++ b/src/executables/ArcTool.php
@@ -256,49 +256,66 @@ class ArcTool implements CrawlConstants
         echo "\nBundle Name: $bundle_name\n";
         $archive_type = $this->getArchiveKind($archive_path);
         echo "Bundle Type: $archive_type\n";
         if (strcmp($archive_type,"IndexArchiveBundle") != 0) {
             $this->badFormatMessageAndExit($archive_path, "index");
         $index_timestamp = substr($archive_path,
             strpos($archive_path, self::index_data_base_name) +
-        $mask = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
-        $hash_key = L\crawlHashWord($word, true, $mask) ;
-        $start_time = microtime(true);
-        $info = IndexManager::getWordInfo($index_timestamp, $hash_key, 0,
-            $mask, -1, $start_generation, $num_generations);
-        echo "Dictionary Lookup Time:" . L\changeInMicrotime($start_time)."\n";
-        if (!$info) {
+        $hash_paths = L\allCrawlHashPaths($word, true);
+        $found = false;
+        echo "!!Performing Looking up for phrase " .
+            "at each possible shift position. Outputting results for each ".
+            "possibility!!\n";
+        foreach ($hash_paths as $hash_shift) {
+            if (is_array($hash_shift)) {
+                list($hash_key, $shift) = $hash_shift;
+            } else {
+                $hash_key = $hash_shift;
+                $shift = 0;
+            }
+            $start_time = microtime(true);
+            echo "Looking up in dictionary:\n";
+            echo " Key: ". L\toHexString($hash_key) . "\n";
+            echo " Shift: ". $shift . "\n";
+            $info = IndexManager::getWordInfo($index_timestamp, $hash_key,
+                $shift, -1, $start_generation, $num_generations);
+            echo "Dictionary Lookup Time:" . L\changeInMicrotime($start_time)
+                . "\n";
+            if (!$info) {
+                echo " Key not found\n";
+                continue;
+            }
+            $found = true;
+            echo "Dictionary Tiers: ";
+            $index = IndexManager::getIndex($index_timestamp);
+            $tiers = $index->dictionary->active_tiers;
+            foreach ($tiers as $tier) {
+                echo " $tier";
+            }
+            echo "\nBundle Dictionary Entries for '$word':\n";
+            echo "====================================\n";
+            $i = 1;
+            foreach ($info as $record) {
+                echo "RECORD: $i\n";
+                echo "Hex ID: " . L\toHexString($record[4])."\n";
+                echo "GENERATION: {$record[0]}\n";
+                echo "FIRST WORD OFFSET: {$record[1]}\n";
+                echo "LAST WORD OFFSET: {$record[2]}\n";
+                echo "NUMBER OF POSTINGS: {$record[3]}\n\n";
+                $i++;
+            }
+        }
+        if (!$found) {
             //fallback to old word hashes
             $info = IndexManager::getWordInfo($index_timestamp,
-                L\crawlHash($word, true), 0, "", 1, $start_generation,
+                L\crawlHash($word, true), 0, 1, $start_generation,
             if (!$info) {
-                echo "\n$word does not appear in bundle!\n\n";
+                echo "\n$word does not appear in bundle!\n";
-        echo "Dictionary Tiers: ";
-        $index = IndexManager::getIndex($index_timestamp);
-        $tiers = $index->dictionary->active_tiers;
-        foreach ($tiers as $tier) {
-            echo " $tier";
-        }
-        echo "\nBundle Dictionary Entries for '$word':\n";
-        echo "====================================\n";
-        $i = 1;
-        foreach ($info as $record) {
-            echo "RECORD: $i\n";
-            echo "Hex ID: ".L\toHexString($record[4])."\n";
-            echo "Media Type: " . PhraseParser::getMediaType($record[4]) . "\n";
-            echo "Safe: ". PhraseParser::getSafety($record[4]) . "\n";
-            echo "GENERATION: {$record[0]}\n";
-            echo "FIRST WORD OFFSET: {$record[1]}\n";
-            echo "LAST WORD OFFSET: {$record[2]}\n";
-            echo "NUMBER OF POSTINGS: {$record[3]}\n\n";
-            $i++;
-        }
      * Prints information about the number of words and frequencies of words
@@ -568,11 +585,12 @@ class ArcTool implements CrawlConstants
                     $max_generation = max($max_generation, $generation);
                 for ($i = $start_shard; $i < $max_generation + 1; $i++) {
-                    $shard_name = $path."/posting_doc_shards/index$i";
+                    $shard_name = $path . "/posting_doc_shards/index$i";
                     echo "\nShard $i of $num_shards\n";
                     $shard = new IndexShard($shard_name, $i,
                         C\NUM_DOCS_PER_GENERATION, true);
                     if ($dictionary->addShardDictionary($shard)) {
+                        $shard->saveWithoutDictionary();
                         file_put_contents($shard_count_file, $i + 1);
                     } else {
                         echo "Problem adding shard $i";
@@ -929,7 +947,7 @@ class ArcTool implements CrawlConstants
                         $triplet_lists =
-                        $word_lists = $triplet_list['WORD_LIST'];
+                        $word_lists = $triplet_lists['WORD_LIST'];
                         $len = strlen($phrase_string);
                         if (PhraseParser::computeSafeSearchScore($word_lists,
                             $len) < 0.012) {
@@ -952,8 +970,7 @@ class ArcTool implements CrawlConstants
                         $doc_keys .= $score_keys;
                     $shard->addDocumentWords($doc_keys, $offset,
-                        $word_lists, $meta_ids,
-                        PhraseParser::$materialized_metas, true, false);
+                        $word_lists, $meta_ids, true, false);
                     $offset = $object[0];
                 $seen_partition += $num_to_get;
@@ -963,7 +980,6 @@ class ArcTool implements CrawlConstants
      * Used to create an archive_bundle_iterator for a non-yioop archive
      * As these iterators sometimes make use of a folder to store savepoints
diff --git a/src/executables/Fetcher.php b/src/executables/Fetcher.php
index 3d92faa31..031baece0 100755
--- a/src/executables/Fetcher.php
+++ b/src/executables/Fetcher.php
@@ -2865,8 +2865,7 @@ class Fetcher implements CrawlConstants
                 ]->addDocumentWords($doc_keys, self::NEEDS_OFFSET_FLAG,
-                $word_lists, $meta_ids, PhraseParser::$materialized_metas,
-                true, $doc_rank);
+                $word_lists, $meta_ids, true, $doc_rank);
             if (isset($word_and_qa_lists['QUESTION_ANSWER_LIST'])) {
                 $site[self::QUESTION_ANSWERS] =
@@ -2882,6 +2881,9 @@ class Fetcher implements CrawlConstants
             if (!$this->no_process_links && !isset($site[self::JUST_METAS]) &&
                 !isset($this->programming_language_extension[$lang])) {
+                $tokenizer = PhraseParser::getTokenizer($lang);
+                $has_stopwords_remover =
+                    method_exists($tokenizer, "stopwordsRemover");
                 foreach ($site[self::LINKS] as $url => $link_text) {
                     /* this mysterious check means won't index links from
                       robots.txt. Sitemap will still be in TO_CRAWL, but that's
@@ -2900,17 +2902,25 @@ class Fetcher implements CrawlConstants
                     $elink_flag = ($link_host != $host) ? true : false;
                     $link_text = strip_tags($link_text);
+                    if ($has_stopwords_remover) {
+                        $useful_text = $tokenizer->stopwordsRemover($link_text);
+                    } else {
+                        $useful_text = $link_text;
+                    }
+                    if (mb_strlen($useful_text) < C\MIN_LINKS_TEXT_DOC) {
+                        continue;
+                    }
                     $ref = ($elink_flag) ? "eref" : "iref";
                     $url = str_replace('|', "%7C", $url);
                     $link_id =
-                        "url|".$url."|text|".urlencode($link_text).
-                        "|$ref|".$site_url;
+                        "url|" . $url . "|text|" . urlencode($link_text) .
+                        "|$ref|" . $site_url;
                     $elink_flag_string = ($elink_flag) ? "e" :
                     $link_keys = L\crawlHash($url, true) .
                         L\crawlHash($link_id, true) .
-                        substr(L\crawlHash($host."/", true), 1);
+                        substr(L\crawlHash($host . "/", true), 1);
                     $summary[self::URL] =  $link_id;
                     $summary[self::TITLE] = $url;
                         // stripping html to be on the safe side
@@ -2937,9 +2947,7 @@ class Fetcher implements CrawlConstants
                             self::NEEDS_OFFSET_FLAG, $link_word_lists,
-                                $link_meta_ids,
-                                PhraseParser::$materialized_metas, false,
-                                $link_rank);
+                            $link_meta_ids, false, $link_rank);
             $interim_elapse = L\changeInMicrotime($interim_time);
diff --git a/src/executables/QueryTool.php b/src/executables/QueryTool.php
index 0cf09c645..bbc9a0847 100755
--- a/src/executables/QueryTool.php
+++ b/src/executables/QueryTool.php
@@ -78,7 +78,8 @@ class QueryTool implements CrawlConstants
         $query = $argv[1];
-        $results_per_page = (isset($argv[2])) ? $argv[2] : 10;
+        $results_per_page = (isset($argv[2])) ? (is_numeric($argv[2]) ?
+            $argv[2] : 0 ) : 10;
         $limit = (isset($argv[3])) ? $argv[3] : 0;
         L\setLocaleObject((isset($argv[4])) ? $argv[4] : C\DEFAULT_LOCALE);
         $start_time = microtime(true);
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 692b671cd..5adc90db4 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -344,7 +344,7 @@ class QueueServer implements CrawlConstants, Join
             in_array($argv[3], [self::INDEXER, self::SCHEDULER])) {
             $this->server_type = $argv[3];
             $this->server_name = $argv[3];
-            L\crawlLog($argv[3]." logging started.");
+            L\crawlLog($argv[3] . " logging started.");
         $remove = false;
         $old_message_names = ["QueueServerMessages.txt",
@@ -524,9 +524,8 @@ class QueueServer implements CrawlConstants, Join
             $crawl_params[self::CRAWL_TIME] = $this->crawl_time;
             $crawl_params[self::CRAWL_TYPE] = $this->crawl_type;
             $info_string = serialize($crawl_params);
-            file_put_contents(
-                C\CRAWL_DIR."/schedules/". $this->process_name . "Messages.txt",
-                $info_string);
+            file_put_contents(C\CRAWL_DIR . "/schedules/" .
+                $this->process_name . "Messages.txt", $info_string);
             chmod(C\CRAWL_DIR."/schedules/". $this->process_name .
                 "Messages.txt", 0777);
@@ -872,7 +871,6 @@ class QueueServer implements CrawlConstants, Join
         $close_file = C\CRAWL_DIR.'/schedules/'.self::index_closed_name.
         if (!file_exists($close_file) &&
             strcmp($this->server_type, self::BOTH) != 0) {
             file_put_contents($close_file, "2");
@@ -905,10 +903,9 @@ class QueueServer implements CrawlConstants, Join
         $crawl_status['CRAWL_TIME'] = $this->crawl_time;
         $crawl_status['COUNT'] = 0;
         $crawl_status['DESCRIPTION'] = $message;
-        file_put_contents(
-            C\CRAWL_DIR."/schedules/crawl_status.txt",
+        file_put_contents(C\CRAWL_DIR . "/schedules/crawl_status.txt",
-        chmod(C\CRAWL_DIR."/schedules/crawl_status.txt", 0777);
+        chmod(C\CRAWL_DIR . "/schedules/crawl_status.txt", 0777);
      * When a crawl is being shutdown, this function is called to write
@@ -931,7 +928,7 @@ class QueueServer implements CrawlConstants, Join
         L\crawlLog("Writing queue contents back to schedules...");
-        $dir = C\CRAWL_DIR."/schedules/".self::schedule_data_base_name.
+        $dir = C\CRAWL_DIR."/schedules/" . self::schedule_data_base_name .
         if (!file_exists($dir)) {
@@ -989,8 +986,9 @@ class QueueServer implements CrawlConstants, Join
                     $data_string = L\webencode(
                     $data_hash = L\crawlHash($data_string);
-                    file_put_contents($dir."/At".$schedule_time."From127-0-0-1".
-                        $note_string. "WithHash$data_hash.txt", $data_string);
+                    file_put_contents($dir."/At" . $schedule_time .
+                        "From127-0-0-1". $note_string .
+                        "WithHash$data_hash.txt", $data_string);
                     $data_string = "";
                     $schedule_data[self::TO_CRAWL] = [];
@@ -1007,7 +1005,7 @@ class QueueServer implements CrawlConstants, Join
             } else {
                 $schedule_time = $time;
-            file_put_contents($dir."/At".$schedule_time."From127-0-0-1".
+            file_put_contents($dir."/At" . $schedule_time . "From127-0-0-1".
                 $note_string . "WithHash$data_hash.txt", $data_string);
@@ -1026,7 +1024,7 @@ class QueueServer implements CrawlConstants, Join
-                index_archive->addCurrentShardDictionary();
+                index_archive->addAdvanceGeneration();
diff --git a/src/library/CrawlConstants.php b/src/library/CrawlConstants.php
index 7a632d132..176fe2242 100755
--- a/src/library/CrawlConstants.php
+++ b/src/library/CrawlConstants.php
@@ -231,8 +231,7 @@ interface CrawlConstants
     const SCRAPER_LABEL = 'du';
     const SCRAPERS = 'dv';
-    const IS_NEWS = "dw";
-    const QUESTION_ANSWERS = 'dx';
-    const CONTENT_SIZE = 'dy';
-    const NO_RANGE = 'dz';
+    const QUESTION_ANSWERS = 'dw';
+    const CONTENT_SIZE = 'dx';
+    const NO_RANGE = 'dy';
diff --git a/src/library/IndexArchiveBundle.php b/src/library/IndexArchiveBundle.php
index d2343cce0..eba02a888 100644
--- a/src/library/IndexArchiveBundle.php
+++ b/src/library/IndexArchiveBundle.php
@@ -252,6 +252,8 @@ class IndexArchiveBundle implements CrawlConstants
     public function addAdvanceGeneration($callback = null)
+        echo "Resaving active shard without prefix and dictionary etc\n";
+        $this->getActiveShard()->saveWithoutDictionary();
         //Set up new shard
         $this->generation_info['CURRENT'] =
@@ -320,8 +322,7 @@ class IndexArchiveBundle implements CrawlConstants
             $current_index_shard_file = $this->dir_name .
                 "/posting_doc_shards/index". $this->generation_info['CURRENT'];
             if (file_exists($current_index_shard_file)) {
-                if (isset($this->generation_info['DISK_BASED']) &&
-                    $this->generation_info['DISK_BASED'] == true) {
+                if (!empty($this->generation_info['DISK_BASED'])) {
                     $this->current_shard = new IndexShard(
diff --git a/src/library/IndexDictionary.php b/src/library/IndexDictionary.php
index af324a20b..2dd219f0e 100644
--- a/src/library/IndexDictionary.php
+++ b/src/library/IndexDictionary.php
@@ -250,7 +250,7 @@ class IndexDictionary implements CrawlConstants
             // write prefixes
-            $fh = fopen($this->dir_name."/$i/0".$out_slot.".dic", "wb");
+            $fh = fopen($this->dir_name . "/$i/0" . $out_slot . ".dic", "wb");
             fwrite($fh, substr($prefix_string, $i * $prefix_header_size,
             $j = $num_prefix_letters;
@@ -301,7 +301,7 @@ class IndexDictionary implements CrawlConstants
         for ($i = 0; $i < self::NUM_PREFIX_LETTERS; $i++) {
             crawlTimeoutLog("..processing first index prefix $i of ".
                 self::NUM_PREFIX_LETTERS." in $tier.");
-            $this-> mergeTierFiles($i, $tier, $out_slot);
+            $this->mergeTierFiles($i, $tier, $out_slot);
@@ -322,7 +322,7 @@ class IndexDictionary implements CrawlConstants
         $prefix_header_size = self::PREFIX_HEADER_SIZE;
         $fh_a = fopen( $file_a, "rb");
         $fh_b = fopen( $file_b, "rb");
-        $fh_out = fopen( $this->dir_name."/$prefix/".($tier + 1).
+        $fh_out = fopen( $this->dir_name . "/$prefix/" . ($tier + 1) .
             "$out_slot.dic", "wb+");
         $prefix_bit = ($prefix & 128) ? 0 : 128;
         // Scan past prefix headers
@@ -530,8 +530,8 @@ class IndexDictionary implements CrawlConstants
      * @param string $record_b a dictionary record including auxiliary records
      *      from the 'b'th file of the give tier
      * @param int $prefix_bit either 0 or 32768. The first bit of an auxiliary
-     *      record should be ~higher order bit of the given prefix letter
-     *      used by the tier file.
+     *      record should be negation of higher order bit of the given prefix
+     *      letter used by the tier file.
      * @return string a single record with merged strings making use of
      *      auxliary records as needed containing
      *      (generation, posting list offset, length) information.
@@ -563,7 +563,7 @@ class IndexDictionary implements CrawlConstants
         if (count($aux_records) == 3) {
             $record .=  chr($prefix_bit + ($num_aux_records >> 8)) .
-                chr($num_aux_records & 255). implode("", $aux_records);
+                chr($num_aux_records & 255) . implode("", $aux_records);
             $aux_records = [];
@@ -609,7 +609,7 @@ class IndexDictionary implements CrawlConstants
         $posting_info = str_split(substr($record_string, $offset + 2, 30), 10);
         if (!isset($posting_info[2]) ){
             crawlLog("Decode Aux Record failed...".
-                toHexString($record_string)."  ".$offset);
+                toHexString($record_string)."  " . $offset);
             crawlLog(print_r($posting_info, true));
             crawlLog(print_r(debug_backtrace(), true));
@@ -716,9 +716,6 @@ class IndexDictionary implements CrawlConstants
      * @param bool $raw whether the id is our version of base64 encoded or not
      * @param int $shift how many low order bits to drop from $word_id's
      *    when checking for a match
-     * @param string $mask bit mask to be applied to bytes after the 8th
-     *     byte through 20th byte of word_id. In single word case these
-     *     bytes contain safe:, media:, and class: meta word info
      * @param int $threshold if greater than zero how many posting list
      *    results in dictionary info returned before stopping looking for
      *    more matches
@@ -728,7 +725,7 @@ class IndexDictionary implements CrawlConstants
      * @return mixed an array of entries of the form
      *     generation, first offset, last offset, count
-     public function getWordInfo($word_id, $raw = false, $shift = 0, $mask = "",
+     public function getWordInfo($word_id, $raw = false, $shift = 0,
         $threshold = -1, $start_generation = -1, $num_distinct_generations = -1,
         $with_remaining_total = false)
@@ -738,8 +735,7 @@ class IndexDictionary implements CrawlConstants
         $current_max_generation = -2;
         foreach ($this->active_tiers as $tier) {
             $tier_info = $this->getWordInfoTier($word_id, $raw, $tier, $shift,
-                $mask, $threshold, $start_generation,
-                $num_distinct_generations);
+                $threshold, $start_generation, $num_distinct_generations);
             if (is_array($tier_info) && isset($tier_info[2]) &&
                 is_array($tier_info[2])) {
                 list($found_count, $max_found_generation,
@@ -790,9 +786,6 @@ class IndexDictionary implements CrawlConstants
      * @param int $tier which tier to get word info from
      * @param int $shift how many low order bits to drop from $word_id's
      *    when checking for a match
-     * @param string $mask bit mask to be applied to bytes after the 8th
-     *      byte through 20th byte of word_id. In single word case these
-     *      bytes contain safe:, media:, and class: meta word info
      * @param int $threshold if greater than zero how many posting list
      *      results in dictionary info returned before stopping looking for
      *      more matches
@@ -807,8 +800,7 @@ class IndexDictionary implements CrawlConstants
      *      no data
      public function getWordInfoTier($word_id, $raw, $tier, $shift = 0,
-        $mask = "", $threshold = -1, $start_generation = -1,
-        $num_distinct_generations = -1)
+        $threshold = -1, $start_generation = -1, $num_distinct_generations = -1)
         $num_generations = 0;
         $max_retained_generation = -1;
@@ -827,17 +819,12 @@ class IndexDictionary implements CrawlConstants
         if (strlen($word_id) < 1) {
             return false;
-        if ($mask != "") {
-            $mask_len = min(11, strlen($mask));
-        } else {
-            $mask_len = 0;
-        }
         $word_item_len = $word_key_len + IndexShard::WORD_DATA_LEN;
         $word_data_len = IndexShard::WORD_DATA_LEN;
         $file_num = ord($word_id[0]);
             Entries for a particular shard have postings for both
-            docs and links. If an entry has more than max_entry_len
+            docs and links. If an entry has more than max_entry_count
             we will assume entry somehow got corrupted and skip that
             generation for that word. Because we are including link have
             set threshold to 5 * number of docs that could be in a shard
@@ -912,7 +899,7 @@ class IndexDictionary implements CrawlConstants
         $id_info = [];
         $num_aux_records = (ord($word_string[$word_key_len]) << 8) +
             ord($word_string[$word_key_len + 1]);
-        $word_string = "\x00\x00".substr($word_string, $word_key_len + 2);
+        $word_string = "\x00\x00" . substr($word_string, $word_key_len + 2);
         $tmp = IndexShard::getWordInfoFromString($word_string, true);
         $check_and_auxes = 1;
         if ($tmp[3] < $max_entry_count) {
@@ -920,10 +907,11 @@ class IndexDictionary implements CrawlConstants
             $previous_id = $id;
             $remember_generation = $previous_generation;
             if ($start_generation <= $previous_generation) {
-                if ($this->checkMaskAndAdd($id, $word_id, $mask, $mask_len,
+                $this->addLookedUpEntry($id, $word_id,
                     $tmp, $info, $total_count, $previous_generation,
                     $previous_id, $num_generations, $num_distinct_generations,
-                    $max_retained_generation, $id_info) && $num_aux_records>0) {
+                    $max_retained_generation, $id_info);
+                if ($num_aux_records > 0) {
                     $this->addAuxInfoRecords($id ,$file_num, $num_aux_records,
                         $total_count, $threshold, $info, $previous_generation,
                         $num_generations, $start +
@@ -946,7 +934,7 @@ class IndexDictionary implements CrawlConstants
            single records get corrupted.
         $break_count = 0;
-        /* we found one match so far (ignoring mask), we are now backing up
+        /* we found one match so far, we are now backing up
            to look for earlier matches
         while ($test_loc >= $low) {
@@ -997,11 +985,12 @@ class IndexDictionary implements CrawlConstants
                     $num_generations < $num_distinct_generations ||
                     $current_generation <= $max_retained_generation
                     )) {
-                    if ($this->checkMaskAndAdd($id, $word_id, $mask, $mask_len,
+                    $this->addLookedUpEntry($id, $word_id,
                         $tmp, $info, $total_count, $previous_generation,
                         $previous_id, $num_generations,
                         $num_distinct_generations, $max_retained_generation,
-                        $id_info) && $num_aux_records > 0) {
+                        $id_info);
+                    if ($num_aux_records > 0) {
                         $this->addAuxInfoRecords($id, $file_num,
                             $num_aux_records, $total_count, $threshold, $info,
                             $previous_generation, $num_generations, $start +
@@ -1020,7 +1009,7 @@ class IndexDictionary implements CrawlConstants
         $test_loc = $check_loc + $check_and_auxes;
         $previous_generation = $remember_generation;
         $break_count = 0;
-        /* from the first match we found (ignoring mask), we are now looking
+        /* from the first match we found, we are now looking
            forward to find matches
         while ($test_loc <= $high) {
@@ -1050,11 +1039,11 @@ class IndexDictionary implements CrawlConstants
                     $num_generations < $num_distinct_generations ||
                     $current_generation <= $max_retained_generation
                     )) {
-                    if ($this->checkMaskAndAdd($id, $word_id, $mask, $mask_len,
-                        $tmp, $info, $total_count, $previous_generation,
-                        $previous_id, $num_generations,
-                        $num_distinct_generations, $max_retained_generation,
-                        $id_info) && $num_aux_records > 0) {
+                    $this->addLookedUpEntry($id, $word_id, $tmp, $info,
+                        $total_count, $previous_generation, $previous_id,
+                        $num_generations, $num_distinct_generations,
+                        $max_retained_generation, $id_info);
+                    if ($num_aux_records > 0) {
                         $this->addAuxInfoRecords($id, $file_num,
                             $num_aux_records, $total_count, $threshold, $info,
                             $previous_generation, $num_generations, $start +
@@ -1078,7 +1067,8 @@ class IndexDictionary implements CrawlConstants
      * a given word id can't be stored in a single record
      * @param string $id word id to add aux records for
-     * @param int $file_num
+     * @param int $file_num which prefix file to read from (always reads
+     *     a file at the max_tier level)
      * @param int $num_aux_records
      * @param int& $total_count
      * @param int $threshold
@@ -1129,7 +1119,9 @@ class IndexDictionary implements CrawlConstants
                     $id_info[$record[0]][] = count($info);
                     $info[] = $record;
                     $total_count += $record[3];
-                    if ($threshold > 0 && $total_count > $threshold) { return; }
+                    if ($threshold > 0 && $total_count > $threshold) {
+                        return;
+                    }
                     $previous_generation = $record[0];
@@ -1155,18 +1147,14 @@ class IndexDictionary implements CrawlConstants
      * This method is used when computing the array of
      * (generation, posting_list_start, len, exact_word_id) quadruples when
-     * looking up a $word_id in an index dictionary. It checks
-     * if the $id of a dictionary row matches $word_id up to the $mask info.
-     * If so, it adds the word record to the quadruple array $info that has been
+     * looking up a $word_id in an index dictionary. It adds the
+     * word record to the quadruple array $info that has been
      * calculated so far. It also update $total_count, and as well as
      * $previous info for the previous matching record.
      * @param string $id of a row to compare $word_id against
      * @param string $word_id the word id of a term or phrase we are computing
      *     the quadruple array for
-     * @param string $mask up to 9 byte wask used to say which materialized
-     *     meta words should be checked for when doing a match
-     * @param int $mask_len this should be strlen($mask)
      * @param array $record current record from dictionary that we may or may
      *     not add to info
      * @param array& $info quadruple array we are adding to
@@ -1177,61 +1165,40 @@ class IndexDictionary implements CrawlConstants
      * @param int $num_distinct_generations
      * @param int& $max_retained_generation
      * @param array& $id_info
-     * @return bool whether the record was added
-    public function checkMaskAndAdd($id, $word_id, $mask, $mask_len, $record,
+    public function addLookedUpEntry($id, $word_id, $record,
         &$info, &$total_count, &$previous_generation, &$previous_id,
         &$num_generations, $num_distinct_generations,
         &$max_retained_generation, &$id_info)
         $record[4] = $id;
-        $add_flag = true;
-        if ($mask != "" && strlen($id) > 9 && strlen($word_id) > 9 &&
-            substr_compare($id, $word_id, 9, $mask_len) != 0) {
-            $k = 0;
-            $old_k = 0;
-            while(($k = strpos($mask, "\xFF", $old_k)) !== false) {
-                $loc = $k + 8;
-                if (isset($id[$loc]) && $id[$loc] != $word_id[$loc]) {
-                    $add_flag = false;
-                    break;
-                }
-                if ($k == $old_k) {
-                    $k++;
-                }
-                $old_k = $k;
-            }
-        }
-        if ($add_flag) {
             //adding to the end is front is slower than tacking to end
-            if ($num_distinct_generations > 0) {
-                if (!isset($id_info[$record[0]])) {
-                    $id_info[$record[0]] = [];
-                    if ($num_generations >= $num_distinct_generations) {
-                        if (isset($id_info[$max_retained_generation])) {
-                            foreach ($id_info[$max_retained_generation] as
-                                $key) {
-                                $total_count -= $info[$key][3];
-                                $info[$key] = false;
-                            }
-                            unset($id_info[$max_retained_generation]);
-                        }
-                        $max_retained_generation = max(array_keys($id_info));
-                    } else {
-                        $num_generations++;
-                        if ($record[0] > $max_retained_generation) {
-                            $max_retained_generation = $record[0];
+        if ($num_distinct_generations > 0) {
+            if (!isset($id_info[$record[0]])) {
+                $id_info[$record[0]] = [];
+                if ($num_generations >= $num_distinct_generations) {
+                    if (isset($id_info[$max_retained_generation])) {
+                        foreach ($id_info[$max_retained_generation] as
+                            $key) {
+                            $total_count -= $info[$key][3];
+                            $info[$key] = false;
+                        unset($id_info[$max_retained_generation]);
+                    }
+                    $max_retained_generation = max(array_keys($id_info));
+                } else {
+                    $num_generations++;
+                    if ($record[0] > $max_retained_generation) {
+                        $max_retained_generation = $record[0];
-                $id_info[$record[0]][] = count($info);
-            $info[] = $record;
-            $total_count += $record[3];
-            $previous_generation = $record[0];
-            $previous_id = $id;
-        return $add_flag;
+        $id_info[$record[0]][] = count($info);
+        $info[] = $record;
+        $total_count += $record[3];
+        $previous_generation = $record[0];
+        $previous_id = $id;
      * Gets from disk $len many bytes beginning at $offset from the
@@ -1302,4 +1269,4 @@ class IndexDictionary implements CrawlConstants
             $this->fhs[$file_num][$tier], self::DICT_BLOCK_SIZE);
         return $this->blocks[$file_num][$tier][$bytes];
\ No newline at end of file
diff --git a/src/library/IndexManager.php b/src/library/IndexManager.php
index af6d718ae..83b710377 100644
--- a/src/library/IndexManager.php
+++ b/src/library/IndexManager.php
@@ -149,8 +149,6 @@ class IndexManager implements CrawlConstants
      *     dictionary
      * @param int $shift if $hash is for a phrase, how many low order
      *     bits of word id to discard
-     * @param string $mask if $hash is for a word, after the 9th byte what
-     *     meta word mask should be applied to the 20 byte hash
      * @param int $threshold after the number of results exceeds this amount
      *     stop looking for more dictionary entries.
      * @param int $start_generation
@@ -161,28 +159,18 @@ class IndexManager implements CrawlConstants
      *      that match $hash)
     public static function getWordInfo($index_name, $hash, $shift = 0,
-        $mask = "", $threshold = -1, $start_generation = -1,
-        $num_distinct_generations = -1, $with_remaining_total = false)
+        $threshold = -1, $start_generation = -1, $num_distinct_generations = -1,
+        $with_remaining_total = false)
         $id = "$index_name:$start_generation:$num_distinct_generations";
         $index = self::getIndex($index_name);
-        $len = strlen($mask);
-        if ($len > 0) {
-            $pre_hash = substr($hash, 0, 8) .
-                "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
-        } else {
-            $pre_hash = $hash;
-        }
         $tmp = [];
-        $test_mask = "";
         if ((!C\nsdefined('NO_FEEDS') || !C\NO_FEEDS) &&
             $start_generation < 0
             && file_exists(C\WORK_DIRECTORY . "/feeds/index")) {
-            //NO_FEEDS defined true in statistic_controller.php
             $use_feeds = true;
             $feed_shard = self::getIndex("feed");
-            $feed_info = $feed_shard->getWordInfo($hash, true, $shift,
-                $mask);
+            $feed_info = $feed_shard->getWordInfo($hash, true, $shift);
             if (is_array($feed_info)) {
                 $tmp[-1] = [-1, $feed_info[0],
                     $feed_info[1], $feed_info[2], $feed_info[3]];
@@ -191,7 +179,7 @@ class IndexManager implements CrawlConstants
         if (!empty($index->dictionary)) {
             $pre_info =
                 $index->dictionary->getWordInfo($hash, true, $shift,
-                $mask, $threshold, $start_generation,
+                $threshold, $start_generation,
                 $num_distinct_generations, true);
         if (!empty($pre_info[1])) {
@@ -230,22 +218,12 @@ class IndexManager implements CrawlConstants
         $pos = -1;
         $total_num_docs = 0;
-        $hashes = allCrawlHashPaths($term_or_phrase, [], [], true);
-        if (!is_array($hashes)) {
-            $hashes = [$hashes];
-        }
+        $hashes = allCrawlHashPaths($term_or_phrase, true);
         foreach ($hashes as $hash) {
-            if (is_array($hash)) {
-                list($num_docs, ) =
-                    self::getWordInfo($index_name, $hash[0],
-                        $hash[1], $hash[2], $threshold, $start_generation,
-                        $num_distinct_generations, true);
-            } else {
-                list($num_docs, ) =
-                    self::getWordInfo($index_name, $hash, 0, "",
-                    $threshold, $start_generation, $num_distinct_generations,
-                    true);
-            }
+            list($num_docs, ) =
+                self::getWordInfo($index_name, $hash[0],
+                    $hash[1], $threshold, $start_generation,
+                    $num_distinct_generations, true);
             $total_num_docs += $num_docs;
             if ($threshold > 0 && $total_num_docs > $threshold) {
                    return $total_num_docs;
diff --git a/src/library/IndexShard.php b/src/library/IndexShard.php
index 0b24c8df7..4dcedaa7b 100644
--- a/src/library/IndexShard.php
+++ b/src/library/IndexShard.php
@@ -323,7 +323,6 @@ class IndexShard extends PersistentStructure implements
      * @param array $word_lists (word => array of word positions in doc)
      * @param array $meta_ids meta words to be associated with the document
      *     an example meta word would be filetype:pdf for a PDF document.
-     * @param array $materialized_metas
      * @param bool $is_doc flag used to indicate if what is being sored is
      *     a document or a link to a document
      * @param mixed $rank either false if not used, or a 4 bit estimate of the
@@ -331,8 +330,7 @@ class IndexShard extends PersistentStructure implements
      * @return bool success or failure of performing the add
     public function addDocumentWords($doc_keys, $summary_offset, $word_lists,
-        $meta_ids = [], $materialized_metas = [], $is_doc = false,
-        $rank = false)
+        $meta_ids = [], $is_doc = false, $rank = false)
         if ($this->word_docs_packed == true) {
             $this->words = [];
@@ -343,7 +341,9 @@ class IndexShard extends PersistentStructure implements
         $link_doc_len = 0;
         $len_key = strlen($doc_keys);
         $num_keys = floor($len_key/self::DOC_KEY_LEN);
-        if ($num_keys * self::DOC_KEY_LEN != $len_key) { return false; }
+        if ($num_keys * self::DOC_KEY_LEN != $len_key) {
+            return false;
+        }
         if ($num_keys % 2 == 0 ) {
             $doc_keys .= self::BLANK; //want to keep docids_len divisible by 16
@@ -358,17 +358,16 @@ class IndexShard extends PersistentStructure implements
         foreach ($meta_ids as $meta_id) {
             $word_lists[$meta_id] = [];
-        $meta_string = encodeMaterialMetas($meta_ids, $materialized_metas);
         //using $this->docids_len divisible by 16
         $doc_offset = $this->docids_len >> 4;
         foreach ($word_lists as $word => $position_list) {
             $occurrences = count($position_list);
             if (isset($position_list["cond_max"])) { //for now
-                $word_id = crawlHashPath($word,
-                    $position_list["cond_max"], [], [], true);
+                $word_id = crawlHashPath($word, $position_list["cond_max"],
+                    true);
             }  else {
-                $word_id = crawlHashWord($word, true, $meta_string);
+                $word_id = crawlHashWord($word, true);
             $store = packPosting($doc_offset, $position_list);
             if (!isset($this->words[$word_id])) {
@@ -417,12 +416,10 @@ class IndexShard extends PersistentStructure implements
      * @param bool $raw whether the id is our version of base64 encoded or not
      * @param int $shift how many low order bits to drop from $word_id's
      *    when checking for a match
-     * @param string $mask if $hash is for a word, after the 9th byte what
-     *     meta word mask should be applied to the 20 byte hash
      * @return array first offset, last offset, count, exact matching id (
      *     recall match can ignore low order shift bits)
-    public function getWordInfo($word_id, $raw = false, $shift = 0, $mask = "")
+    public function getWordInfo($word_id, $raw = false, $shift = 0)
         if ($raw == false) {
             //get rid of out modified base64 encoding
@@ -431,7 +428,6 @@ class IndexShard extends PersistentStructure implements
         $is_disk = $this->read_only_from_disk;
         $word_item_len = self::WORD_KEY_LEN + self::WORD_DATA_LEN;
         $word_key_len = self::WORD_KEY_LEN;
-        $mask_len = strlen($mask);
         if ($is_disk) {
             if (!isset($word_id[1])) {
@@ -465,48 +461,6 @@ class IndexShard extends PersistentStructure implements
             $id = substr($word_string, 0, $word_key_len);
             $cmp = compareWordHashes($word_id, $id, $shift);
             if ($cmp === 0) {
-                $found = false;
-                $orig_id = $id;
-                $old_check_loc = $check_loc;
-                while (compareWordHashes($word_id, $id, $shift) == 0 &&
-                    $check_loc >= $low) {
-                    if ($check_loc != $old_check_loc) {
-                        $word_string = $this->getWordString($is_disk, $start,
-                            $check_loc, $word_item_len);
-                        if ($word_string == false) {
-                            break;
-                        }
-                        $id = substr($word_string, 0, $word_key_len);
-                    }
-                    if (matchingWordMetas($word_id, $id, $mask, $mask_len)) {
-                        $found = true;
-                        break;
-                    }
-                    $check_loc--;
-                }
-                $check_loc = $old_check_loc;
-                $id = $orig_id;
-                if (!$found) {
-                    while (compareWordHashes($word_id, $id, $shift) == 0 &&
-                        $check_loc <= $high) {
-                        if ($check_loc != $old_check_loc) {
-                            $word_string = $this->getWordString($is_disk,
-                                $start, $check_loc, $word_item_len);
-                            if ($word_string == false) {
-                                break;
-                            }
-                            $id = substr($word_string, 0, $word_key_len);
-                        }
-                        if (matchingWordMetas($word_id, $id, $mask,$mask_len)) {
-                            $found = true;
-                            break;
-                        }
-                        $check_loc++;
-                    }
-                }
-                if (!$found) {
-                    return false;
-                }
                 $tmp_info = $this->getWordInfoFromString(
                     substr($word_string, $word_key_len));
                 $tmp_info[] = $id;
@@ -668,7 +622,8 @@ class IndexShard extends PersistentStructure implements
         $item[self::DOC_LEN] = $doc_len;
         $item[self::IS_DOC] = $is_doc;
-        $item[self::PROXIMITY]=$this->computeProximity($position_list, $is_doc);
+        $item[self::PROXIMITY] =
+            $this->computeProximity($position_list, $is_doc);
         $occurrences = $this->weightedCount($position_list, $is_doc);
         //override $occurrences if $occurs != 0
         if ($occurs != 0) {
@@ -776,6 +731,9 @@ class IndexShard extends PersistentStructure implements
             self::TITLE => 0,
             self::DESCRIPTION => 0,
             self::LINKS => 0];
+        if (!is_array($position_list)) {
+            return $count;
+        }
         foreach ($position_list as $position) {
             if ($is_doc) {
                 if ($position < C\AD_HOC_TITLE_LENGTH) {
@@ -1289,10 +1247,10 @@ class IndexShard extends PersistentStructure implements
             crawlLog("Saving index shard .. done merge postings to string");
-        if ($with_logging) {
-            crawlLog("Saving index shard .. make prefixes");
-        }
-        $header =  pack("N*", $this->prefixes_len ,
+            if ($with_logging) {
+                crawlLog("Saving index shard .. make prefixes");
+            }
+        $header =  pack("N*", $this->prefixes_len,
@@ -1337,6 +1295,31 @@ class IndexShard extends PersistentStructure implements
         $this->word_docs_packed = false;
         return $out;
+    /**
+     * This method re-saves a saved shard without the prefixes and dictionary.
+     * It would typically be called after this information has been stored
+     * in an IndexDictionary object so that the data is not redundantly stored.
+     */
+    public function saveWithoutDictionary()
+    {
+        $this->getShardHeader();
+        $header =  pack("N*", 0, 0,
+            $this->word_docs_len,
+            $this->docids_len,
+            $this->generation,
+            $this->num_docs_per_generation,
+            $this->num_docs,
+            $this->num_link_docs,
+            $this->len_all_docs,
+            $this->len_all_link_docs);
+        $word_docs = $this->getWordDocsSubstring();
+        $doc_infos = $this->getDocInfoSubstring();
+        $fh = fopen($this->filename, "wb");
+        fwrite($fh, $header);
+        fwrite($fh, $word_docs);
+        fwrite($fh, $doc_infos);
+        fclose($fh);
+    }
      * Computes the prefix string index for the current words array.
      * This index gives offsets of the first occurrences of the lead two char's
@@ -1394,7 +1377,7 @@ class IndexShard extends PersistentStructure implements
      * Posting lists are initially stored associated with a word as a key
      * value pair. The merge operation then merges them these to a string
-     * help by word_postings. packWords separates words from postings.
+     * held by word_postings. packWords separates words from postings.
      * After being applied words is a string consisting of
      * triples (as concatenated strings) word_id, start_offset, end_offset.
      * The offsets refer to integers offsets into a string $this->word_docs
@@ -1504,7 +1487,6 @@ class IndexShard extends PersistentStructure implements
             $postings = substr($this->word_postings,
                 $pos + $key_len + $posting_len, $len);
             $pos += $key_len + $posting_len + $len;
             if ($len != $two_doc_len ||
                 strncmp($postings,  self::HALF_BLANK, self::POSTING_LEN) != 0) {
                 if ($fh != null) {
@@ -1582,8 +1564,11 @@ class IndexShard extends PersistentStructure implements
      * @param $len number of bytes to get
      * @return desired string
-    public function getWordDocsSubstring($offset, $len)
+    public function getWordDocsSubstring($offset = 0, $len = 0)
+        if ($len <= 0) {
+            $len = $this->word_docs_len;
+        }
         if ($this->read_only_from_disk) {
             return $this->getShardSubstring($this->word_doc_offset + $offset,
@@ -1611,8 +1596,11 @@ class IndexShard extends PersistentStructure implements
      * @param $len number of bytes to get
      * @return desired string
-    public function getDocInfoSubstring($offset, $len)
+    public function getDocInfoSubstring($offset = 0, $len = 0)
+        if ($len <= 0) {
+            $len = $this->docids_len;
+        }
         if ($this->read_only_from_disk) {
             return $this->getShardSubstring(
                 $this->doc_info_offset + $offset, $len, false);
@@ -1870,4 +1858,4 @@ class IndexShard extends PersistentStructure implements
             substr($value, self::WORD_KEY_LEN,
\ No newline at end of file
diff --git a/src/library/PhraseParser.php b/src/library/PhraseParser.php
index 46efbdaa8..14eceb76a 100755
--- a/src/library/PhraseParser.php
+++ b/src/library/PhraseParser.php
@@ -60,11 +60,6 @@ class PhraseParser
         'path:', 'robot:', 'safe:', 'server:', 'site:', 'size:',
         'time:', 'u:', 'version:','weight:', 'w:'
-    /**
-     * Those meta words whose values will be encoded as part of word_ids
-     * @var array
-     */
-    public static $materialized_metas = ["class:", "media:", "safe:"];
      * A list of meta words that might be extracted from a query
      * @var array
@@ -1076,57 +1071,6 @@ class PhraseParser
         $link_meta_ids[] = "link:all";
         return $link_meta_ids;
-    /**
-     * Given the word key of a term (a hash of the term string which may
-     * have materialized meta information such as media type encoded in it),
-     * compute the media type.
-     * @param string $word_key hash of term with encoded metas
-     * @return string what media type it is such as Text, Image, News, Video
-     *      if it can be determined and unknown otherwise.
-     */
-    public static function getMediaType($word_key)
-    {
-        if (strlen($word_key) < 10) {
-            return "unknown";
-        }
-        $media_char = $word_key[9];
-        $media_types = ["media:text" => "Text", "media:image" => "Image",
-            "media:video" => "Video", "media:news" => "News"];
-        foreach ($media_types as $type => $common_name) {
-            $material_meta_string = encodeMaterialMetas([$type],
-                PhraseParser::$materialized_metas);
-            if ($material_meta_string[0] == $media_char) {
-                return $common_name;
-            }
-        }
-        return "Unknown";
-    }
-    /**
-     * Given the word key of a term (a hash of the term string which may
-     * have materialized meta information such as safe (not X-rated) search
-     * info encoded in it), compute the safe value.
-     * @param string $word_key hash of term with encoded metas
-     * @return string whether the term is associated with a "safe" page
-     *      in which case the string "True" is returned; an "unsafe" page
-     *      in which case the string "False" is returned; or "Undefined"
-     *      if it cannot be determined from the word key
-     */
-    public static function getSafety($word_key)
-    {
-        if (strlen($word_key) < 11) {
-            return "unknown";
-        }
-        $safety_char = $word_key[10];
-        $safety_types = ["safe:true" => "True", "safe:false" => "False"];
-        foreach ($safety_types as $type => $common_name) {
-            $material_meta_string = encodeMaterialMetas([$type],
-                PhraseParser::$materialized_metas);
-            if ($material_meta_string[1] == $safety_char) {
-                return $common_name;
-            }
-        }
-        return "Undefined";
-    }
      * Computes the Cosine-similarity of two phrases
diff --git a/src/library/Thesaurus.php b/src/library/Thesaurus.php
deleted file mode 100644
index 77dd37858..000000000
--- a/src/library/Thesaurus.php
+++ /dev/null
@@ -1,361 +0,0 @@
- * SeekQuarry/Yioop --
- * Open Source Pure PHP Search Engine, Crawler, and Indexer
- *
- * Copyright (C) 2009 - 2018  Chris Pollett
- *
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <>.
- *
- *
- * @author Shailesh Padave
- * @license GPL3
- * @link
- * @copyright 2009 - 2018
- * @filesource
- */
-namespace seekquarry\yioop\library;
-use seekquarry\yioop\configs as C;
-/** For Yioop global defines */
-require_once __DIR__."/../configs/Config.php";
- * Class used to reorder the last 10 links computed by PhraseModel based on
- * thesaurus semantic information. For English, thesaurus semantic information
- * can be provided by WordNet, a lexical English database
- * available at
- * To enable, you this have to define WORDNET_EXEC in your local_config file.
- * The idea behind thresaurus reordering is that given a query, it
- * is tagged for parts of speech. Each term is then looked up in thesaurus for
- * those parts of speech. Representative phrases for those term senses are
- * extracted from the ranked thesaurus output and a set of rewrites of the
- * original query are created. By looking up the number
- * of times these rewrites occur in the searched index the top two phrases
- * that represent the original query are computed.The BM25 similarity of these
- * phrases is then scored against each of the 10 output summaries of
- * PhraseModel and used to reorder the results.
- * To add thesaurus reordering for a different locale, two methods need to be
- * written in that locale tokenizer.php file
- * tagPartsOfSpeechPhrase($phrase) which on an input phrase return a string
- *     where each term_i in the phrase has been replace with term_i~pos
- *     where pos is a two character part of speech NN, VB, AJ, AV, or NA (if
- *     none of the previous apply)
- * scoredThesaurusMatches($term, $word_type, $whole_query) which takes
- *     a term from an original whole_query which has been tagged to be
- *     one of the types VB (for verb), NN (for noun), AJ (for adjective),
- *     AV (for adverb), or NA (for anything else), it outputs
- *     a sequence of  (score => array of thesaurus terms) associations.
- *     The score representing one word sense of term
- * Given that these methods have been implemented if the use_thesaurus field
- * of that language tokenizer is set to true, the thesaurus will be used.
- */
-class Thesaurus
-    /**
-     * Extracts similar phrases to the input query using thesaurus results.
-     * Part of speech tagging is processed on input and the output is
-     * looked up in the thesaurus. USing this a ranked list of alternate
-     * query phrases is created.
-     * For those phrases, counts in the Yioop index are calculated
-     * and the top two phrases are selected.
-     * @param string $orig_query input query from user
-     * @param string $index_name selected index for search engine
-     * @param string $lang locale tag for the query
-     * @param integer $threshold once count in posting list for any word
-     *     reaches to threshold then return the number
-     * @return array of top two words
-     */
-    public static function getSimilarPhrases($orig_query, $index_name,
-        $lang, $threshold = 10)
-    {
-        $num_docs = [];
-        $scores = [];
-        $suggested_queries =
-            self::getInitialSuggestions($orig_query, $lang);
-        foreach ($suggested_queries as $suggestion) {
-            $num_docs[$suggestion] =
-                self::numDocsIndex($suggestion, $threshold, $index_name, $lang);
-        }
-        arsort($num_docs);
-        $result = [];
-        $i = 0;
-        foreach ($num_docs as $k => $v) {
-            $result[$i] = $k;
-            $i++;
-            if ($i >= 2) { break; }
-        }
-        return $result;
-    }
-    /**
-     * Gets array of BM25 scores for given input array of summaries
-     * and thesaurus generated queries
-     * @param array $similar_phrases an array of thesaurus generated queries
-     * @param array $summaries an array of summaries which is generated
-     *     during crawl time.
-     * @return array of BM25 score for each document based on the thesaurus
-     * simimar phrases
-     */
-    public static function scorePhrasesSummaries($similar_phrases, $summaries)
-    {
-        $score = [];
-        //if there are no similar words then
-        if (empty($similar_phrases)) {
-            return [];
-        } else {
-            $num_phrases = count($similar_phrases);
-            for ($i = 0; $i < $num_phrases; $i++) {
-                $phrase = $similar_phrases[$i];
-                $terms = explode(' ', $phrase);
-                $summaries = self::changeCaseOfStringArray($summaries);
-                $idf = self::calculateIDF($summaries, $terms);
-                $tf = self::calculateTFBM25($summaries, $terms);
-                $num_summaries = count($summaries);
-                $num_terms = count($terms);
-                $bm25_result[$i] =
-                    self::calculateBM25($idf, $tf, $num_terms, $num_summaries);
-            }
-            if (count($bm25_result) == 1) {
-                for ($i = 0; $i < $num_summaries; $i++) {
-                    $temp = 0;
-                    $temp = $bm25_result[0][$i];
-                    $score[$i] = $temp;
-                }
-            } else {
-                for ($i = 0; $i < $num_summaries; $i++) {
-                    $temp = 0;
-                    $temp = $bm25_result[0][$i] * (2/3) +
-                        $bm25_result[1][$i] * (1/3);
-                    $score[$i] = $temp;
-                }
-            }
-            return $score;
-        }
-    }
-    /**
-     * Computes suggested related phrases from thesaurus based on part of
-     * speech  done on each query term.
-     *
-     * @param string $query query entered by user
-     * @param string $lang locale tag for the query
-     * @return string array $suggestion consisting of phrases suggested to
-     *     be similar in meaning to some sens of the query
-     */
-    public static function getInitialSuggestions($query, $lang)
-    {
-        $tokenizer = PhraseParser::getTokenizer($lang);
-        $pos_query = $tokenizer->tagPartsOfSpeechPhrase($query);
-        $max_len = 25;
-        $replacement_phrases = [];
-        $suggestions = [];
-        $terms = preg_split("/\s+|\-/", trim($query));
-        $pos_terms = preg_split("/\s+/",
-            trim($pos_query), -1, PREG_SPLIT_NO_EMPTY);
-        $num_pos_terms = count($pos_terms);
-        $word_type = null;
-        $similar_words = [];
-        $known_word_types = ["NN", "VB", "AJ", "AV"];
-        for ($i = 0; $i < $num_pos_terms; $i++) {
-            $pos = strpos($pos_terms[$i], '~');
-            $word_type = trim(substr($pos_terms[$i], $pos + 1));
-            if (!in_array($word_type, $known_word_types)) {
-                $word_type = "NA";
-            }
-            $current_word = substr($pos_terms[$i], 0, $pos);
-            if ($word_type != "NA") {
-                $similar_phrases = $tokenizer->scoredThesaurusMatches(
-                    $current_word, $word_type, $query);
-                $highest_scoring_sense_phrases = ($similar_phrases) ?
-                    array_shift($similar_phrases): false;
-                if ($highest_scoring_sense_phrases) {
-                    $replacement_phrases[$current_word] =
-                        $highest_scoring_sense_phrases;
-                }
-            }
-        }
-        $i = 0;
-        foreach ($replacement_phrases as $words => $similar_phrases) {
-            foreach ($similar_phrases as $phrase) {
-                if (mb_strpos(trim($phrase), ' ') !== false) {
-                    $phrase = preg_replace('/~[\w]+/', '', $phrase);
-                }
-                $modified_query = preg_replace(
-                    '/' . $words . '/', trim($phrase), $query);
-                if (mb_strlen($modified_query) < $max_len &&
-                    mb_strpos($modified_query, $query) === false) {
-                    $suggestions[$i] = $modified_query;
-                    $i++;
-                }
-            }
-        }
-        return $suggestions;
-    }
-    /**
-     * Returns the number of documents in an index that a phrase occurs in.
-     * If it occurs in more than threshold documents then cut off search.
-     *
-     * @param string $phrase to look up in index
-     * @param int $threshold once count in posting list for any word
-     *     reaches to threshold then return the number
-     * @param string $index_name selected index for search engine
-     * @param string $lang locale tag for the query
-     * @return int number of documents phrase occurs in
-     */
-    public static function numDocsIndex($phrase, $threshold, $index_name, $lang)
-    {
-        PhraseParser::canonicalizePunctuatedTerms($phrase, $lang);
-        $terms = PhraseParser::stemCharGramSegment($phrase, $lang);
-        $num  = count($terms);
-        if ($index_name == null) {
-            return 0;
-        }
-        if (count($terms) > C\MAX_QUERY_TERMS) {
-            $terms  = array_slice($terms, 0, C\MAX_QUERY_TERMS);
-        }
-        $whole_phrase = implode(" ", $terms);
-        return IndexManager::numDocsTerm($whole_phrase, $index_name,
-            $threshold);
-    }
-    /**
-     * Lower cases an array of strings
-     *
-     * @param array $summaries strings to put into lower case
-     * @return array with strings converted to lower case
-     */
-    public static function changeCaseOfStringArray($summaries)
-    {
-        return explode("-!-", mb_strtolower(implode("-!-", $summaries)));
-    }
-    /**
-     * Computes the BM25 of an array of documents given that the idf and
-     * tf scores for these documents have already been computed
-     *
-     * @param array $idf inverse doc frequency for given query array
-     * @param array $tf term frequency for given query array
-     * @param $num_terms number of terms that make up input query
-     * @param $num_summaries count for input summaries
-     * @returns array consisting of BM25 scores for each document
-     */
-    public static function calculateBM25($idf, $tf, $num_terms, $num_summaries)
-    {
-        $scores = [];
-        for ($i = 0; $i < $num_terms; $i++) {
-            for ($j = 0; $j < $num_summaries; $j++) {
-                $bm25_score[$i][$j] = $idf[$i] * $tf[$i][$j];
-            }
-        }
-        for ($i = 0; $i < $num_summaries; $i++) {
-            $val = 0;
-            for ($j = 0; $j < $num_terms; $j++) {
-                $val += $bm25_score[$j][$i];
-            }
-            $scores[$i] = $val;
-        }
-        return $scores;
-    }
-    /**
-     * Calculates the BM25 normalized term frequency of a set of terms in
-     * a collection of text summaries
-     *
-     * @param array $summaries list of summary strings to compute BM25TF w.r.t
-     * @param array $terms we want the term frequency computation for
-     * @return array $tfbm25 a 2d array with rows being indexed by terms and
-     *     columns indexed by summaries and the values of an entry being
-     *     the tfbm25 score for that term in that document
-     */
-    public static function calculateTFBM25($summaries, $terms)
-    {
-        $k1 = 1.5;
-        $b = 0.75;
-        $tf_values = [];
-        $tfbm25 = [];
-        $doc_length = strlen(implode("", $summaries));
-        $num_summaries = count($summaries);
-        if ($num_summaries!= 0) {
-            $avg_length = $doc_length / $num_summaries;
-        } else {
-            $avg_length = 0;
-        }
-        $avg_length = max($avg_length, 1);
-        $tf_values = self::calculateTermFreq($summaries, $terms);
-        $num_terms =count($terms);
-        for ($i = 0; $i < $num_terms; $i++) {
-            for ($j = 0; $j < $num_summaries; $j++) {
-                $frequency = $tf_values[$i][$j];
-                $tfbm25[$i][$j] =
-                    ($frequency * ($k1 + 1))/($frequency + $k1 *
-                    ((1 - $b) + $b * ($doc_length/$avg_length)));
-            }
-        }
-        return $tfbm25;
-    }
-    /**
-     * Computes a 2D array of the number of occurences of term i in document j
-     *
-     * @param array $summaries documents to compute frequencies in
-     * @param array $terms terms to compute frequencies for
-     * @return array 2D array as described above
-     */
-    public static function calculateTermFreq($summaries, $terms)
-    {
-        $tf_values = [];
-        $num_terms = count($terms);
-        $num_summaries = count($summaries);
-        for ($i = 0; $i < $num_terms; $i++) {
-            for ($j = 0; $j < $num_summaries; $j++) {
-                if ($terms[$i] != "") {
-                    $frequency = substr_count($summaries[$j], $terms[$i]);
-                    $tf_values[$i][$j] = $frequency;
-                } else {
-                    $tf_values[$i][$j] = 0;
-                }
-            }
-        }
-        return $tf_values;
-    }
-    /**
-     * To get the inverse document frequencies for a collection of terms in
-     * a set of documents.
-     * IDF(term_i) = log_10(# of document / # docs term i in)
-     *
-     * @param array $summaries documents to use in calculating IDF score
-     * @param array $terms terms to compute IDF score for
-     * @return array $idf 1D-array saying the inverse document frequency for
-     * each term
-     */
-    public static function calculateIDF($summaries, $terms)
-    {
-        $N = count($summaries);
-        $Nt = [];
-        $term_count = 0;
-        $num_terms = count($terms);
-        for ($i = 0; $i < $num_terms; $i++) {
-            $cnt_Nt = 0;
-            $term_count++;
-            foreach ($summaries as $summary)
-            {
-                if (stripos($summary, $terms[$i]) !== false) {
-                    $cnt_Nt++;
-                }
-            }
-            $Nt[$i] = $cnt_Nt;
-            $idf[$i] = ($Nt[$i] != 0) ? log10($N / $Nt[$i]) : 0;
-        }
-        return $idf;
-    }
diff --git a/src/library/Utility.php b/src/library/Utility.php
index aadb01782..2d7fe1807 100755
--- a/src/library/Utility.php
+++ b/src/library/Utility.php
@@ -36,7 +36,7 @@ namespace seekquarry\yioop\library;
 use seekquarry\yioop\configs as C;

 /** For Yioop global defines */
-require_once __DIR__."/../configs/Config.php";
+require_once __DIR__ . "/../configs/Config.php";
  * Adds delimiters to a regex that may or may not have them
@@ -50,8 +50,8 @@ function addRegexDelimiters($expression)
     $last = $expression[$len - 1];
     if (($first != $last && $len > 1) || $len == 1) {
         $expression = ($first != '/' ) ?
-            "/".$expression."/"
-            : "@".$expression."@";
+            "/" . $expression . "/"
+            : "@" . $expression . "@";
     return $expression;
@@ -233,7 +233,6 @@ function vByteDecode(&$str, &$offset)
         $pos_int += (ord($str[$offset] & 127) << $shift);
         $shift += 7;
     return $pos_int;
@@ -285,7 +284,7 @@ function packPosting($doc_index, $position_list, $delta = true)
 function unpackPosting($posting, &$offset, $dedelta = true)
-    $delta_list = decodeModified9($posting, $offset);
+    $delta_list = (array) decodeModified9($posting, $offset);
     $doc_index = array_shift($delta_list);
     if (($doc_index & (2 << 26)) > 0) {
         $delta0 = ($doc_index & ((2 << 9) - 1));
@@ -946,17 +945,14 @@ function crawlHash($string, $raw = false)
  * @param string $string word to hash
  * @param bool $raw whether to base64Hash the result
- * @param $meta_string the up to 11 byte string of meta information
  * @return string first 8 bytes of md5 of $string concatenated with \x00
  *     to indicate the hash is of a word not a phrase concatenated  with the
  *     padded to 11 byte $meta_string.
-function crawlHashWord($string, $raw = false, $meta_string = "")
+function crawlHashWord($string, $raw = false)
     $pre_hash = substr(md5($string, true), 0, 8) .
-        "\x00";
-    $meta_string = substr($meta_string, 0, 11);
-    $pre_hash .= $meta_string;
+        "\x00" . substr($string, 0, 11);
     $pre_hash = str_pad($pre_hash, 20, "\x00");
     /* low order bytes all 0 -- distinguishes it from a crawlHashPath */
     if (!$raw) {
@@ -973,24 +969,15 @@ function crawlHashWord($string, $raw = false, $meta_string = "")
  * maximal.
  * @param string $string what to find hashes for
- * @param array $metas array of meta word values
- * @param array $encode_metas a list of meta word names to encode in word_ids
  * @param bool $raw whether to base64 the result
  * @return array of hashes with appropriates shifts if needed
-function allCrawlHashPaths($string, $metas = [], $encode_metas = [],
-    $raw = false)
+function allCrawlHashPaths($string, $raw = false)
-    $mask = "";
-    if ($encode_metas != []) {
-        $mask_num = min(11, count($encode_metas));
-        $found_materialized_metas = findMaterialMetas($metas, $encode_metas);
-        foreach ($encode_metas as $meta) {
-            $mask .= (isset($found_materialized_metas[$meta])) ? "\xFF": "\x00";
-        }
-    }
     $pos = -1;
     $hashes = [];
+    $last_entry = null;
+    $new_entry = null;
     $zero = "*";
     $shift = 0;
     $num_spaces = substr_count($string, " ");
@@ -1000,8 +987,7 @@ function allCrawlHashPaths($string, $metas = [], $encode_metas = [],
         $old_pos = $pos;
         $path_string = $string;
         for ($i = 0; $i < $num; $i++) {
-            $hash = crawlHashPath($path_string, $pos + 1, $metas,
-                $encode_metas, $raw);
+            $hash = crawlHashPath($path_string, $pos + 1, $raw);
             if ($i > 0 && $j > 0) {
                 $path_len = $num_spaces - $j + 1 + $i;
                 if ($path_len < 4) {
@@ -1075,97 +1061,23 @@ function allCrawlHashPaths($string, $metas = [], $encode_metas = [],
                         $shift = 64 + 29 * ($i - 12);
-                $hashes[] = [$hash, $shift, $mask];
-            } else if ($mask != "") {
-                $hashes[] = [$hash, $shift, $mask];
+                $new_entry = [$hash, $shift];
             } else {
-                $hashes[] = $hash;
+                $new_entry = [$hash, 0];
+            }
+            if ($new_entry != $last_entry) {
+                $hashes[] = $new_entry;
+            }
+            if ($j == 0) {
+                break;
-            if ($j == 0) {break; }
             $path_string .= " " . $zero;
         $pos = mb_strpos($string, " ", $pos + 1);
-        $encode_metas = [];
     } while($pos > 0 && $old_pos != $pos);
-    if (count($hashes) == 1) {
-        return $hashes[0];
-    }
     return $hashes;
- * Give an array of values for meta words (for example, media:video, lang:en)
- * and an array of names of meta words to be encoded into word_id's
- * (for example, media:, safe:, class:) return an associative array of pairs
- * (meta word name =>array(value of that name)) which should be encoded
- * into word id's
- *
- * @param array $metas array of meta word values
- * @param array $encode_metas a list of meta word names to encode in word_ids
- * @return array $found_materialized_metas associative array of name =>
- *     values for that name
- */
-function findMaterialMetas($metas, $encode_metas)
-    $found_materialized_metas = [];
-    foreach ($metas as $meta_id) {
-        if ($encode_metas != []) {
-            $match_kinds = explode(":", $meta_id);
-            $next_char = (isset($match_kinds[1][0])) ? $match_kinds[1][0] :
-                ord('a');
-            $is_class = ($match_kinds[0] == 'class');
-            if (count($match_kinds) > 1 &&
-                in_array($match_kinds[0].":", $encode_metas) &&
-                !in_array($match_kinds[1], ["all"]) &&
-                !isset($match_kinds[2])) {
-                    $found_materialized_metas[$match_kinds[0].":"][] =
-                        $meta_id;
-            }
-        }
-    }
-    return $found_materialized_metas;
- * Give an array of values for meta words (for example, media:video)
- * and an array of names of meta words to be encoded into word_id's
- * (for example, media:, safe:, class:) returns a string mask for the
- * byte positions in a word_id after the 9th byte. The format of a word id
- * in the case of a single word is described in the documentation for
- * @see crawlHashPath
- *
- * @param array $metas a list of meta word values extracted from a query
- *      string or document.
- * @param array $encode_metas a list of meta word names that should be encoded
- *      in word id's For example, (media:, safe:, class:)
- * @return string a 9 byte string where encoded meta word values have been
- *     stored
- */
-function encodeMaterialMetas($metas, $encode_metas)
-    if (!is_array($encode_metas) || empty($encode_metas)) {
-        return "";
-    }
-    $found_materialized_metas = findMaterialMetas($metas, $encode_metas);
-    $meta_string = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
-    foreach ($found_materialized_metas as $name => $values) {
-        foreach ($values as $value) {
-            if ($name == 'class:' && isset($value[6])) {
-                $pre_meta_pos = ord($value[6]);
-                /*
-                   positions for classifier classes start at 2
-                 */
-                $meta_pos = (($pre_meta_pos) % 9) + 2;
-            } else {
-                /* m is first char of media, s is first char of s
-                   offset will be 1 if safe, 0 if media
-                 */
-                $meta_pos = (ord($name[0]) > ord('m')) ? 1 : 0;
-            }
-            $meta_string[$meta_pos] = substr(crawlHash($value, true), 0, 1);
-        }
-    }
-    return $meta_string;
  * Given a string makes an 20 byte hash path - where first 8 bytes is
  * a hash of the string before path start, last 12 bytes is the path
@@ -1185,36 +1097,22 @@ function encodeMaterialMetas($metas, $encode_metas)
  * If $path_start is 0 behaves like crawlHashWord(). The above encoding is
  * typically used to make word_ids for whole phrases, to make word id's
  * for single words, the format is
- * (64 bits for word, 1 byte null, remaining 11 bytes encode an materialized
- * meta words present in document or query string). Of this 11 bytes,
- * the first is used for the meta word media:, so if the document is of type
- * media:image, then a single byte hash of media:image gives the value of this
- * byte. The second byte encodes the meta word safe: in a similar fashion.
- * The remaining 9 bytes encode different values of the class: meta word.
- * To encode class:some_value., first class:some_value[0] is hashed to a value
- * j betwen 0 and 8. Then class:some_value is hash to a single byte b. Then
- * the jth value of the remaining bytes is set to b. Non affected bytes are
- * null.
+ * (64 bits for word, 1 byte null, then 11 bytes which are ignored).
  * @param string $string what to hash
  * @param int $path_start what to use as the split between 5 byte front
  *     hash and the rest
- * @param array $metas meta word values from a document or query string
- * @param array $encode_metas a list of names of meta word values which should
- *     encoded into word ids. i.e., (media:, safe:, class:) or none.
  * @param bool $raw whether to modified base64 the result
  * @return string 8 bytes that results from this hash process
-function crawlHashPath($string, $path_start = 0, $metas = [],
-    $encode_metas = [], $raw = false)
+function crawlHashPath($string, $path_start = 0, $raw = false)
     if ($path_start > 0 ) {
         $string_parts = explode(" ", substr($string, $path_start));
         $num_parts = count($string_parts);
     if ($path_start == 0 || $num_parts == 0) {
-        $meta_string = encodeMaterialMetas($metas, $encode_metas);
-        $hash = crawlHashWord($string, true, $meta_string);
+        $hash = crawlHashWord($string, true);
         if (!$raw) {
             $hash = base64Hash($hash);
@@ -1227,7 +1125,6 @@ function crawlHashPath($string, $path_start = 0, $metas = [],
     $path_ints = [];
     $modes = [3, 3, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13];
     $mode_nums = [1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6];
     foreach ($string_parts as $part) {
         if ($part == "*") {
             $path_ints[] = 0;
@@ -1236,7 +1133,9 @@ function crawlHashPath($string, $path_start = 0, $metas = [],
     $num_parts = count($path_ints);
-    if ($num_parts > 13) {$num_parts = 13; }
+    if ($num_parts > 13) {
+        $num_parts = 13;
+    }
     $mode = $modes[$num_parts];
     $mode_num = $mode_nums[$num_parts];
     switch ($mode) {
@@ -1268,7 +1167,6 @@ function crawlHashPath($string, $path_start = 0, $metas = [],
                 + ($path_ints[4] & $mask)) << $shift)
                 + ($path_ints[5] & $mask)) << $shift)
                 + ($path_ints[6] & $mask);
         case 9:
             $path_ints[8] = isset($path_ints[8]) ? $path_ints[8] : 0;
@@ -1366,11 +1264,7 @@ function crawlHashPath($string, $path_start = 0, $metas = [],
 function compareWordHashes($id1, $id2, $shift = 0)
-    if (!isset($id1[8]) || !isset($id2[8])) {
-        return strncmp($id1, $id2, 8);
-    } else if ($id1[8] == "\x00") {
-        return strncmp($id1, $id2, 9);
-    } else if ($shift < 32) {
+    if ($shift < 32) {
         $cmp = strncmp($id1, $id2, 16);
     } else if ($shift < 64) {
         $cmp = strncmp($id1, $id2, 12);
@@ -1393,35 +1287,6 @@ function compareWordHashes($id1, $id2, $shift = 0)
     $id2 = packInt(unpackInt(substr($id2, $pos, 4)) >> $shift);
     return strcmp($id1, $id2);
- * Check if two word id's match according to a mask of the last 12 bytes.
- *
- * @param string $word_id 20 byte word id to compare
- * @param string $id 20 byte word id to compare
- * @param string $mask what mask to use
- * @param string $mask_len the length of the mask
- * @return bool true if match; false otherwise
- */
-function matchingWordMetas($word_id, $id, $mask = "", $mask_len = 0)
-    if ($mask != "" && strlen($id) > 9 && strlen($word_id) > 9 &&
-        substr_compare($id, $word_id, 9, $mask_len) != 0) {
-        $k = 0;
-        $old_k = 0;
-        while(($k = strpos($mask, "\xFF", $old_k)) !== false) {
-            $loc = $k + 8;
-            if (isset($id[$loc]) && $id[$loc] != $word_id[$loc]) {
-                return false;
-                break;
-            }
-            if ($k == $old_k) {
-                $k++;
-            }
-            $old_k = $k;
-        }
-    }
-    return true;
  * Converts a crawl hash number to something closer to base64 coded but
  * so doesn't get confused in urls or DBs
@@ -1983,28 +1848,6 @@ function generalIsA($class_1, $class_2)
     return (is_a($class_1, $class_2) || is_subclass_of($class_1, $class_2));
- * Given an array of arrays acting much like a database table, this
- * returns a sequence of key value pairs, where the keys are the distinct
- * entries in $key_column and the values are the counts of numbers in
- * $count_column for each particular key;
- *
- * @param array $arr an array of arrays
- * @param mixed $key_column (string or int) field name of key column
- * @param mixed $count_column (string or int) field name of count column
- * @return array key => values pairs of counts
- */
-function arrayColumnCount($arr, $key_column, $count_column)
-    $out_arr = [];
-    foreach ($arr as $row) {
-        if (!isset($out_arr[$row[$key_column]])) {
-            $out_arr[$row[$key_column]] = 0;
-        }
-        $out_arr[$row[$key_column]] += $row[$count_column];
-    }
-    return $out_arr;
  * Given the contents of a start XML/HMTL tag strips out all the attributes
  * non listed in $safe_attribute_list
diff --git a/src/library/WebArchiveBundle.php b/src/library/WebArchiveBundle.php
index 65ab1e202..47e3bc8fb 100755
--- a/src/library/WebArchiveBundle.php
+++ b/src/library/WebArchiveBundle.php
@@ -118,7 +118,7 @@ class WebArchiveBundle
             $info = unserialize(
         } else {
-            $this->version = 1;
+            $this->version = C\DEFAULT_CRAWL_FORMAT;
         if (isset($info['NUM_DOCS_PER_PARTITION'])) {
             $this->num_docs_per_partition = $info['NUM_DOCS_PER_PARTITION'];
diff --git a/src/library/index_bundle_iterators/IndexBundleIterator.php b/src/library/index_bundle_iterators/IndexBundleIterator.php
index 16041c1df..af078e5a4 100644
--- a/src/library/index_bundle_iterators/IndexBundleIterator.php
+++ b/src/library/index_bundle_iterators/IndexBundleIterator.php
@@ -132,10 +132,6 @@ abstract class IndexBundleIterator implements CrawlConstants
         if (isset($this->word_key)) {
             $out .= "Word Key: " . L\toHexString($this->word_key)."\n";
             $out .= "Index Name: ".$this->index_name."\n";
-            $out .= "Media Type: ".PhraseParser::getMediaType(
-                $this->word_key) . "\n";
-            $out .= "Safe: ". PhraseParser::getSafety($this->word_key) . "\n";
         $out .= "Number of Docs: ".$this->num_docs;
         if (isset($this->index_bundle_iterator)) {
diff --git a/src/library/index_bundle_iterators/WordIterator.php b/src/library/index_bundle_iterators/WordIterator.php
index 1fa4d0cab..5742d9a1f 100644
--- a/src/library/index_bundle_iterators/WordIterator.php
+++ b/src/library/index_bundle_iterators/WordIterator.php
@@ -49,20 +49,21 @@ use seekquarry\yioop\library\IndexManager;
 class WordIterator extends IndexBundleIterator
-     * hash of word that the iterator iterates over
+     * hash of word or phrase that the iterator iterates over
      * @var string
     public $word_key;
-     * The timestamp of the index is associated with this iterator
-     * @var string
+     * Position from the end of the key that doesn't have to be an exact match
+     * (needed for phrases, since a suffix tree is used)
+     * @var int
-    public $index_name;
+    public $shift;
-     * The byte mask to apply against the word id
+     * The timestamp of the index is associated with this iterator
      * @var string
-    public $mask;
+    public $index_name;
      * First shard generation that word info was obtained for
      * @var int
@@ -155,6 +156,8 @@ class WordIterator extends IndexBundleIterator
      * Creates a word iterator with the given parameters.
      * @param string $word_key hash of word or phrase to iterate docs of
+     * @param int $shift up to what point in the key must match when doing
+     *      a dictionary lookup (for phrases, since a suffix tree is used)
      * @param string $index_name time_stamp of the to use
      * @param bool $raw whether the $word_key is our variant of base64 encoded
      * @param array $filter an array of hashes of domains to filter from
@@ -165,34 +168,33 @@ class WordIterator extends IndexBundleIterator
      *      gotten out of this iterator (may be reordered later). This flag
      *      controls whether an upper bound of self::LIMIT_FEEDS_COUNT is
      *      imposed on the number of feed results returned
-     * @param string $mask byte mask to apply against word id, default is for
-     *     exact match
-    public function __construct($word_key, $index_name, $raw = false,
+    public function __construct($word_key, $shift, $index_name, $raw = false,
         &$filter = null,
         $results_per_block = IndexBundleIterator::RESULTS_PER_BLOCK,
-        $limit_feeds = false,
-        $mask = "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF")
+        $limit_feeds = false)
         if ($raw == false) {
             //get rid of out modified base64 encoding
             $word_key = L\unbase64Hash($word_key);
+        if (L\crawlHashWord("media:news", true) == $word_key) {
+            $this->is_news = true;
+        }
         if ($filter != null) {
             $this->filter = & $filter;
         } else {
             $this->filter = null;
         $this->word_key = $word_key;
+        $this->shift = $shift;
         $this->index_name =  $index_name;
-        $this->mask = $mask;
         list($estimated_total, $this->dictionary_info) =
-            IndexManager::getWordInfo($index_name, $word_key, 0,
-            $mask, -1, -1, C\NUM_DISTINCT_GENERATIONS, true);
+            IndexManager::getWordInfo($index_name, $word_key, $shift,
+            -1, -1, C\NUM_DISTINCT_GENERATIONS, true);
         $this->feed_shard_name = C\WORK_DIRECTORY."/feeds/index";
         if ((!C\nsdefined('NO_FEEDS') || !C\NO_FEEDS)
             && file_exists($this->feed_shard_name)) {
-            //NO_FEEDS defined true in statistic_controller.php
             $this->use_feeds = true;
         } else {
             $this->use_feeds = false;
@@ -306,8 +308,8 @@ class WordIterator extends IndexBundleIterator
             if ($this->start_generation > 0) {
                 list($estimated_total, $this->dictionary_info) =
-                    $this->word_key, 0, $this->mask, -1, 0,
-                    C\NUM_DISTINCT_GENERATIONS, true);
+                    $this->word_key, 0, -1, 0, C\NUM_DISTINCT_GENERATIONS,
+                    true);
                 $this->num_docs = $this->feed_count + $estimated_total;
                 $this->dictionary_info = array_values($this->dictionary_info);
@@ -362,13 +364,8 @@ class WordIterator extends IndexBundleIterator
                     $this->next_offset, $this->feed_end,
                 $time = time();
-                // C1 is the materialized meta for media:news
-                $is_news = ($this->word_key[9] == "\xC1") ? true : false;
                 foreach ($pre_results as $keys => $pre_result) {
                     $pre_results[$keys][self::IS_FEED] = true;
-                    if ($is_news) {
-                        $pre_results[$keys][self::IS_NEWS] = true;
-                    }
                     $delta = $time - $pre_result[self::SUMMARY_OFFSET];
                     $pre_results[$keys][self::DOC_RANK] = 720000 /
                         max($delta, 1);
@@ -545,8 +542,7 @@ class WordIterator extends IndexBundleIterator
                 $this->generation_pointer >= $this->num_generations) {
                 list($estimated_remaining_total, $info) =
-                    $this->word_key, 0,
-                    $this->mask, -1, $this->num_generations,
+                    $this->word_key, 0, -1, $this->num_generations,
                     C\NUM_DISTINCT_GENERATIONS, true);
                 if (count($info) > 0) {
                     $this->num_docs = $this->seen_docs +
diff --git a/src/library/indexing_plugins/RecipePlugin.php b/src/library/indexing_plugins/RecipePlugin.php
index b351be772..65c08b5dc 100644
--- a/src/library/indexing_plugins/RecipePlugin.php
+++ b/src/library/indexing_plugins/RecipePlugin.php
@@ -429,7 +429,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
-                    self::index_data_base_name.$index_name);
+                    self::index_data_base_name . $index_name);
             L\crawlLog("...Recipe plugin finished.");
diff --git a/src/library/media_jobs/FeedsUpdateJob.php b/src/library/media_jobs/FeedsUpdateJob.php
index 0f664d1d1..1c9d2137a 100644
--- a/src/library/media_jobs/FeedsUpdateJob.php
+++ b/src/library/media_jobs/FeedsUpdateJob.php
@@ -591,8 +591,7 @@ class FeedsUpdateJob extends MediaJob
                 $meta_ids = $this->calculateMetas($lang, $item['PUBDATE'],
                     $source_name, $item["GUID"], $media_category);
                 $prune_shard->addDocumentWords($doc_keys, $item['PUBDATE'],
-                    $word_and_qa_lists["WORD_LIST"], $meta_ids,
-                    PhraseParser::$materialized_metas, true, false);
+                    $word_and_qa_lists["WORD_LIST"], $meta_ids, true, false);
diff --git a/src/locale/ar/configure.ini b/src/locale/ar/configure.ini
index 7afb07372..8b3c5a6e4 100755
--- a/src/locale/ar/configure.ini
+++ b/src/locale/ar/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "البحث"
 search_view_no_index_set = ""
 search_view_calculated = "%s ثوان."
 search_view_results = "عرض  %s- %s من  %s"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = "مؤقتاً"
@@ -810,7 +809,6 @@ search_view_inlink = "Inlinks"
 search_view_rank = "رتبة: %s"
 search_view_relevancy = "ق Rel:%"
 search_view_proximity = "ق Prox:%"
-search_view_thesaurus_score = ""
 search_view_score = "نقاط: %s"
 ; /src/views/elements
diff --git a/src/locale/bn/configure.ini b/src/locale/bn/configure.ini
index 22371535a..2e6aca5ee 100755
--- a/src/locale/bn/configure.ini
+++ b/src/locale/bn/configure.ini
@@ -800,7 +800,6 @@ search_view_search = ""
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ; /src/views/elements
diff --git a/src/locale/de/configure.ini b/src/locale/de/configure.ini
index 44abbd47d..1b5fc1df9 100755
--- a/src/locale/de/configure.ini
+++ b/src/locale/de/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Suche"
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ; /src/views/elements
diff --git a/src/locale/en_US/configure.ini b/src/locale/en_US/configure.ini
index 4d8ecbda9..e1ea2704d 100644
--- a/src/locale/en_US/configure.ini
+++ b/src/locale/en_US/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Search"
 search_view_no_index_set = "No Default Index Set"
 search_view_calculated = "%s seconds."
 search_view_results = "Showing %s - %s of %s"
-search_view_thesaurus_results = "Thesaurus Results"
 search_view_possible_answer = "Possible Answer:"
 search_view_word_cloud = "Words:"
 search_view_cache = "Cached"
@@ -810,7 +809,6 @@ search_view_inlink = "Inlinks"
 search_view_rank = "Rank:%s "
 search_view_relevancy = "Rel:%s "
 search_view_proximity = "Prox:%s"
-search_view_thesaurus_score = "Thesaurus: %s"
 search_view_score = "Score:%s"
 ; /src/views/elements
diff --git a/src/locale/en_US/resources/Tokenizer.php b/src/locale/en_US/resources/Tokenizer.php
index 037134aa1..c1a18ede7 100755
--- a/src/locale/en_US/resources/Tokenizer.php
+++ b/src/locale/en_US/resources/Tokenizer.php
@@ -32,18 +32,11 @@ use seekquarry\yioop\configs as C;
 use seekquarry\yioop\library\PhraseParser;
 use seekquarry\yioop\library as L;

-/* If you would like to use wordnet for thesaurus reordering of query results
-   define the following variable in your configs/local_config.php file with
-   the path to the WordNet executable.
- */
-if (!C\nsdefined("WORDNET_EXEC")) {
-    C\nsdefine("WORDNET_EXEC", "");
  * This class has a collection of methods for English locale specific
  * tokenization. In particular, it has a stemmer, a stop word remover (for
- * use mainly in word cloud creation), and a part of speech tagger (if
- * thesaurus reordering used). The stemmer is my stab at implementing the
+ * use mainly in word cloud creation), and a part of speech tagger (for
+ * question answering). The stemmer is my stab at implementing the
  * Porter Stemmer algorithm
  * presented
  * The code is based on the non-thread safe C version given by Martin Porter.
@@ -115,16 +108,10 @@ class Tokenizer
     private static $j;
-     * The constructor for a tokenizer can be used to say that a thesaurus
-     * for final query reordering is present. For english we do this if
-     * the WORDNET_EXEC variable is set. In which case we use WordNet for
-     * our reordering
+     * Do any global set up for tokenizer (none in the case of en-US)
     public function __construct()
-        if (C\WORDNET_EXEC != "") {
-            $this->use_thesaurus = true;
-        }
      * Stub function which could be used for a word segmenter.
@@ -139,91 +126,6 @@ class Tokenizer
         return $pre_segment;
-    /**
-     * Computes similar words and scores from WordNet output based on word
-     * type.
-     *
-     * @param string $term term to find related thesaurus terms
-     * @param string $word_type is the type of word such as "NN" (noun),
-     *     "VB" (verb), "AJ" (adjective), or "AV" (adverb)
-     *     (all other types will be ignored)
-     * @param string $whole_query the original query $term came from
-     * @return array a sequence of
-     *     (score => array of thesaurus terms) associations. The score
-     *     representing one word sense of term
-     */
-    public static function scoredThesaurusMatches($term, $word_type,
-        $whole_query)
-    {
-        $word_map = ["VB" => "verb", "NN" => "noun", "AJ" => "adj",
-            "AV" => "adv"];
-        //Gets overview of senses of term[$i] into data
-        exec(C\WORDNET_EXEC . " $term -over", $data);
-        if (!$data || ! isset($word_map[$word_type])) { return null; }
-        $full_name = $word_map[$word_type];
-        $lexicon_output = implode("\n", $data);
-        $sense_parts = preg_split("/\bThe\s$full_name".'[^\n]*\n\n/',
-            $lexicon_output);
-        if (!isset($sense_parts[1])) {return null; }
-        list($sense, ) = preg_split("/\bOverview\sof\s/", $sense_parts[1]);
-        $definitions_for_sense = preg_split("/\d+\.\s/", $sense, -1,
-            PREG_SPLIT_NO_EMPTY);
-        $num_definitions = count($definitions_for_sense);
-        $sentence = [];
-        $similar_phrases = [];
-        $avg_scores = [];
-        for ($i = 0; $i < $num_definitions; $i++) {
-            //get sentence fragments examples of using that definition
-            preg_match_all('/\"(.*?)\"/', $definitions_for_sense[$i],
-                $matches);
-            // to separate out the words
-            preg_match('/[\w+\s\,\.\']+\s\-+/', $definitions_for_sense[$i],
-                $match_word);
-            $thesaurus_phrases = preg_split("/\s*\,\s*/",
-                strtolower(rtrim(trim($match_word[0]), "-")));
-            //remove ori ginal term from thesaurus phrases if present
-            $m = 0;
-            foreach ($thesaurus_phrases as $thesaurus_phrase) {
-                $tphrase = trim($thesaurus_phrase);
-                if ($tphrase == trim($term)) {
-                    unset($thesaurus_phrases[$m]);
-                }
-                $m++;
-            }
-            $thesaurus_phrases = array_filter($thesaurus_phrases);
-            if ($thesaurus_phrases == []) {continue;}
-            $num_example_sentences = count($matches[1]);
-            $score = [];
-            for ($j = 0; $j < $num_example_sentences; $j++) {
-                $query_parts = explode(' ', strtolower($whole_query));
-                $example_sentence_parts = explode(' ',
-                    strtolower($matches[1][$j]));
-                $score[$j] = PhraseParser::getCosineRank($query_parts,
-                    $example_sentence_parts);
-                /*  If Cosine similarity is zero then go for
-                 * intersection similarity ranking
-                 */
-                if ($score[$j] == 0) {
-                    $score[$j] = PhraseParser::getIntersection($query_parts,
-                        $example_sentence_parts);
-                }
-            }
-            /*  We use the rounded average of the above times 100 as a score
-                score for a definition. To avoid ties we store in the low
-                order digits 99 - the definition it was
-             */
-            if ($num_example_sentences > 0) {
-                $definition_score = 100 * round(
-                    100 * (array_sum($score) / $num_example_sentences))
-                    + (99 - $i);
-            } else {
-                $definition_score = 99 - $i;
-            }
-            $similar_phrases[$definition_score] = $thesaurus_phrases;
-        }
-        krsort($similar_phrases);
-        return $similar_phrases;
-    }
      * Removes the stop words from the page (used for Word Cloud generation)
@@ -320,7 +222,7 @@ class Tokenizer
-        'thus','til','tip','to','together','too',
+        'thus','til', 'till','tip','to','together','too',
diff --git a/src/locale/es/configure.ini b/src/locale/es/configure.ini
index 57efd1d02..450e51af0 100755
--- a/src/locale/es/configure.ini
+++ b/src/locale/es/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Buscar"
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ; /src/views/elements
diff --git a/src/locale/fa/configure.ini b/src/locale/fa/configure.ini
index c73c7a95b..6c9aacf88 100755
--- a/src/locale/fa/configure.ini
+++ b/src/locale/fa/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "جستجو"
 search_view_no_index_set = ""
 search_view_calculated = "%s ثانیه"
 search_view_results = "در حال نمایش %s - %s از %s"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = "کش شده"
@@ -810,7 +809,6 @@ search_view_inlink = "پیوندهای داخلی"
 search_view_rank = "رتبه: %s"
 search_view_relevancy = "ارتباط: %s"
 search_view_proximity = "نزدیکی: %s"
-search_view_thesaurus_score = ""
 search_view_score = "امتیاز: %s"
 ; /src/views/elements
diff --git a/src/locale/fr_FR/configure.ini b/src/locale/fr_FR/configure.ini
index 09574c152..84278455a 100755
--- a/src/locale/fr_FR/configure.ini
+++ b/src/locale/fr_FR/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Rechercher"
 search_view_no_index_set = ""
 search_view_calculated = "%s secondes."
 search_view_results = "Affichage de %s - %s sur %s r&eacute;sultats"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = "En&nbsp;Cache"
@@ -810,7 +809,6 @@ search_view_inlink = "Liens retour"
 search_view_rank = "Rang: %s"
 search_view_relevancy = "Pertinence: %s"
 search_view_proximity = "Proximit&eacute;: %s"
-search_view_thesaurus_score = ""
 search_view_score = "Total: %s"
 ; /src/views/elements
diff --git a/src/locale/he/configure.ini b/src/locale/he/configure.ini
index c8fa6b01b..2a9cbfffd 100755
--- a/src/locale/he/configure.ini
+++ b/src/locale/he/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "חפש"
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ; /src/views/elements
diff --git a/src/locale/hi/configure.ini b/src/locale/hi/configure.ini
index 5d9b1dba4..26f554ce0 100755
--- a/src/locale/hi/configure.ini
+++ b/src/locale/hi/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "खोज"
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ; /src/views/elements
diff --git a/src/locale/in_ID/configure.ini b/src/locale/in_ID/configure.ini
index 2acc2dd67..9af7f978f 100755
--- a/src/locale/in_ID/configure.ini
+++ b/src/locale/in_ID/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Cari"
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = "Hasil"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = "Urutan"
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ; /src/views/elements
diff --git a/src/locale/it/configure.ini b/src/locale/it/configure.ini
index 2004095e2..c8e0324b3 100755
--- a/src/locale/it/configure.ini
+++ b/src/locale/it/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Cerca"
 search_view_no_index_set = ""
 search_view_calculated = "Calccolati in %s secondi."
 search_view_results = "Mostra risultati %s - %s di %s"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = "Archivio"
@@ -810,7 +809,6 @@ search_view_inlink = "Inlink"
 search_view_rank = "Pos.: %s "
 search_view_relevancy = "Rel: %s "
 search_view_proximity = "Pros: %s"
-search_view_thesaurus_score = ""
 search_view_score = "Punteggio %s"
 ; /src/views/elements
diff --git a/src/locale/ja/configure.ini b/src/locale/ja/configure.ini
index c07c80463..91ecd6c6d 100755
--- a/src/locale/ja/configure.ini
+++ b/src/locale/ja/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "検索"
 search_view_no_index_set = ""
 search_view_calculated = "%s分で計算しました。"
 search_view_results = "結果表示%s ー %s の %s"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = "キャッシューしました。"
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = "ランク:%s"
 search_view_relevancy = "関連:%s"
 search_view_proximity = "近さ: %s"
-search_view_thesaurus_score = ""
 search_view_score = "スコア %s"
 ; /src/views/elements
diff --git a/src/locale/kn/configure.ini b/src/locale/kn/configure.ini
index 66abefcb7..de6e899c9 100755
--- a/src/locale/kn/configure.ini
+++ b/src/locale/kn/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "ಹುಡುಕು"
 search_view_no_index_set = ""
 search_view_calculated = "ಲೆಕ್ಕಾಚಾರದ ಸಮಯ %s ಸೆಕೆಂಡು"
 search_view_results = "ತೋರಿಸುತ್ತಿರುವ ಫಲಿತಾಂಶಗಳು %s - %s ಆಫ್ %s"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = "ಸಿದ್ಧ ಸ್ಮೃತಿಕೋಶದಿಂದ ನೋಡಿ"
@@ -810,7 +809,6 @@ search_view_inlink = "ಒಳ ಕೊಂಡಿ"
 search_view_rank = "ಸ್ಥಾನ: %s"
 search_view_relevancy = "ಪ್ರಾಸ್ತಾವಿಕ: %s"
 search_view_proximity = "ಸಾನಿಧ್ಯ: %s"
-search_view_thesaurus_score = ""
 search_view_score = "ಅಂಕ: %s "
 ; /src/views/elements
diff --git a/src/locale/ko/configure.ini b/src/locale/ko/configure.ini
index 1734b5a6b..ef554515d 100755
--- a/src/locale/ko/configure.ini
+++ b/src/locale/ko/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "검색"
 search_view_no_index_set = ""
 search_view_calculated = "%s 초 결과 완료"
 search_view_results = "결과 %s - %s 의 %s"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = "캐시 됀것"
@@ -810,7 +809,6 @@ search_view_inlink = "인링크"
 search_view_rank = "랭크: %s"
 search_view_relevancy = "관련성: %s "
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = "점수 %s"
 ; /src/views/elements
diff --git a/src/locale/nl/configure.ini b/src/locale/nl/configure.ini
index bf4a694da..79f67927c 100644
--- a/src/locale/nl/configure.ini
+++ b/src/locale/nl/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "zoeken"
 search_view_no_index_set = "Geen Standaard Index Set"
 search_view_calculated = " %s seconden."
 search_view_results = "Toont %s - %s van %s"
-search_view_thesaurus_results = "thesaurus Resultaten"
 search_view_possible_answer = ""
 search_view_word_cloud = "woorden:"
 search_view_cache = "gecached"
@@ -810,7 +809,6 @@ search_view_inlink = "inlinks"
 search_view_rank = "Rang: %s"
 search_view_relevancy = "Rel: %s"
 search_view_proximity = "Prox: %s"
-search_view_thesaurus_score = "Thesaurus: %s"
 search_view_score = "Score: %s"
 ; /src/views/elements
diff --git a/src/locale/pl/configure.ini b/src/locale/pl/configure.ini
index 42144e947..e632fa466 100755
--- a/src/locale/pl/configure.ini
+++ b/src/locale/pl/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Szukaj"
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ; /src/views/elements
diff --git a/src/locale/pt/configure.ini b/src/locale/pt/configure.ini
index a703975b6..52ae0b154 100755
--- a/src/locale/pt/configure.ini
+++ b/src/locale/pt/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Pesquisa"
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ; /src/views/elements
diff --git a/src/locale/ru/configure.ini b/src/locale/ru/configure.ini
index 345a881a0..cdacf6e7d 100755
--- a/src/locale/ru/configure.ini
+++ b/src/locale/ru/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Поиск"
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ; /src/views/elements
diff --git a/src/locale/te/configure.ini b/src/locale/te/configure.ini
index f7f7d2c51..b4ec09493 100644
--- a/src/locale/te/configure.ini
+++ b/src/locale/te/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "అన్వేషించు"
 search_view_no_index_set = "డిఫాల్ట్ సూచిక సెట్ చేసి లేదు"
 search_view_calculated = "%s సెకన్లు"
 search_view_results = "చూపించేది %s - %s of %s"
-search_view_thesaurus_results = "థెసారస్ ఫలితాలు"
 search_view_possible_answer = ""
 search_view_word_cloud = "వర్డ్స్:"
 search_view_cache = "కేష్ చేయబడినవి"
@@ -810,7 +809,6 @@ search_view_inlink = "ఇన్ లింక్స్"
 search_view_rank = "రేంక్:%s"
 search_view_relevancy = "సంబంధిత:%s"
 search_view_proximity = "సామీప్యత:%s"
-search_view_thesaurus_score = "థెసారస్: %s"
 search_view_score = "స్కోర్:%s"
 ; /src/views/elements
diff --git a/src/locale/th/configure.ini b/src/locale/th/configure.ini
index bb94f8fbb..bfeaa253c 100755
--- a/src/locale/th/configure.ini
+++ b/src/locale/th/configure.ini
@@ -800,7 +800,6 @@ search_view_search = ""
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ; /src/views/elements
diff --git a/src/locale/tr/configure.ini b/src/locale/tr/configure.ini
index 7488d07d0..bfc4b6699 100755
--- a/src/locale/tr/configure.ini
+++ b/src/locale/tr/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "Ara"
 search_view_no_index_set = ""
 search_view_calculated = ""
 search_view_results = ""
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = ""
 search_view_relevancy = ""
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = ""
 ; /src/views/elements
diff --git a/src/locale/vi_VN/configure.ini b/src/locale/vi_VN/configure.ini
index 5e46a5366..453814424 100755
--- a/src/locale/vi_VN/configure.ini
+++ b/src/locale/vi_VN/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "T&igrave;m Kiếm"
 search_view_no_index_set = ""
 search_view_calculated = "%s gi&acirc;y."
 search_view_results = "Cho kết quả tứ %s - %s của %s"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = "Trang&nbsp;gốc"
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = "Thứ Tự: %s"
 search_view_relevancy = "Th&iacute;ch hợp: %s"
 search_view_proximity = "Gần: %s"
-search_view_thesaurus_score = ""
 search_view_score = "Điểm: %s"
 ; /src/views/elements
diff --git a/src/locale/zh_CN/configure.ini b/src/locale/zh_CN/configure.ini
index 1bf771e54..c26ccdc1a 100755
--- a/src/locale/zh_CN/configure.ini
+++ b/src/locale/zh_CN/configure.ini
@@ -800,7 +800,6 @@ search_view_search = "搜尋"
 search_view_no_index_set = ""
 search_view_calculated = "總計: %s 秒"
 search_view_results = "結果"
-search_view_thesaurus_results = ""
 search_view_possible_answer = ""
 search_view_word_cloud = ""
 search_view_cache = ""
@@ -810,7 +809,6 @@ search_view_inlink = ""
 search_view_rank = "排名: %s 名"
 search_view_relevancy = "關聯度:  %s 趴"
 search_view_proximity = ""
-search_view_thesaurus_score = ""
 search_view_score = "分數"
 ; /src/views/elements
diff --git a/src/models/ParallelModel.php b/src/models/ParallelModel.php
index 86a47b0bb..aa9ef9480 100755
--- a/src/models/ParallelModel.php
+++ b/src/models/ParallelModel.php
@@ -369,15 +369,14 @@ class ParallelModel extends Model
         if (!isset($index_archive->generation_info['ACTIVE'])) {
             return false;
-        $mask = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
         $num_generations = $index_archive->generation_info['ACTIVE'];
-        $hash_key = ($is_key) ? L\crawlHashWord($url_or_key, true, $mask) :
-            L\crawlHashWord("info:$url_or_key", true, $mask);
-        $info = IndexManager::getWordInfo($index_name, $hash_key, 0, $mask, 1);
+        $hash_key = ($is_key) ? L\crawlHashWord($url_or_key, true) :
+            L\crawlHashWord("info:$url_or_key", true);
+        $info = IndexManager::getWordInfo($index_name, $hash_key, 0, 1);
         if (!isset($info[0][4])) {
             return false;
-        $word_iterator = new WordIterator($info[0][4], $index_name, true);
+        $word_iterator = new WordIterator($info[0][4], 0, $index_name, true);
         if (is_array($next_docs = $word_iterator->nextDocsWithWord())) {
             $doc_info = current($next_docs);
             if (!$doc_info) {
diff --git a/src/models/PhraseModel.php b/src/models/PhraseModel.php
index 17dd9532a..63e457a83 100755
--- a/src/models/PhraseModel.php
+++ b/src/models/PhraseModel.php
@@ -35,7 +35,6 @@ use seekquarry\yioop\library as L;
 use seekquarry\yioop\library\AnalyticsManager;
 use seekquarry\yioop\library\IndexManager;
 use seekquarry\yioop\library\PhraseParser;
-use seekquarry\yioop\library\Thesaurus;
 use seekquarry\yioop\library\index_bundle_iterators as I;

@@ -494,10 +493,6 @@ class PhraseModel extends ParallelModel
             $results['TOTAL_ROWS'] > 0) {
             $output = $this->formatPageResults($results, $format_words,
-            if (isset($out_results['THESAURUS_VARIANTS'])) {
-                $output['THESAURUS_VARIANTS'] =
-                    $out_results['THESAURUS_VARIANTS'];
-            }
             if (!empty($answer_score_map)) {
@@ -534,10 +529,10 @@ class PhraseModel extends ParallelModel
         $in2 = $indent . $indent;
         $in3 = $in2 . $indent;
         $in4 = $in2. $in2;
-        $phrase = " ".$phrase;
+        $phrase = " " . $phrase;
         $phrase = $this->parseIfConditions($phrase);
         $phrase_string = $phrase;
-        list($found_metas, $found_materialized_metas, $disallow_phrases,
+        list($found_metas, $disallow_phrases,
             $phrase_string, $query_string, $index_name, $weight) =
@@ -585,8 +580,7 @@ class PhraseModel extends ParallelModel
                 $new_words =
                     PhraseParser::extractPhrases($phrase_part, $locale_tag,
-                if (isset($new_words[0]) && strpos($new_words[0], " ") > 0 &&
-                    $found_materialized_metas == []) {
+                if (isset($new_words[0]) && strpos($new_words[0], " ") > 0) {
                 $base_words = array_merge($base_words, $new_words);
@@ -597,11 +591,6 @@ class PhraseModel extends ParallelModel
         //stemmed, if have stemmer
         $index_version = IndexManager::getVersion($index_name);
         $add_metas = $found_metas;
-        $immaterial_metas = array_diff(
-            $found_metas, $found_materialized_metas);
-        if (count($immaterial_metas) > 0 && $index_version > 0) {
-            $add_metas = $immaterial_metas;
-        }
         $words = array_merge($base_words, $add_metas);
         if (count($words) == 0 && count($disallow_phrases) > 0) {
             $words[] = "site:any";
@@ -637,20 +626,7 @@ class PhraseModel extends ParallelModel
         if (isset($words) && count($words) == 1 &&
             count($disallow_phrases) < 1 && !strpos($words[0], " ")) {
             $phrase_string = $words[0];
-            if ($index_version == 0) {
-                $tmp_hash = L\allCrawlHashPaths($phrase_string);
-                $tmp_hash = (is_array($tmp_hash)) ? $tmp_hash : [$tmp_hash];
-                $phrase_hash = array_merge([$tmp_hash],
-                    [L\crawlHash($phrase_string)]);
-            } else {
-                if ($found_materialized_metas == []) {
-                    $phrase_hash = L\allCrawlHashPaths($phrase_string);
-                } else {
-                    $phrase_hash = L\allCrawlHashPaths($phrase_string,
-                        $found_materialized_metas,
-                        PhraseParser::$materialized_metas);
-                }
-            }
+            $phrase_hash = L\allCrawlHashPaths($phrase_string);
             $word_struct = ["KEYS" => [$phrase_hash],
                 "QUOTE_POSITIONS" => null, "DISALLOW_KEYS" => [],
                 "WEIGHT" => $weight, "INDEX_NAME" => $index_name,
@@ -658,32 +634,9 @@ class PhraseModel extends ParallelModel
         } else {
             //get a raw list of words and their hashes
             $hashes = [];
-            $metas_accounted = false;
-            $materialized_metas = [];
-            $meta_keys = [];
             $word_keys = [];
             foreach ($words as $word) {
-                if (!$metas_accounted && substr_count($word, " ") == 0
-                    && !in_array($word, $found_metas)) {
-                    $metas_accounted = true;
-                    $materialized_metas = $found_materialized_metas;
-                }
-                $tmp_hash = L\allCrawlHashPaths($word, $materialized_metas,
-                    PhraseParser::$materialized_metas);
-                if ($index_version == 0) {
-                    $tmp_hash = (is_array($tmp_hash)) ? $tmp_hash : [$tmp_hash];
-                    $test =  array_merge($tmp_hash, [L\crawlHash($word)]);
-                } else {
-                    if (in_array($word, $found_materialized_metas) &&
-                        !$metas_accounted) {
-                        $meta_keys[] = $tmp_hash;
-                    } else {
-                        $word_keys[] = $tmp_hash;
-                    }
-                }
-            }
-            if (!$metas_accounted) {
-                $word_keys = array_merge($word_keys, $meta_keys);
+                $word_keys[] = L\allCrawlHashPaths($word);
             if (count($word_keys) == 0) {
                 $word_keys = null;
@@ -774,7 +727,6 @@ class PhraseModel extends ParallelModel
         $index_name = $this->index_name;
         $weight = 1;
         $found_metas = [];
-        $found_materialized_metas = [];
         $disallow_phrases = [];
         $phrase_string = $phrase;
         $phrase_string = str_replace("&", "&amp;", $phrase_string);
@@ -791,24 +743,6 @@ class PhraseModel extends ParallelModel
                 ['i:', 'index:', 'w:', 'weight:', '\-'])) {
                 $matches = $matches[2];
                 $found_metas = array_merge($found_metas, $matches);
-                if (in_array($meta_word, PhraseParser::$materialized_metas)) {
-                    $seen_matches = [];
-                    $seen_match_count = 0;
-                    foreach ($matches as $pre_material_match) {
-                        $match_kinds = explode(":", $pre_material_match);
-                        if (!in_array($match_kinds[1], ["all"]) &&
-                            !isset($match_kinds[2])) {
-                            $found_materialized_metas[] = $pre_material_match;
-                            if ($seen_match_count > 0 &&
-                                !isset($seen_matches[$pre_material_match])) {
-                                $materialized_match_conflict = true;
-                                break 2;
-                            }
-                            $seen_matches[$pre_material_match] = true;
-                            $seen_match_count++;
-                        }
-                    }
-                }
             } elseif ($meta_word == '\-') {
                 if (count($matches[0]) > 0) {
                     foreach ($matches[2] as $disallowed) {
@@ -828,28 +762,18 @@ class PhraseModel extends ParallelModel
         if ($materialized_match_conflict) {
             $found_metas = [];
-            $found_materialized_metas = [];
             $disallow_phrases = [];
             $phrase_string = "";
         $found_metas = array_unique($found_metas);
-        $found_materialized_metas = array_unique($found_materialized_metas);
-        if (empty(trim($phrase_string)) && count($found_metas) == 2
-            && (in_array("site:doc", $found_metas)
-            || in_array("site:any", $found_metas))) {
-            /*site:doc and site:any doesn't work with materialized metas by
-              themselves */
-            array_pop($found_materialized_metas);
-        }
         $disallow_phrases = array_unique($disallow_phrases);
         $phrase_string = mb_ereg_replace("&amp;", "_and_", $phrase_string);
         $query_string = mb_ereg_replace(C\PUNCT, " ", $phrase_string);
         $query_string = preg_replace("/(\s)+/", " ", $query_string);
         $query_string = mb_ereg_replace('_and_', '&', $query_string);
         $phrase_string = mb_ereg_replace('_and_', '&', $phrase_string);
-        return [$found_metas, $found_materialized_metas,
-            $disallow_phrases, $phrase_string, $query_string, $index_name,
-            $weight];
+        return [$found_metas, $disallow_phrases, $phrase_string, $query_string,
+            $index_name, $weight];
      * Ideally, this function tries to guess from the query what the
@@ -1149,7 +1073,7 @@ class PhraseModel extends ParallelModel
             $save_timestamp_name == "") {
             $mem_tmp = serialize($raw).serialize($word_structs).
                 $original_query . $this->index_name;
-            $summary_hash = L\crawlHash($mem_tmp.":".$limit.":".$num);
+            $summary_hash = L\crawlHash($mem_tmp . ":" . $limit . ":" . $num);
             if ($use_cache_if_allowed) {
                 $cache_success = true;
                 $results = self::$cache->get($summary_hash);
@@ -1405,62 +1329,12 @@ class PhraseModel extends ParallelModel
         $results['PAGES'] = $out_pages;
         $results['TIME'] = time();
-        $lang = L\guessLocaleFromString($original_query);
-        $tokenizer = PhraseParser::getTokenizer($lang);
-        //only use tokenizer if no meta word or disjuncts in query
-        if (!preg_match('/(\||\:)/u', $original_query) &&
-            $tokenizer && method_exists($tokenizer, "scoredThesaurusMatches")
-            && method_exists($tokenizer, "tagPartsOfSpeechPhrase")
-            && isset($tokenizer->use_thesaurus)) {
-            $results = $this->sortByThesaurusScore($results, $original_query,
-                $lang);
-            if (!$out_pages) {
-                $results['PAGES'] = $out_pages;
-            }
-        }
         if (!empty($_SERVER["USE_CACHE"]) &&
             $save_timestamp_name == "") {
             self::$cache->set($summary_hash, $results);
         return $results;
-    /**
-     * If user selects Wordnet feature in page options then only
-     * do WordNet processing. Also user has to specify the WordNet directory
-     *
-     * @param array $results document summaries
-     * @param string $original_query the original query that we are computing
-     *      results for
-     * @param string $lang locale tag of query
-     * @return array results document summaries sorted by wordnet score
-     */
-    public function sortByThesaurusScore($results, $original_query, $lang)
-    {
-        $summaries = [];
-        $pages = $results['PAGES'];
-        foreach ($pages as $page) {
-            $summaries[] = $page[self::DESCRIPTION];
-        }
-        $index_name = $this->index_name;
-        $phrases = Thesaurus::getSimilarPhrases($original_query, $index_name,
-            $lang);
-        $results['THESAURUS_VARIANTS'] = $phrases;
-        if (!empty($phrases)) {
-            $thesaurus_scores = Thesaurus::scorePhrasesSummaries($phrases,
-                $summaries);
-            //Store the BM25 score for each page in result array
-            $num_scores = count($thesaurus_scores);
-            for ($i = 0; $i < $num_scores; $i++) {
-                $pages[$i][self::THESAURUS_SCORE] = $thesaurus_scores[$i];
-                L\orderCallback($pages[$i], $pages[$i], self::THESAURUS_SCORE);
-            }
-            if (array_sum($thesaurus_scores) != 0) {
-                usort($pages, C\NS_LIB . "orderCallback");
-            }
-            $results['PAGES'] = $pages;
-        }
-        return $results;
-    }
      * Used to lookup summary info for the pages provided (using their)
      * self::SUMMARY_OFFSET field. If any of the lookup-ed summaries
@@ -1601,7 +1475,7 @@ class PhraseModel extends ParallelModel
     public function getQueryIterator($word_structs, &$filter, $raw,
         &$to_retrieve, $queue_servers = [], $original_query = "",
-        $save_timestamp_name="", $limit_feeds = true)
+        $save_timestamp_name = "", $limit_feeds = true)
         $iterators = [];
         $total_iterators = 0;
@@ -1671,6 +1545,7 @@ class PhraseModel extends ParallelModel
                 $sum = 0;
+                $lookup_cutoff = max(C\MIN_RESULTS_TO_GROUP, $to_retrieve);
                 for ($i = 0; $i < $total_iterators; $i++) {
                     $current_key = (is_string($distinct_word_keys[$i])) ?
                         $distinct_word_keys[$i] : (is_string(
@@ -1686,74 +1561,25 @@ class PhraseModel extends ParallelModel
                         $min_group_override = true;
                     } else {
                         //can happen if exact phrase search suffix approach used
-                        if (isset($distinct_word_keys[$i][0][0]) &&
-                            is_array($distinct_word_keys[$i][0][0])) {
-                            $distinct_keys = [
-                                $distinct_word_keys[$i][0][1]];
-                        } elseif (isset($distinct_word_keys[$i][0]) &&
+                        if (isset($distinct_word_keys[$i][0]) &&
                             is_array($distinct_word_keys[$i][0])) {
                             $distinct_keys = $distinct_word_keys[$i];
                         } else {
                             $distinct_keys = [$distinct_word_keys[$i]];
-                        $out_keys = [];
-                        $old_distinct_key_id = "";
-                        foreach ($distinct_keys as $distinct_key) {
-                            if (is_array($distinct_key)) {
-                                if (!isset($distinct_key[2]) &&
-                                    isset($distinct_key[1])) {
-                                    $distinct_keys[] = $distinct_key[1];
-                                }
-                                $shift = (isset($distinct_key[1])) ?
-                                    $distinct_key[1] : 0;
-                                $mask = (isset($distinct_key[2])) ?
-                                    $distinct_key[2] : "\x00\x00\x00\x00\x00" .
-                                    "\x00\x00\x00\x00\x00\x00";
-                                if (isset($distinct_key[3])) {
-                                    $old_distinct_key_id =
-                                        L\unbase64Hash($distinct_key[3]);
-                                }
-                                $distinct_key_id = L\unbase64Hash(
-                                    $distinct_key[0]);
-                            } else {
-                                $shift = 0;
-                                $mask = "\x00\x00\x00\x00\x00" .
-                                    "\x00\x00\x00\x00\x00\x00";
-                                $distinct_key_id =
-                                    L\unbase64Hash($distinct_key);
-                            }
-                            $lookup_cutoff = max(C\MIN_RESULTS_TO_GROUP,
-                                $to_retrieve);
-                            $info = IndexManager::getWordInfo($index_name,
-                                $distinct_key_id, $shift, $mask, -1, -1,
-                                C\NUM_DISTINCT_GENERATIONS);
-                            if ($old_distinct_key_id != "") {
-                                $old_info = IndexManager::getWordInfo(
-                                    $index_name, $old_distinct_key_id, $shift,
-                                    $mask, -1, -1, C\NUM_DISTINCT_GENERATIONS);
-                                if ($info !== false && $old_info !== false) {
-                                    $info = array_merge($info, $old_info);
-                                } elseif ($old_info !== false) {
-                                    $info = $old_info;
-                                }
-                            }
-                            if ($info != []) {
-                                $tmp_keys = L\arrayColumnCount($info, 4, 3);
-                                $sum += array_sum($tmp_keys);
-                                $out_keys = array_merge($out_keys, $tmp_keys);
-                            }
-                            if ($sum > $lookup_cutoff) {
-                                break;
-                            }
-                        }
-                        $out_keys = array_keys(array_slice($out_keys, 0, 50));
+                        $sum = 0;
                         $tmp_word_iterators =[];
                         $m = 0;
-                        foreach ($out_keys as $distinct_key) {
+                        foreach ($distinct_keys as $distinct_key) {
+                            $shift = (isset($distinct_key[1])) ?
+                                $distinct_key[1] : 0;
+                            $distinct_key_id = L\unbase64Hash(
+                                $distinct_key[0]);
                             $tmp_word_iterators[$m] =
-                                new I\WordIterator($distinct_key,
+                                new I\WordIterator($distinct_key_id, $shift,
                                 $index_name, true, $filter, $to_retrieve,
+                            $sum += $tmp_word_iterators[$m]->num_docs;
                             if ($tmp_word_iterators[$m]->dictionary_info !=
                                 [] ||
                                 $tmp_word_iterators[$m]->feed_count > 0) {
@@ -1762,6 +1588,9 @@ class PhraseModel extends ParallelModel
                             } else {
+                            if ($sum > $lookup_cutoff) {
+                                break;
+                            }
                         if ($m == 1) {
                             $word_iterators[$i] = $tmp_word_iterators[0];
@@ -1780,9 +1609,11 @@ class PhraseModel extends ParallelModel
                 $num_disallow_keys = count($disallow_keys);
                 if ($num_disallow_keys > 0) {
                     for ($i = 0; $i < $num_disallow_keys; $i++) {
+                        /* Note: for now the shift is always 0, since phrases
+                           cannot be disallowed */
                         $disallow_iterator =
-                            new I\WordIterator($disallow_keys[$i], $index_name,
-                                false, $filter);
+                            new I\WordIterator($disallow_keys[$i], 0,
+                                $index_name, false, $filter);
                         $word_iterators[$num_word_keys + $i] =
                             new I\NegationIterator($disallow_iterator);
diff --git a/src/scripts/suggest.js b/src/scripts/suggest.js
index 6c19dd848..9fa3abcbe 100644
--- a/src/scripts/suggest.js
+++ b/src/scripts/suggest.js
@@ -693,12 +693,9 @@ function spellCheck()
     if (referenceNode) {
         var corrected_spell = elt("spell-check");
-        var thesaurus_results = elt("thesaurus-results");
         /* corrected_spell might not be present if WORD_SUGGEST off
-           If there are already thesaurus results we don't want to
-           clutter the top area so also don't suggest
-        if (!corrected_spell || thesaurus_results) {return; }
+        if (!corrected_spell) {return; }
         var logged_in = elt("csrf-token");
         if (logged_in) {
             var csrf_token = elt("csrf-token").value;
diff --git a/src/views/SearchView.php b/src/views/SearchView.php
index c443c9f5a..b0ea7cd49 100755
--- a/src/views/SearchView.php
+++ b/src/views/SearchView.php
@@ -211,39 +211,15 @@ class SearchView extends View implements CrawlConstants
         } ?>
         <div class="serp-body" >
-        <?php
-        $similar_words = $data['THESAURUS_VARIANTS'];
-        $use_thesaurus = C\WORD_SUGGEST && count($similar_words) > 0 &&
-            !$_SERVER["MOBILE"];
-        if ($use_thesaurus) { ?>
-            <div id="thesaurus-results" class="thesaurus">
-            <?php
-                e(tl('search_view_thesaurus_results'));
-                foreach ($similar_words as $word) {
-                    e("<br />");
-                    ?><span><a href="?<?= $token_string_amp
-                    ?>its=<?= $data['its'] ?>&amp;q=<?=$word ?>"><?=
-                    $word ?></a></span>
-                    <?php
-                }
-            ?>
-            </div>
-        <?php
-        }
-        if ($use_thesaurus) { ?>
-            <div class="thesaurus-serp-results"> <?php
-        } else { ?>
-            <div class="serp-results">
-        <?php
-        }
+        <div class="serp-results"><?php
         if (!$is_landing) {
-        if (!empty($data['BEST_ANSWER'])) { ?>
-            <div id="best-answer" class="echo-link">
+        if (!empty($data['BEST_ANSWER'])) {
+            ?><div id="best-answer" class="echo-link">
                  <?= $data['BEST_ANSWER'] ?>
-            </div>
-        <?php }
+            </div><?php
+        }
         foreach ($data['PAGES'] as $page) {
             if (isset($page[self::URL])) {
                 if (substr($page[self::URL], 0, 4) == "url|") {
@@ -292,8 +268,8 @@ class SearchView extends View implements CrawlConstants
                 e( "</div>");
-            } else if (isset($page['NEWS'])) {
-                $this->helper("feeds")->render($page['NEWS'],
+            } else if (isset($page['FEED'])) {
+                $this->helper("feeds")->render($page['FEED'],
                     $token, $data['QUERY'],  $subsearch,
                 e( "</div>");
@@ -437,12 +413,6 @@ class SearchView extends View implements CrawlConstants
                     number_format($page[self::RELEVANCE], 2) )."\n");
                     number_format($page[self::PROXIMITY], 2) )."\n");
-                if (isset($page[self::THESAURUS_SCORE]) &&
-                    $page[self::THESAURUS_SCORE] > 0) {
-                    e(tl('search_view_thesaurus_score',
-                        number_format($page[self::THESAURUS_SCORE], 2)) .
-                        "\n");
-                }
                 if (isset($page[self::USER_RANKS])) {
                     foreach ($page[self::USER_RANKS] as $label => $score) {
                         e($label.":".number_format($score/6553.6, 2)."\n");
diff --git a/tests/IndexShardTest.php b/tests/IndexShardTest.php
index a1bc2abbc..22d6f758e 100644
--- a/tests/IndexShardTest.php
+++ b/tests/IndexShardTest.php
@@ -334,7 +334,7 @@ class IndexShardTest extends UnitTest
         $meta_ids = ["EEEEEEEE", "FFFFFFFF"];
         //test saving and loading to a file
-            $offset, $word_counts, $meta_ids, [], true);
+            $offset, $word_counts, $meta_ids, true);
         $this->test_objects['shard2'] = IndexShard::load(C\WORK_DIRECTORY.