better search: added NGRAM tokenized text to search fragments
This commit is contained in:
parent
cfe0112f70
commit
40c334610d
3 changed files with 10 additions and 5 deletions
8
nasg.py
8
nasg.py
|
@ -44,7 +44,7 @@ def splitpath(path):
|
|||
return parts
|
||||
|
||||
|
||||
class SmartIndexer(object):
|
||||
class Indexer(object):
|
||||
|
||||
def __init__(self):
|
||||
self.target = os.path.abspath(os.path.join(
|
||||
|
@ -94,12 +94,14 @@ class SmartIndexer(object):
|
|||
if singular.ispage:
|
||||
weight = 100
|
||||
|
||||
content = " ".join(list(map(str,[*content_real, *content_remote])))
|
||||
if exists:
|
||||
logging.info("updating search index with %s", singular.fname)
|
||||
self.writer.add_document(
|
||||
title=singular.title,
|
||||
url=singular.url,
|
||||
content=" ".join(list(map(str,[*content_real, *content_remote]))),
|
||||
content=content,
|
||||
fuzzy=content,
|
||||
date=singular.published.datetime,
|
||||
tags=",".join(list(map(str, singular.tags))),
|
||||
weight=weight,
|
||||
|
@ -1355,7 +1357,7 @@ class NASG(object):
|
|||
shutil.copy2(s, d)
|
||||
|
||||
logging.info("populating searchdb")
|
||||
searchdb = SmartIndexer()
|
||||
searchdb = Indexer()
|
||||
loop.run_until_complete(self.__aindex(content, searchdb))
|
||||
searchdb.finish()
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ def SearchHandler(query, tmpl):
|
|||
)))
|
||||
|
||||
qp = qparser.MultifieldParser(
|
||||
["title", "content", "tags"],
|
||||
["title", "content","fuzzy", "tags"],
|
||||
schema = shared.schema
|
||||
)
|
||||
|
||||
|
|
|
@ -57,12 +57,15 @@ schema = fields.Schema(
|
|||
stored=True,
|
||||
analyzer=analysis.FancyAnalyzer()
|
||||
),
|
||||
fuzzy=fields.NGRAMWORDS(
|
||||
tokenizer=analysis.NgramTokenizer(4)
|
||||
),
|
||||
tags=fields.TEXT(
|
||||
stored=True,
|
||||
analyzer=analysis.KeywordAnalyzer(
|
||||
lowercase=True,
|
||||
commas=True
|
||||
)
|
||||
),
|
||||
),
|
||||
weight=fields.NUMERIC(
|
||||
sortable=True
|
||||
|
|
Loading…
Reference in a new issue