From 40c334610d49d69e81cf589fea6de8517c80f163 Mon Sep 17 00:00:00 2001
From: Peter Molnar <hello@petermolnar.eu>
Date: Thu, 1 Jun 2017 11:19:32 +0000
Subject: [PATCH] better search: add NGRAM-tokenized "fuzzy" field to the search index

---
 nasg.py   | 8 +++++---
 search.py | 2 +-
 shared.py | 5 ++++-
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/nasg.py b/nasg.py
index 367d2db..717c43d 100755
--- a/nasg.py
+++ b/nasg.py
@@ -44,7 +44,7 @@ def splitpath(path):
     return parts
 
 
-class SmartIndexer(object):
+class Indexer(object):
 
     def __init__(self):
         self.target = os.path.abspath(os.path.join(
@@ -94,12 +94,14 @@ class SmartIndexer(object):
         if singular.ispage:
             weight = 100
 
+        content = " ".join(list(map(str,[*content_real, *content_remote])))
         if exists:
             logging.info("updating search index with %s", singular.fname)
             self.writer.add_document(
                 title=singular.title,
                 url=singular.url,
-                content=" ".join(list(map(str,[*content_real, *content_remote]))),
+                content=content,
+                fuzzy=content,
                 date=singular.published.datetime,
                 tags=",".join(list(map(str, singular.tags))),
                 weight=weight,
@@ -1355,7 +1357,7 @@ class NASG(object):
             shutil.copy2(s, d)
 
         logging.info("pouplating searchdb")
-        searchdb = SmartIndexer()
+        searchdb = Indexer()
         loop.run_until_complete(self.__aindex(content, searchdb))
         searchdb.finish()
 
diff --git a/search.py b/search.py
index e1cc10d..20c5162 100644
--- a/search.py
+++ b/search.py
@@ -28,7 +28,7 @@ def SearchHandler(query, tmpl):
     )))
 
     qp = qparser.MultifieldParser(
-        ["title", "content", "tags"],
+        ["title", "content","fuzzy", "tags"],
         schema = shared.schema
     )
 
diff --git a/shared.py b/shared.py
index 0acf2ab..f5b51e4 100644
--- a/shared.py
+++ b/shared.py
@@ -57,12 +57,15 @@ schema = fields.Schema(
         stored=True,
         analyzer=analysis.FancyAnalyzer()
     ),
+    fuzzy=fields.NGRAMWORDS(
+        tokenizer=analysis.NgramTokenizer(4)
+    ),
     tags=fields.TEXT(
         stored=True,
         analyzer=analysis.KeywordAnalyzer(
             lowercase=True,
             commas=True
-        )
+        ),
     ),
     weight=fields.NUMERIC(
         sortable=True