all repos — nasg @ 40c334610d49d69e81cf589fea6de8517c80f163

better search: added NGRAM tokenized text to search fragments
Peter Molnar hello@petermolnar.eu
Thu, 01 Jun 2017 11:19:32 +0000
commit

40c334610d49d69e81cf589fea6de8517c80f163

parent

cfe0112f70dd815a76e9121fbd99feff7ad92e12

3 files changed, 10 insertions(+), 5 deletions(-)

jump to
M nasg.py

@@ -44,7 +44,7 @@ (path,tail) = os.path.split(path)

return parts -class SmartIndexer(object): +class Indexer(object): def __init__(self): self.target = os.path.abspath(os.path.join(

@@ -94,12 +94,14 @@ weight = 10

if singular.ispage: weight = 100 + content = " ".join(list(map(str,[*content_real, *content_remote]))) if exists: logging.info("updating search index with %s", singular.fname) self.writer.add_document( title=singular.title, url=singular.url, - content=" ".join(list(map(str,[*content_real, *content_remote]))), + content=content, + fuzzy=content, date=singular.published.datetime, tags=",".join(list(map(str, singular.tags))), weight=weight,

@@ -1355,7 +1357,7 @@ logging.debug("copying %s to %s", s, d)

shutil.copy2(s, d) logging.info("pouplating searchdb") - searchdb = SmartIndexer() + searchdb = Indexer() loop.run_until_complete(self.__aindex(content, searchdb)) searchdb.finish()
M search.py

@@ -28,7 +28,7 @@ shared.config.get('var', 'searchdb')

))) qp = qparser.MultifieldParser( - ["title", "content", "tags"], + ["title", "content","fuzzy", "tags"], schema = shared.schema )
M shared.py

@@ -57,12 +57,15 @@ content=fields.TEXT(

stored=True, analyzer=analysis.FancyAnalyzer() ), + fuzzy=fields.NGRAMWORDS( + tokenizer=analysis.NgramTokenizer(4) + ), tags=fields.TEXT( stored=True, analyzer=analysis.KeywordAnalyzer( lowercase=True, commas=True - ) + ), ), weight=fields.NUMERIC( sortable=True