better search: added NGRAM tokenized text to search fragments

This commit is contained in:
Peter Molnar 2017-06-01 11:19:32 +00:00
parent cfe0112f70
commit 40c334610d
3 changed files with 10 additions and 5 deletions

View file

@@ -44,7 +44,7 @@ def splitpath(path):
return parts
class SmartIndexer(object):
class Indexer(object):
def __init__(self):
self.target = os.path.abspath(os.path.join(
@@ -94,12 +94,14 @@ class SmartIndexer(object):
if singular.ispage:
weight = 100
content = " ".join(list(map(str,[*content_real, *content_remote])))
if exists:
logging.info("updating search index with %s", singular.fname)
self.writer.add_document(
title=singular.title,
url=singular.url,
content=" ".join(list(map(str,[*content_real, *content_remote]))),
content=content,
fuzzy=content,
date=singular.published.datetime,
tags=",".join(list(map(str, singular.tags))),
weight=weight,
@@ -1355,7 +1357,7 @@ class NASG(object):
shutil.copy2(s, d)
logging.info("pouplating searchdb")
searchdb = SmartIndexer()
searchdb = Indexer()
loop.run_until_complete(self.__aindex(content, searchdb))
searchdb.finish()

View file

@@ -28,7 +28,7 @@ def SearchHandler(query, tmpl):
)))
qp = qparser.MultifieldParser(
["title", "content", "tags"],
["title", "content","fuzzy", "tags"],
schema = shared.schema
)

View file

@@ -57,12 +57,15 @@ schema = fields.Schema(
stored=True,
analyzer=analysis.FancyAnalyzer()
),
fuzzy=fields.NGRAMWORDS(
tokenizer=analysis.NgramTokenizer(4)
),
tags=fields.TEXT(
stored=True,
analyzer=analysis.KeywordAnalyzer(
lowercase=True,
commas=True
)
),
),
weight=fields.NUMERIC(
sortable=True