better search: added NGRAM tokenized text to search fragments

This commit is contained in:
Peter Molnar 2017-06-01 11:19:32 +00:00
parent cfe0112f70
commit 40c334610d
3 changed files with 10 additions and 5 deletions

View file

@@ -44,7 +44,7 @@ def splitpath(path):
return parts return parts
class SmartIndexer(object): class Indexer(object):
def __init__(self): def __init__(self):
self.target = os.path.abspath(os.path.join( self.target = os.path.abspath(os.path.join(
@@ -94,12 +94,14 @@ class SmartIndexer(object):
if singular.ispage: if singular.ispage:
weight = 100 weight = 100
content = " ".join(list(map(str,[*content_real, *content_remote])))
if exists: if exists:
logging.info("updating search index with %s", singular.fname) logging.info("updating search index with %s", singular.fname)
self.writer.add_document( self.writer.add_document(
title=singular.title, title=singular.title,
url=singular.url, url=singular.url,
content=" ".join(list(map(str,[*content_real, *content_remote]))), content=content,
fuzzy=content,
date=singular.published.datetime, date=singular.published.datetime,
tags=",".join(list(map(str, singular.tags))), tags=",".join(list(map(str, singular.tags))),
weight=weight, weight=weight,
@@ -1355,7 +1357,7 @@ class NASG(object):
shutil.copy2(s, d) shutil.copy2(s, d)
logging.info("pouplating searchdb") logging.info("pouplating searchdb")
searchdb = SmartIndexer() searchdb = Indexer()
loop.run_until_complete(self.__aindex(content, searchdb)) loop.run_until_complete(self.__aindex(content, searchdb))
searchdb.finish() searchdb.finish()

View file

@@ -28,7 +28,7 @@ def SearchHandler(query, tmpl):
))) )))
qp = qparser.MultifieldParser( qp = qparser.MultifieldParser(
["title", "content", "tags"], ["title", "content","fuzzy", "tags"],
schema = shared.schema schema = shared.schema
) )

View file

@@ -57,12 +57,15 @@ schema = fields.Schema(
stored=True, stored=True,
analyzer=analysis.FancyAnalyzer() analyzer=analysis.FancyAnalyzer()
), ),
fuzzy=fields.NGRAMWORDS(
tokenizer=analysis.NgramTokenizer(4)
),
tags=fields.TEXT( tags=fields.TEXT(
stored=True, stored=True,
analyzer=analysis.KeywordAnalyzer( analyzer=analysis.KeywordAnalyzer(
lowercase=True, lowercase=True,
commas=True commas=True
) ),
), ),
weight=fields.NUMERIC( weight=fields.NUMERIC(
sortable=True sortable=True