better search: added NGRAM tokenized text to search fragments
This commit is contained in:
parent
cfe0112f70
commit
40c334610d
3 changed files with 10 additions and 5 deletions
8
nasg.py
8
nasg.py
|
@ -44,7 +44,7 @@ def splitpath(path):
|
|||
return parts
|
||||
|
||||
|
||||
class SmartIndexer(object):
|
||||
class Indexer(object):
|
||||
|
||||
def __init__(self):
|
||||
self.target = os.path.abspath(os.path.join(
|
||||
|
@ -94,12 +94,14 @@ class SmartIndexer(object):
|
|||
if singular.ispage:
|
||||
weight = 100
|
||||
|
||||
content = " ".join(list(map(str,[*content_real, *content_remote])))
|
||||
if exists:
|
||||
logging.info("updating search index with %s", singular.fname)
|
||||
self.writer.add_document(
|
||||
title=singular.title,
|
||||
url=singular.url,
|
||||
content=" ".join(list(map(str,[*content_real, *content_remote]))),
|
||||
content=content,
|
||||
fuzzy=content,
|
||||
date=singular.published.datetime,
|
||||
tags=",".join(list(map(str, singular.tags))),
|
||||
weight=weight,
|
||||
|
@ -1355,7 +1357,7 @@ class NASG(object):
|
|||
shutil.copy2(s, d)
|
||||
|
||||
logging.info("populating searchdb")
|
||||
searchdb = SmartIndexer()
|
||||
searchdb = Indexer()
|
||||
loop.run_until_complete(self.__aindex(content, searchdb))
|
||||
searchdb.finish()
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ def SearchHandler(query, tmpl):
|
|||
)))
|
||||
|
||||
qp = qparser.MultifieldParser(
|
||||
["title", "content", "tags"],
|
||||
["title", "content","fuzzy", "tags"],
|
||||
schema = shared.schema
|
||||
)
|
||||
|
||||
|
|
|
@ -57,12 +57,15 @@ schema = fields.Schema(
|
|||
stored=True,
|
||||
analyzer=analysis.FancyAnalyzer()
|
||||
),
|
||||
fuzzy=fields.NGRAMWORDS(
|
||||
tokenizer=analysis.NgramTokenizer(4)
|
||||
),
|
||||
tags=fields.TEXT(
|
||||
stored=True,
|
||||
analyzer=analysis.KeywordAnalyzer(
|
||||
lowercase=True,
|
||||
commas=True
|
||||
)
|
||||
),
|
||||
),
|
||||
weight=fields.NUMERIC(
|
||||
sortable=True
|
||||
|
|
Loading…
Reference in a new issue