better search: added NGRAM-tokenized text to the search index so that queries can match word fragments
This commit is contained in:
parent
cfe0112f70
commit
40c334610d
3 changed files with 10 additions and 5 deletions
8
nasg.py
8
nasg.py
|
@ -44,7 +44,7 @@ def splitpath(path):
|
||||||
return parts
|
return parts
|
||||||
|
|
||||||
|
|
||||||
class SmartIndexer(object):
|
class Indexer(object):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.target = os.path.abspath(os.path.join(
|
self.target = os.path.abspath(os.path.join(
|
||||||
|
@ -94,12 +94,14 @@ class SmartIndexer(object):
|
||||||
if singular.ispage:
|
if singular.ispage:
|
||||||
weight = 100
|
weight = 100
|
||||||
|
|
||||||
|
content = " ".join(list(map(str,[*content_real, *content_remote])))
|
||||||
if exists:
|
if exists:
|
||||||
logging.info("updating search index with %s", singular.fname)
|
logging.info("updating search index with %s", singular.fname)
|
||||||
self.writer.add_document(
|
self.writer.add_document(
|
||||||
title=singular.title,
|
title=singular.title,
|
||||||
url=singular.url,
|
url=singular.url,
|
||||||
content=" ".join(list(map(str,[*content_real, *content_remote]))),
|
content=content,
|
||||||
|
fuzzy=content,
|
||||||
date=singular.published.datetime,
|
date=singular.published.datetime,
|
||||||
tags=",".join(list(map(str, singular.tags))),
|
tags=",".join(list(map(str, singular.tags))),
|
||||||
weight=weight,
|
weight=weight,
|
||||||
|
@ -1355,7 +1357,7 @@ class NASG(object):
|
||||||
shutil.copy2(s, d)
|
shutil.copy2(s, d)
|
||||||
|
|
||||||
logging.info("pouplating searchdb")
|
logging.info("pouplating searchdb")
|
||||||
searchdb = SmartIndexer()
|
searchdb = Indexer()
|
||||||
loop.run_until_complete(self.__aindex(content, searchdb))
|
loop.run_until_complete(self.__aindex(content, searchdb))
|
||||||
searchdb.finish()
|
searchdb.finish()
|
||||||
|
|
||||||
|
|
|
@ -28,7 +28,7 @@ def SearchHandler(query, tmpl):
|
||||||
)))
|
)))
|
||||||
|
|
||||||
qp = qparser.MultifieldParser(
|
qp = qparser.MultifieldParser(
|
||||||
["title", "content", "tags"],
|
["title", "content","fuzzy", "tags"],
|
||||||
schema = shared.schema
|
schema = shared.schema
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -57,12 +57,15 @@ schema = fields.Schema(
|
||||||
stored=True,
|
stored=True,
|
||||||
analyzer=analysis.FancyAnalyzer()
|
analyzer=analysis.FancyAnalyzer()
|
||||||
),
|
),
|
||||||
|
fuzzy=fields.NGRAMWORDS(
|
||||||
|
tokenizer=analysis.NgramTokenizer(4)
|
||||||
|
),
|
||||||
tags=fields.TEXT(
|
tags=fields.TEXT(
|
||||||
stored=True,
|
stored=True,
|
||||||
analyzer=analysis.KeywordAnalyzer(
|
analyzer=analysis.KeywordAnalyzer(
|
||||||
lowercase=True,
|
lowercase=True,
|
||||||
commas=True
|
commas=True
|
||||||
)
|
),
|
||||||
),
|
),
|
||||||
weight=fields.NUMERIC(
|
weight=fields.NUMERIC(
|
||||||
sortable=True
|
sortable=True
|
||||||
|
|
Loading…
Reference in a new issue