js search ideas based on elasticlunr.js - never going to be used, only here as a commit for future reference, if I ever need to revisit the idea of executing JS from python

This commit is contained in:
Peter Molnar 2018-07-22 08:48:47 +01:00
parent 487dab201e
commit e16a7e3dea
2 changed files with 2561 additions and 2 deletions

2507
elasticlunr.js Normal file

File diff suppressed because it is too large Load diff

56
nasg.py
View file

@ -14,6 +14,7 @@ import re
import imghdr
import logging
import asyncio
import json
from shutil import copy2 as cp
from math import ceil
from urllib.parse import urlparse
@ -27,10 +28,10 @@ import markdown
from feedgen.feed import FeedGenerator
from bleach import clean
from emoji import UNICODE_EMOJI
from py_mini_racer import py_mini_racer
import exiftool
import settings
import sys
from pprint import pprint
MarkdownImage = namedtuple(
@ -65,7 +66,6 @@ MD = markdown.Markdown(
],
)
class MarkdownDoc(object):
@property
@cached()
@ -443,6 +443,18 @@ class Singular(MarkdownDoc):
else:
return True
@property
def corpus(self):
return {
'url': self.url,
'title': self.title,
'body': "\n".join([
self.name,
self.summary,
self.content,
])
}
async def render(self):
if self.exists:
return
@ -1025,6 +1037,40 @@ class Category(dict):
self.ping_websub()
class Search(object):
def __init__(self):
self.js = py_mini_racer.MiniRacer()
with open('elasticlunr.js') as f:
self.js.eval(f.read())
self.js.eval("""
var index = elasticlunr();
index.addField('title');
index.addField('body');
index.setRef('url');
""")
# index.saveDocument(false);
@property
def fpath(self):
return os.path.join(
settings.paths.get('build'),
'search.json'
)
def add(self, data):
self.js.eval("""
index.addDoc(%s);
""" % (
json.dumps(data)
))
def save(self):
with open(self.fpath, 'wt') as f:
f.write(json.dumps(self.js.eval("index.toJSON()")))
def make():
start = int(round(time.time() * 1000))
content = settings.paths.get('content')
@ -1044,6 +1090,8 @@ def make():
categories = {}
categories['/'] = Category()
sitemap = OrderedDict()
search = Search()
for e in sorted(glob.glob(os.path.join(content, '*', '*', 'index.md'))):
post = Singular(e)
if post.category not in categories:
@ -1056,6 +1104,7 @@ def make():
for i in post.images.values():
worker.append(i.downsize())
worker.append(post.render())
search.add(post.corpus)
sitemap[post.url] = post.mtime
for category in categories.values():
@ -1079,6 +1128,9 @@ def make():
with open(t, 'wt') as f:
f.write("\n".join(sorted(sitemap.keys())))
# dump search index
search.save()
end = int(round(time.time() * 1000))
logging.info('process took %d ms' % (end - start))