JS search ideas based on elasticlunr.js — never going to be used; it is only here as a commit for future reference, in case I ever need to revisit the idea of executing JS from Python.

This commit is contained in:
Peter Molnar 2018-07-22 08:48:47 +01:00
parent 487dab201e
commit e16a7e3dea
2 changed files with 2561 additions and 2 deletions

2507
elasticlunr.js Normal file

File diff suppressed because it is too large Load diff

56
nasg.py
View file

@@ -14,6 +14,7 @@ import re
import imghdr import imghdr
import logging import logging
import asyncio import asyncio
import json
from shutil import copy2 as cp from shutil import copy2 as cp
from math import ceil from math import ceil
from urllib.parse import urlparse from urllib.parse import urlparse
@@ -27,10 +28,10 @@ import markdown
from feedgen.feed import FeedGenerator from feedgen.feed import FeedGenerator
from bleach import clean from bleach import clean
from emoji import UNICODE_EMOJI from emoji import UNICODE_EMOJI
from py_mini_racer import py_mini_racer
import exiftool import exiftool
import settings import settings
import sys
from pprint import pprint from pprint import pprint
MarkdownImage = namedtuple( MarkdownImage = namedtuple(
@@ -65,7 +66,6 @@ MD = markdown.Markdown(
], ],
) )
class MarkdownDoc(object): class MarkdownDoc(object):
@property @property
@cached() @cached()
@@ -443,6 +443,18 @@ class Singular(MarkdownDoc):
else: else:
return True return True
@property
def corpus(self):
    """Search-index record for this entry.

    Returns a dict with the entry's URL (used as the index ref),
    its title, and a `body` built from the name, summary, and
    full content joined by newlines.
    """
    text = "\n".join([
        self.name,
        self.summary,
        self.content,
    ])
    return {
        'url': self.url,
        'title': self.title,
        'body': text,
    }
async def render(self): async def render(self):
if self.exists: if self.exists:
return return
@@ -1025,6 +1037,40 @@ class Category(dict):
self.ping_websub() self.ping_websub()
class Search(object):
    """Build an elasticlunr search index by executing elasticlunr.js
    inside an embedded V8 context (py_mini_racer), then dump the
    serialized index to ``search.json`` in the build directory.
    """

    def __init__(self, scriptpath='elasticlunr.js'):
        """Load elasticlunr.js into a fresh JS context and create an
        index with `title` and `body` fields, keyed on `url`.

        :param scriptpath: path to the elasticlunr.js source file;
            defaults to the current working directory, matching the
            original hard-coded behavior.
        """
        self.js = py_mini_racer.MiniRacer()
        with open(scriptpath, 'rt', encoding='utf-8') as f:
            self.js.eval(f.read())
        # NOTE: `index.saveDocument(false);` could be appended below to
        # shrink the index by not storing the original documents.
        self.js.eval("""
        var index = elasticlunr();
        index.addField('title');
        index.addField('body');
        index.setRef('url');
        """)

    @property
    def fpath(self):
        """Target file for the serialized index inside the build dir."""
        return os.path.join(
            settings.paths.get('build'),
            'search.json'
        )

    def add(self, data):
        """Add one document to the JS-side index.

        :param data: dict with `url`, `title`, and `body` keys (see
            Singular.corpus); json.dumps guarantees it reaches the JS
            context as a valid, properly escaped object literal.
        """
        self.js.eval("index.addDoc(%s);" % json.dumps(data))

    def save(self):
        """Serialize the index via `index.toJSON()` and write it to
        ``self.fpath`` as JSON."""
        with open(self.fpath, 'wt', encoding='utf-8') as f:
            json.dump(self.js.eval("index.toJSON()"), f)
def make(): def make():
start = int(round(time.time() * 1000)) start = int(round(time.time() * 1000))
content = settings.paths.get('content') content = settings.paths.get('content')
@@ -1044,6 +1090,8 @@ def make():
categories = {} categories = {}
categories['/'] = Category() categories['/'] = Category()
sitemap = OrderedDict() sitemap = OrderedDict()
search = Search()
for e in sorted(glob.glob(os.path.join(content, '*', '*', 'index.md'))): for e in sorted(glob.glob(os.path.join(content, '*', '*', 'index.md'))):
post = Singular(e) post = Singular(e)
if post.category not in categories: if post.category not in categories:
@@ -1056,6 +1104,7 @@ def make():
for i in post.images.values(): for i in post.images.values():
worker.append(i.downsize()) worker.append(i.downsize())
worker.append(post.render()) worker.append(post.render())
search.add(post.corpus)
sitemap[post.url] = post.mtime sitemap[post.url] = post.mtime
for category in categories.values(): for category in categories.values():
@@ -1079,6 +1128,9 @@ def make():
with open(t, 'wt') as f: with open(t, 'wt') as f:
f.write("\n".join(sorted(sitemap.keys()))) f.write("\n".join(sorted(sitemap.keys())))
# dump search index
search.save()
end = int(round(time.time() * 1000)) end = int(round(time.time() * 1000))
logging.info('process took %d ms' % (end - start)) logging.info('process took %d ms' % (end - start))