petermolnar's repositories — nasg: 18521317d3c8d857c6c0cde0c8cb04269c1caabc

fixed things

Peter Molnar hello@petermolnar.eu

Fri, 02 Jun 2017 10:19:55 +0000

commit

18521317d3c8d857c6c0cde0c8cb04269c1caabc

parent

40c334610d49d69e81cf589fea6de8517c80f163

5 files changed, 286 insertions(+), 45 deletions(-)

jump to

nasg.py

search.py

shared.py

update.sh

webmention.py

M nasg.py → nasg.py

@@ -16,6 +16,7 @@ import math
 import asyncio
 import csv
 import operator
+import getpass
 
 import magic
 import arrow
@@ -32,8 +33,10 @@ import jinja2
 import urllib.parse
 import shared
 from webmentiontools.send import WebmentionSend
+from bleach import clean
+from emoji import UNICODE_EMOJI
 
-import time
+from pprint import pprint
 
 def splitpath(path):
     parts = []
@@ -51,20 +54,34 @@ self.target = os.path.abspath(os.path.join(
             shared.config.get('target', 'builddir'),
             shared.config.get('var', 'searchdb')
         ))
+
         if not os.path.isdir(self.target):
             os.mkdir(self.target)
 
+        self.mtime = 0
+
         if index.exists_in(self.target):
             self.ix = index.open_dir(self.target)
+            tocfiles = glob.glob(os.path.join(
+                self.target,
+                "_MAIN_*.toc"
+            ))
+            if len(tocfiles):
+                self.mtime = int(os.path.getmtime(tocfiles[0]))
         else:
             self.ix = index.create_in(self.target, shared.schema)
+
         self.writer = self.ix.writer()
         self.qp = qparser.QueryParser("url", schema=shared.schema)
+
 
     async def append(self, singular):
         logging.debug("searching for existing index for %s", singular.fname)
-        exists = False
+        if self.mtime >= singular.mtime:
+            logging.debug("search index is newer than post mtime (%d vs %d), skipping post", self.mtime, singular.mtime)
+            return
 
+        exists = False
         q = self.qp.parse(singular.url)
         r = self.ix.searcher().search(q, limit=1)
         if r:
@@ -114,6 +131,7 @@ self.writer.update_document(
                 title=singular.title,
                 url=singular.url,
                 content=" ".join(list(map(str,[*content_real, *content_remote]))),
+                fuzzy=content,
                 date=singular.published.datetime,
                 tags=",".join(list(map(str, singular.tags))),
                 weight=weight,
@@ -274,7 +292,6 @@
     def get_metadata(self, *filenames):
         return json.loads(self.execute(
             '-sort',
-            #'-quiet',
             '-json',
             '-MIMEType',
             '-FileType',
@@ -299,6 +316,102 @@ '-GPSLatitude#',
             '-LensID',
             *filenames))
 
+
+class Comment(object):
+    def __init__(self, path):
+        logging.debug("initiating comment object from %s", path)
+        self.path = path
+        self.fname, self.ext = os.path.splitext(os.path.basename(self.path))
+        self.mtime = int(os.path.getmtime(self.path))
+        self.meta = {}
+        self.content = ''
+        self.__parse()
+
+    def __repr__(self):
+        return "%s" % (self.path)
+
+    def __parse(self):
+        with open(self.path, mode='rt') as f:
+            self.meta, self.content = frontmatter.parse(f.read())
+
+    @property
+    def reacji(self):
+        t = self.meta.get('type', 'webmention')
+        typemap = {
+            'like-of': '👍',
+            'bookmark-of': '🔖',
+            'favorite': '★',
+        }
+
+        if t in typemap.keys():
+            return typemap[t]
+
+        maybe = clean(self.content).strip()
+        if maybe in UNICODE_EMOJI:
+            return maybe
+
+        return ''
+
+    @property
+    def html(self):
+        return shared.Pandoc().convert(self.content)
+
+    @property
+    def tmplvars(self):
+        return {
+            'published': self.published.datetime,
+            'author': dict(self.meta.get('author', {})),
+            'content': self.content,
+            'html': self.html,
+            'source': self.meta.get('source', ''),
+            'target': self.meta.get('target', ''),
+            'type': self.meta.get('type', 'webmention'),
+            'reacji': self.reacji
+        }
+
+    @property
+    def published(self):
+        return arrow.get(self.meta.get('date', self.mtime))
+
+    @property
+    def pubtime(self):
+        return int(self.published.timestamp)
+
+    @property
+    def source(self):
+        s = self.meta.get('source')
+        for d in shared.config.get('site', 'domains').split(' '):
+            if d in s:
+                return ''
+        return s
+
+    @property
+    def target(self):
+        t = self.meta.get('target', shared.config.get('site', 'url'))
+        t = '{p.path}'.format(p=urllib.parse.urlparse(t)).strip('/')
+        return t
+
+
+class Comments(object):
+    def __init__(self):
+        self.files = glob.glob(os.path.join(
+            shared.config.get('source', 'commentsdir'),
+            "*.md"
+        ))
+        self.bytarget = {}
+
+    def __getitem__(self, key):
+        return self.bytarget.get(key, BaseIter())
+
+    def populate(self):
+        for fpath in self.files:
+            item = Comment(fpath)
+            t = item.target
+            if not self.bytarget.get(t):
+                self.bytarget[t] = BaseIter()
+            self.bytarget[t].append(item.pubtime, item)
+
+
 class Images(BaseIter):
     def __init__(self, extensions=['jpg', 'gif', 'png']):
         super(Images, self).__init__()
@@ -646,7 +759,10 @@ return '/'
 
     @property
     def mtime(self):
-        return int(list(sorted(self.data.keys(), reverse=True))[0])
+        if hasattr(self, '_mtime'):
+            return self._mtime
+        self._mtime = int(list(sorted(self.data.keys(), reverse=True))[0])
+        return self._mtime
 
     def __mkdirs(self):
         check = [self.basep, self.myp, self.feedp]
@@ -671,16 +787,23 @@ return "%s/index.html" % (self.myp)
         else:
             return "%s/%d/index.html" % (self.pagep, page)
 
-
     async def render(self, renderer):
         self.__mkdirs()
         page = 1
         testpath = self.tpath(page)
         if not shared.config.getboolean('params', 'force') and os.path.isfile(testpath):
             ttime = int(os.path.getmtime(testpath))
-            if ttime == self.mtime:
+            mtime = self.mtime
+            if ttime == mtime:
                 logging.info('taxonomy index for "%s" exists and up-to-date (lastmod: %d)', self.slug, ttime)
                 return
+            else:
+                logging.info('taxonomy update needed: %s timestamp is %d, last post timestamp is %d (%s)',
+                    testpath,
+                    ttime,
+                    mtime,
+                    self.data[mtime].fname
+                )
 
         while page <= self.pages:
             self.renderpage(renderer, page)
@@ -712,7 +835,7 @@
         r = renderer.j2.get_template('archive.html').render(tmplvars)
         with open(target, "wt") as html:
             html.write(r)
-            os.utime(target, (self.mtime, self.mtime))
+        os.utime(target, (self.mtime, self.mtime))
 
         if 1 == page:
             target = os.path.join(self.feedp, 'index.xml')
@@ -723,9 +846,10 @@ html.write(r)
             os.utime(target, (self.mtime, self.mtime))
 
 class Content(BaseIter):
-    def __init__(self, images, extensions=['md']):
+    def __init__(self, images, comments, extensions=['md']):
         super(Content, self).__init__()
         self.images = images
+        self.comments = comments
         basepath = shared.config.get('source', 'contentdir')
         self.files = []
         for ext in extensions:
@@ -733,12 +857,14 @@ self.files += glob.glob(os.path.join(basepath, "*", "*.%s" % ext))
         self.tags = {}
         self.categories = {}
         self.front = Taxonomy()
+        self.shortslugmap = {}
 
     def populate(self):
         now = arrow.utcnow().timestamp
         for fpath in self.files:
-            item = Singular(fpath, self.images)
+            item = Singular(fpath, self.images, self.comments)
             self.append(item.pubtime, item)
+            #self.shortslugmap[item.shortslug] = item.fname
 
             if item.pubtime > now:
                 logging.warning("skipping future post %s", item.fname)
@@ -829,10 +955,11 @@ html.write(r)
             html.close()
 
 class Singular(object):
-    def __init__(self, path, images):
+    def __init__(self, path, images, comments):
         logging.debug("initiating singular object from %s", path)
         self.path = path
         self.images = images
+        self.allcomments = comments
         self.category = splitpath(path)[-2]
         self.mtime = int(os.path.getmtime(self.path))
         self.fname, self.ext = os.path.splitext(os.path.basename(self.path))
@@ -903,6 +1030,47 @@ "%s" % image
             )
 
     @property
+    def comments(self):
+        if hasattr(self, '_comments'):
+            return self._comments
+
+        # the comments could point to both the "real" url and the shortslug
+        # so I need to get both
+        c = {}
+        for by in [self.fname, self.shortslug]:
+            c = {**c, **self.allcomments[by].data}
+        #self._comments = [c[k].tmplvars for k in list(sorted(c.keys(), reverse=True))]
+        self._comments = [c[k] for k in list(sorted(c.keys(), reverse=True))]
+        return self._comments
+
+    @property
+    def replies(self):
+        if hasattr(self, '_replies'):
+            return self._replies
+        self._replies = [c.tmplvars for c in self.comments if not len(c.reacji)]
+        return self._replies
+
+    @property
+    def reacjis(self):
+        if hasattr(self, '_reacjis'):
+            return self._reacjis
+        reacjis = {}
+        for c in self.comments:
+            rj = c.reacji
+
+            if not len(rj):
+                continue
+
+            if not reacjis.get(rj, False):
+                reacjis[rj] = []
+
+            reacjis[rj].append(c.tmplvars)
+
+        self._reacjis = reacjis
+        return self._reacjis
+
+
+    @property
     def reactions(self):
         # getting rid of '-' to avoid css trouble and similar
         convert = {
@@ -958,17 +1126,23 @@ return list(self.meta.get('tags', []))
 
     @property
     def published(self):
-        return arrow.get(
+        if hasattr(self, '_published'):
+            return self._published
+        self._published = arrow.get(
             self.meta.get('published', self.mtime)
         )
+        return self._published
 
     @property
     def updated(self):
-        return arrow.get(
+        if hasattr(self, '_updated'):
+            return self._updated
+        self._updated = arrow.get(
             self.meta.get('updated',
                 self.meta.get('published', self.mtime)
             )
         )
+        return self._updated
 
     @property
     def pubtime(self):
@@ -1027,7 +1201,19 @@ return 'singular.html'
 
     @property
     def html(self):
-        return shared.Pandoc().convert(self.content)
+        if hasattr(self, '_html'):
+            return self._html
+        self._html = shared.Pandoc().convert(self.content)
+        return self._html
+
+    @property
+    def sumhtml(self):
+        if hasattr(self, '_sumhtml'):
+            return self._sumhtml
+        self._sumhtml = self.meta.get('summary', '')
+        if len(self._sumhtml):
+            self._sumhtml = shared.Pandoc().convert(self.summary)
+        return self._sumhtml
 
     @property
     def offlinecopies(self):
@@ -1110,7 +1296,10 @@ return self.photo.rssenclosure
 
     @property
     def tmplvars(self):
-        return {
+        if hasattr(self, '_tmplvars'):
+            return self._tmplvars
+
+        self._tmplvars = {
             'title': self.title,
             'published': self.published.datetime,
             'tags': self.tags,
@@ -1120,7 +1309,7 @@ 'html': self.html,
             'category': self.category,
             'reactions': self.reactions,
             'updated': self.updated.datetime,
-            'summary': self.meta.get('summary', ''),
+            'summary': self.sumhtml,
             'exif': self.exif,
             'lang': self.lang,
             'syndicate': '',
@@ -1128,11 +1317,18 @@ 'slug': self.fname,
             'shortslug': self.shortslug,
             'rssenclosure': self.rssenclosure,
             'copies': self.offlinecopies,
+            'comments': self.comments,
+            'replies': self.replies,
+            'reacjis': self.reacjis,
         }
+        return self._tmplvars
 
     @property
     def shortslug(self):
-        return self.baseN(self.pubtime)
+        if hasattr(self, '_shortslug'):
+            return self._shortslug
+        self._shortslug = self.baseN(self.pubtime)
+        return self._shortslug
 
     @staticmethod
     def baseN(num, b=36, numerals="0123456789abcdefghijklmnopqrstuvwxyz"):
@@ -1147,6 +1343,11 @@ ).lstrip(numerals[0]) + numerals[num % b]
         )
 
     async def render(self, renderer):
+        if len(self.comments):
+            lctime = self.comments[0].mtime
+            if lctime > self.mtime:
+                self.mtime = lctime
+
         logging.info("rendering and saving %s", self.fname)
         targetdir = os.path.abspath(os.path.join(
             shared.config.get('target', 'builddir'),
@@ -1174,8 +1375,29 @@ with open(target, "w") as html:
             logging.debug('writing %s', target)
             html.write(r)
             html.close()
-            os.utime(target, (self.mtime, self.mtime))
+        os.utime(target, (self.mtime, self.mtime))
+
+
+    async def ping(self, pinger):
+        for target in self.urls:
+            record = {
+                'mtime': self.mtime,
+                'source': self.url,
+                'target': target
+            }
+            h = json.dumps(record, sort_keys=True)
+            h = hashlib.sha1(h.encode('utf-8')).hexdigest()
+            if pinger.db.get(h, False):
+                logging.debug(
+                    "%s is already pinged from %s @ %d, skipping",
+                    target, self.url, self.mtime
+                )
+                continue
 
+            logging.info("sending webmention from %s to %s", self.url, target)
+            ws = WebmentionSend(self.url, target)
+            ws.send(allow_redirects=True, timeout=30)
+            pinger.db[h] = record
 
 class Webmentioner(object):
     def __init__(self):
@@ -1190,32 +1412,14 @@ self.db = json.loads(f.read())
         else:
             self.db = {}
 
-    async def ping(self, singular, dry_run = False):
-        for target in singular.urls:
-            record = {
-                'mtime': singular.mtime,
-                'source': singular.url,
-                'target': target
-            }
-            h = json.dumps(record, sort_keys=True)
-            h = hashlib.sha1(h.encode('utf-8')).hexdigest()
-            if self.db.get(h, False):
-                logging.debug("%s is already pinged from %s @ %d, skipping",
-                    target, singular.url, singular.mtime)
-                continue
-
-            logging.info("sending webmention from %s to %s", singular, target)
-            if not dry_run:
-                ws = WebmentionSend(source, target)
-                await ws.send(allowredirect=True, timeout=30)
-            self.db[h] = record
-
     def finish(self):
         with open(self.dbpath, 'wt') as f:
             f.write(json.dumps(self.db, sort_keys=True, indent=4))
 
 
 class NASG(object):
+    lockfile = os.path.join(tempfile.gettempdir(), 'nasg_%s.lock' % getpass.getuser())
+
     def __init__(self):
         # --- set params
         parser = argparse.ArgumentParser(description='Parameters for NASG')
@@ -1300,9 +1504,19 @@ await searchdb.append(singular)
 
     async def __aping(self, content, pinger):
         for (pubtime, singular) in content:
-            await pinger.ping(singular)
+            await singular.ping(pinger)
 
     def run(self):
+        if os.path.isfile(self.lockfile):
+            raise ValueError(
+                "Lockfile is present at %s; another instance is running." % (
+                    self.lockfile
+                )
+            )
+        else:
+            atexit.register(os.remove, self.lockfile)
+            with open(self.lockfile, "wt") as f:
+                f.write(arrow.utcnow().format())
 
         if shared.config.getboolean('params', 'clear'):
             input('about to clear build directory, press enter to continue')
@@ -1327,20 +1541,30 @@ if not shared.config.getboolean('params', 'nodownsize'):
             logging.info("downsizing images")
             loop.run_until_complete(self.__adownsize(images, existing))
 
+        logging.info("discovering comments")
+        comments = Comments()
+        comments.populate()
+
         logging.info("discovering content")
-        content = Content(images)
+        content = Content(images, comments)
         content.populate()
 
         renderer = Renderer()
         if not shared.config.getboolean('params', 'norender'):
             logging.info("rendering content")
-            loop.run_until_complete(self.__acrender(content, renderer))
+            loop.run_until_complete(self.__acrender(
+                content, renderer
+            ))
 
             logging.info("rendering categories and tags")
-            loop.run_until_complete(self.__atrender([content.categories, content.tags], renderer))
+            loop.run_until_complete(self.__atrender(
+                [content.categories, content.tags], renderer
+            ))
 
             logging.info("rendering the front page elements")
-            loop.run_until_complete(self.__afrender(content.front, renderer))
+            loop.run_until_complete(self.__afrender(
+                content.front, renderer
+            ))
 
             logging.info("rendering sitemap")
             content.sitemap()

M search.py → search.py

@@ -28,7 +28,7 @@ shared.config.get('var', 'searchdb')
     )))
 
     qp = qparser.MultifieldParser(
-        ["title", "content","fuzzy", "tags"],
+        ["title", "content", "tags"],
         schema = shared.schema
     )

M shared.py → shared.py

@@ -104,7 +104,10 @@ def __enter__(self):
         self.process = subprocess.Popen(
             [self.executable, "-stay_open", "True",  "-@", "-"],
             universal_newlines=True,
-            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE
+        )
         return self
 
     def  __exit__(self, exc_type, exc_value, traceback):

A update.sh

@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+lastmodfile="$(find /web/petermolnar.net/petermolnar.net/ -maxdepth 2 -type f -print0 | xargs -0r ls -ltr | grep -E '(content|comments|copy|files|offlinecopies|photos)' | tail -1 | awk '{print $9}')"
+lastmod=$(stat -c %Y "$lastmodfile")
+lastrunfile="/web/petermolnar.net/petermolnar.net/build/magic.php"
+lastrun=$(stat -c %Y "$lastrunfile")
+
+
+if [ "$lastrun" -lt "$lastmod" ]; then
+    cd /web/petermolnar.net/petermolnar.net/nasg; python3.5 nasg.py --loglevel info
+fi
+
+exit 0

M webmention.py → webmention.py

@@ -111,7 +111,8 @@
     def _save(self):
         target = os.path.join(
             shared.config.get('source', 'commentsdir'),
-            self.mhash
+            self.mhash,
+            '.md'
         )
 
         if os.path.isfile(target):