all repos — nasg @ e5518ba4a1262d8576e0d59c2e921d0af5991caf

adding XRay as parser for webmentions; processing incoming webmentions; moved notify via telegram to shared
Peter Molnar hello@petermolnar.eu
Sun, 29 Oct 2017 19:11:01 +0000
commit

e5518ba4a1262d8576e0d59c2e921d0af5991caf

parent

2711276e088438ab6e39693c659ac2e5af3799f8

4 files changed, 180 insertions(+), 63 deletions(-)

jump to
M db.py

@@ -199,20 +199,16 @@ "%s" % shared.config.get('var', 'webmentiondb')

) cursor = self.db.cursor() - cursor.execute('''CREATE TABLE IF NOT EXISTS `archive` ( - `id` INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE, - `received` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, - `processed` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, - `source` TEXT NOT NULL, - `target` TEXT NOT NULL - );'''); - - cursor.execute('''CREATE TABLE IF NOT EXISTS `queue` ( - `id` INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE, - `timestamp` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, - `source` TEXT NOT NULL, - `target` TEXT NOT NULL - );'''); + cursor.execute(''' + CREATE TABLE IF NOT EXISTS `queue` ( + `id` INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE, + `timestamp` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + `source` TEXT NOT NULL, + `target` TEXT NOT NULL, + `status` INTEGER NOT NULL DEFAULT 0, + `mtime` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP + ); + ''') self.db.commit() def __exit__(self):

@@ -230,3 +226,24 @@ target

) ) self.db.commit() + + def get_queued(self, fname=None): + logging.debug('getting queued webmentions for %s', fname) + ret = [] + cursor = self.db.cursor() + cursor.execute('''SELECT * FROM queue WHERE target LIKE ? AND status = 0''', ('%'+fname+'%',)) + rows = cursor.fetchall() + for r in rows: + ret.append({ + 'id': r[0], + 'dt': r[1], + 'source': r[2], + 'target': r[3], + }) + return ret + + def entry_done(self, id): + logging.debug('setting %s webmention to done', id) + cursor = self.db.cursor() + cursor.execute("UPDATE queue SET status = 1 where ID=?", (id,)) + self.db.commit()
M nasg.py

@@ -9,7 +9,6 @@ import glob

import argparse import shutil from urllib.parse import urlparse -#from urllib.parse import urljoin import asyncio from math import ceil import csv

@@ -20,9 +19,6 @@ import frontmatter

import arrow import langdetect import wand.image - -#import requests -#from bs4 import BeautifulSoup from emoji import UNICODE_EMOJI import shared

@@ -31,7 +27,7 @@

from pprint import pprint class MagicPHP(object): - name = 'magic.php' + name = 'index.php' def __init__(self): # init 'gone 410' array

@@ -330,6 +326,26 @@ self.photo.inline = False

self.photo.cssclass = 'u-photo' + def init_extras(self): + self.process_webmentions() + c = self.comments + + + # TODO this should be async + def process_webmentions(self): + wdb = db.WebmentionQueue() + queued = wdb.get_queued(self.url) + for incoming in queued: + wm = Webmention( + incoming.get('id'), + incoming.get('source'), + incoming.get('target'), + incoming.get('dt') + ) + wm.run() + + wdb.entry_done(incoming.get('id')) + wdb.finish() @property def redirects(self):

@@ -380,6 +396,10 @@ )

) cfiles = [*cfiles, *maybe] for cpath in cfiles: + cmtime = os.path.getmtime(cpath) + if cmtime > self.mtime: + self.mtime = cmtime + c = Comment(cpath) comments.append(c.mtime, c) return comments

@@ -853,7 +873,6 @@ return img

def _copy(self): fname = "%s%s" % (self.fname, self.fext) - logging.info("copying %s to build dir", fname) fpath = os.path.join( shared.config.get('common', 'build'), shared.config.get('common', 'files'),

@@ -863,6 +882,7 @@ if os.path.isfile(fpath):

mtime = os.path.getmtime(fpath) if self.mtime <= mtime: return + logging.info("copying %s to build dir", fname) shutil.copy(self.fpath, fpath) def _intermediate_dimension(self, size, width, height, crop=False):

@@ -878,7 +898,7 @@ w = int(float(size / height) * width)

return (w, h) def _intermediate(self, img, size, target, crop=False): - if img.width <= size and img.height <= size: + if img.width < size and img.height < size: return False with img.clone() as thumb:

@@ -1044,40 +1064,81 @@ tmplfile = "%s.html" % (__class__.__name__)

return shared.j2.get_template(tmplfile).render({'comment': self.tmplvars}) -#class SendWebmention(object): - ## TODO def __init__(self, source, target): - ## check in gone.tsv? - ## discover endpoint - ## send webmention - ## add to DB on return +class Webmention(object): + def __init__ (self, id, source, target, dt): + self.source = source + self.target = target + self.id = id + self.dt = arrow.get(dt).to('utc') + logging.info( + "processing webmention %s => %s", + self.source, + self.target + ) - #def run(self): - #return + def _fetch(self): + self._source = shared.XRay(self.source).parse() + def _save(self): + fm = frontmatter.loads('') + fm.content = self.content + fm.metadata = self.meta + with open(self.fpath, 'wt') as f: + f.write(frontmatter.dumps(fm)) + return -#class ReceiveWebmention(object): - ## TODO def __init__(self, source, target): - ## pull remote - ## validate if page links to X anywhere - ## find h-entry or use root as SOURCE - ## find author in SOURCE - ## find content in SOURCE - ## save under comments/[target slug]/mtime-[from-slufigied-url].md - ## + def run(self): + self._fetch() + self._save() - ## add to DB on return - #def run(self): - #return + @property + def relation(self): + r = 'webmention' + k = self._source.get('data').keys() + for maybe in ['in-reply-to', 'repost-of', 'bookmark-of', 'like-of']: + if maybe in k: + r = maybe + break + return r -#def parse_received_queue(): - # iterate over DB received + @property + def meta(self): + if not hasattr(self, '_meta'): + self._meta = { + 'author': self._source.get('data').get('author'), + 'type': self.relation, + 'target': self.target, + 'source': self.source, + 'date': self._source.get('data').get('published'), + } + return self._meta -#def parse_send_queue(): - # iterate over DB needs sending + @property + def content(self): + return shared.Pandoc('html').convert( + self._source.get('data').get('content').get('html') + ) -#def webmentions(target_slug): - # find all webmentions in the 
relevant directory - # return mtime => Webmention hash + @property + def fname(self): + return "%d-%s.md" % ( + self.dt.timestamp, + shared.slugfname(self.source) + ) + + @property + def fpath(self): + tdir = os.path.join( + shared.config.get('dirs', 'comment'), + self.target.rstrip('/').strip('/').split('/')[-1] + ) + if not os.path.isdir(tdir): + os.makedirs(tdir) + return os.path.join( + tdir, + self.fname + ) + def setup(): """ parse input parameters and add them as params section to config """

@@ -1161,6 +1222,7 @@ collector_categories = NoDupeContainer()

for f, post in content: logging.info("PARSING %s", f) + post.init_extras() # extend redirects for r in post.redirects:
M router.py

@@ -82,22 +82,12 @@ wdb = db.WebmentionQueue()

wdb.queue(source,target) # telegram notification, if set - if shared.config.has_section('api_telegram'): - url = "https://api.telegram.org/bot%s/sendMessage" % ( - shared.config.get('api_telegram', 'api_token') + shared.notify( + 'incoming webmention from %s to %s' % ( + source, + target ) - data = { - 'chat_id': shared.config.get('api_telegram', 'chat_id'), - 'text': 'incoming webmention from %s to %s' % ( - source, - target - ) - } - # fire and forget - try: - requests.post(url, data=data) - except: - pass + ) response = sanic.response.text("Accepted", status=202) return response
M shared.py

@@ -6,6 +6,7 @@ import logging

import subprocess import json import sqlite3 +import requests from slugify import slugify import jinja2

@@ -24,6 +25,34 @@ which = glob.glob(os.path.join(d, name), recursive=True)

if which: return which.pop() return None + + +class XRay(CMDLine): + xraypath = '/usr/local/lib/php/xray' + + def __init__(self, url): + super().__init__('php') + self.url = url + + def parse(self): + cmd = ( + self.executable, + '-r', + '''chdir("%s"); include("vendor/autoload.php"); $xray = new p3k\XRay(); echo(json_encode($xray->parse("%s")));''' % (self.xraypath, self.url) + ) + logging.debug('pulling %s with XRay', self.url) + p = subprocess.Popen( + cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + stdout, stderr = p.communicate() + if stderr: + logging.error("Error with XRay: %s", stderr) + + return json.loads(stdout.decode('utf-8').strip()) class Pandoc(CMDLine):

@@ -242,6 +271,25 @@ SiteVars[section][sub].update({o: config.get(sub, o)})

# push the whole thing into cache return SiteVars + + +def notify(msg): + # telegram notification, if set + if not shared.config.has_section('api_telegram'): + return + + url = "https://api.telegram.org/bot%s/sendMessage" % ( + shared.config.get('api_telegram', 'api_token') + ) + data = { + 'chat_id': shared.config.get('api_telegram', 'chat_id'), + 'text': msg + } + # fire and forget + try: + requests.post(url, data=data) + except: + pass ARROWFORMAT = {