adding XRay as parser for webmentions; processing incoming webmentions; moved notify via telegram to shared

This commit is contained in:
Peter Molnar 2017-10-29 19:11:01 +00:00
parent 2711276e08
commit e5518ba4a1
4 changed files with 180 additions and 63 deletions

39
db.py
View file

@ -199,20 +199,16 @@ class WebmentionQueue(object):
) )
cursor = self.db.cursor() cursor = self.db.cursor()
cursor.execute('''CREATE TABLE IF NOT EXISTS `archive` ( cursor.execute('''
`id` INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE, CREATE TABLE IF NOT EXISTS `queue` (
`received` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
`processed` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
`source` TEXT NOT NULL,
`target` TEXT NOT NULL
);''');
cursor.execute('''CREATE TABLE IF NOT EXISTS `queue` (
`id` INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE, `id` INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE,
`timestamp` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, `timestamp` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
`source` TEXT NOT NULL, `source` TEXT NOT NULL,
`target` TEXT NOT NULL `target` TEXT NOT NULL,
);'''); `status` INTEGER NOT NULL DEFAULT 0,
`mtime` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);
''')
self.db.commit() self.db.commit()
def __exit__(self): def __exit__(self):
@ -230,3 +226,24 @@ class WebmentionQueue(object):
) )
) )
self.db.commit() self.db.commit()
def get_queued(self, fname=None):
    """ Return the queued (status = 0) webmentions whose target contains fname.

    Args:
        fname: text to look for anywhere in the `target` column; when it
            is None or empty, every queued entry is returned.

    Returns:
        A list of dicts with the keys 'id', 'dt', 'source' and 'target',
        filled from the id, timestamp, source and target columns.
    """
    logging.debug('getting queued webmentions for %s', fname)
    # the columns are named explicitly, so the positional mapping below
    # does not depend on the column order of the table
    sql = 'SELECT id, timestamp, source, target FROM queue WHERE status = 0'
    params = ()
    if fname:
        # escape the LIKE wildcards: fname must only match literally
        needle = (
            fname
            .replace('\\', '\\\\')
            .replace('%', '\\%')
            .replace('_', '\\_')
        )
        sql += " AND target LIKE ? ESCAPE '\\'"
        params = ('%' + needle + '%',)
    cursor = self.db.cursor()
    cursor.execute(sql, params)
    return [
        {
            'id': row[0],
            'dt': row[1],
            'source': row[2],
            'target': row[3],
        }
        for row in cursor.fetchall()
    ]
def entry_done(self, id):
    """ Flag the queue entry with the given id as processed (status = 1).

    Args:
        id: value of the `id` column of the row to update.
    """
    logging.debug('setting %s webmention to done', id)
    cursor = self.db.cursor()
    # NOTE(review): mtime is presumably the last modification time of the
    # row, so it is refreshed together with the status - confirm
    cursor.execute(
        'UPDATE queue SET status = 1, mtime = CURRENT_TIMESTAMP WHERE id = ?',
        (id,)
    )
    self.db.commit()

130
nasg.py
View file

@ -9,7 +9,6 @@ import glob
import argparse import argparse
import shutil import shutil
from urllib.parse import urlparse from urllib.parse import urlparse
#from urllib.parse import urljoin
import asyncio import asyncio
from math import ceil from math import ceil
import csv import csv
@ -20,9 +19,6 @@ import frontmatter
import arrow import arrow
import langdetect import langdetect
import wand.image import wand.image
#import requests
#from bs4 import BeautifulSoup
from emoji import UNICODE_EMOJI from emoji import UNICODE_EMOJI
import shared import shared
@ -31,7 +27,7 @@ import db
from pprint import pprint from pprint import pprint
class MagicPHP(object): class MagicPHP(object):
name = 'magic.php' name = 'index.php'
def __init__(self): def __init__(self):
# init 'gone 410' array # init 'gone 410' array
@ -330,6 +326,26 @@ class Singular(object):
self.photo.cssclass = 'u-photo' self.photo.cssclass = 'u-photo'
def init_extras(self):
    """ process the queued webmentions, then read the comments property """
    self.process_webmentions()
    # only the property access matters here, the value is thrown away
    # NOTE(review): presumably this is what lets the comment files refresh
    # the mtime of the entry - confirm against the comments property
    _ = self.comments
# TODO this should be async
def process_webmentions(self):
    """ Fetch and save every webmention queued for the URL of this entry.

    Each queued row is handed to Webmention; once its run() has returned,
    the row is flagged as done in the queue.
    """
    wdb = db.WebmentionQueue()
    try:
        for incoming in wdb.get_queued(self.url):
            wm = Webmention(
                incoming.get('id'),
                incoming.get('source'),
                incoming.get('target'),
                incoming.get('dt')
            )
            wm.run()
            # reached only if run() did not raise: a failed entry stays
            # queued instead of being flagged as done
            wdb.entry_done(incoming.get('id'))
    finally:
        # finish the queue even when a webmention could not be processed
        wdb.finish()
@property @property
def redirects(self): def redirects(self):
@ -380,6 +396,10 @@ class Singular(object):
) )
cfiles = [*cfiles, *maybe] cfiles = [*cfiles, *maybe]
for cpath in cfiles: for cpath in cfiles:
cmtime = os.path.getmtime(cpath)
if cmtime > self.mtime:
self.mtime = cmtime
c = Comment(cpath) c = Comment(cpath)
comments.append(c.mtime, c) comments.append(c.mtime, c)
return comments return comments
@ -853,7 +873,6 @@ class WebImage(object):
def _copy(self): def _copy(self):
fname = "%s%s" % (self.fname, self.fext) fname = "%s%s" % (self.fname, self.fext)
logging.info("copying %s to build dir", fname)
fpath = os.path.join( fpath = os.path.join(
shared.config.get('common', 'build'), shared.config.get('common', 'build'),
shared.config.get('common', 'files'), shared.config.get('common', 'files'),
@ -863,6 +882,7 @@ class WebImage(object):
mtime = os.path.getmtime(fpath) mtime = os.path.getmtime(fpath)
if self.mtime <= mtime: if self.mtime <= mtime:
return return
logging.info("copying %s to build dir", fname)
shutil.copy(self.fpath, fpath) shutil.copy(self.fpath, fpath)
def _intermediate_dimension(self, size, width, height, crop=False): def _intermediate_dimension(self, size, width, height, crop=False):
@ -878,7 +898,7 @@ class WebImage(object):
return (w, h) return (w, h)
def _intermediate(self, img, size, target, crop=False): def _intermediate(self, img, size, target, crop=False):
if img.width <= size and img.height <= size: if img.width < size and img.height < size:
return False return False
with img.clone() as thumb: with img.clone() as thumb:
@ -1044,40 +1064,81 @@ class Comment(object):
return shared.j2.get_template(tmplfile).render({'comment': self.tmplvars}) return shared.j2.get_template(tmplfile).render({'comment': self.tmplvars})
class Webmention(object):
    """ A single incoming webmention: the source is parsed with XRay and
    the result is saved as a markdown file with frontmatter under the
    comment directory of the target """

    # relation keys looked for in the parsed data; the first hit wins
    RELATIONS = ('in-reply-to', 'repost-of', 'bookmark-of', 'like-of')

    def __init__(self, id, source, target, dt):
        """
        Args:
            id: identifier of the webmention (the queue row id in the caller)
            source: URL of the page that mentions the target
            target: URL that was mentioned
            dt: time of the webmention, anything arrow.get() accepts
        """
        self.source = source
        self.target = target
        self.id = id
        self.dt = arrow.get(dt).to('utc')
        logging.info(
            "processing webmention %s => %s",
            self.source,
            self.target
        )

    def _fetch(self):
        """ parse the source URL with XRay and keep the result """
        self._source = shared.XRay(self.source).parse()

    def _save(self):
        """ write content and metadata into the comment file """
        fm = frontmatter.loads('')
        fm.content = self.content
        fm.metadata = self.meta
        # explicit encoding: the result must not depend on the locale
        with open(self.fpath, 'wt', encoding='utf-8') as f:
            f.write(frontmatter.dumps(fm))

    def run(self):
        """ fetch the source, then save it as a comment file """
        self._fetch()
        self._save()

    @property
    def _data(self):
        """ the 'data' part of the parsed source; an empty dict when the
        parser returned nothing usable (e.g. an error response) """
        return (self._source or {}).get('data') or {}

    @property
    def relation(self):
        """ the first known relation key found in the parsed data,
        'webmention' if there is none """
        data = self._data
        for maybe in self.RELATIONS:
            if maybe in data:
                return maybe
        return 'webmention'

    @property
    def meta(self):
        """ metadata of the comment file; built once, then cached """
        if not hasattr(self, '_meta'):
            data = self._data
            self._meta = {
                'author': data.get('author'),
                'type': self.relation,
                'target': self.target,
                'source': self.source,
                'date': data.get('published'),
            }
        return self._meta

    @property
    def content(self):
        """ the HTML content of the source converted with Pandoc; the
        plain text content, or an empty string, when there is no HTML """
        content = self._data.get('content') or {}
        html = content.get('html')
        if html:
            return shared.Pandoc('html').convert(html)
        return content.get('text') or ''

    @property
    def fname(self):
        """ file name: [timestamp]-[slugified source URL].md """
        return "%d-%s.md" % (
            self.dt.timestamp,
            shared.slugfname(self.source)
        )

    @property
    def fpath(self):
        """ full path of the comment file; the directory, named after the
        last path segment of the target, is created when missing """
        tdir = os.path.join(
            shared.config.get('dirs', 'comment'),
            self.target.strip('/').split('/')[-1]
        )
        # exist_ok: no failure if the directory shows up in the meantime
        os.makedirs(tdir, exist_ok=True)
        return os.path.join(
            tdir,
            self.fname
        )
#def webmentions(target_slug):
# find all webmentions in the relevant directory
# return mtime => Webmention hash
def setup(): def setup():
""" parse input parameters and add them as params section to config """ """ parse input parameters and add them as params section to config """
@ -1161,6 +1222,7 @@ def build():
for f, post in content: for f, post in content:
logging.info("PARSING %s", f) logging.info("PARSING %s", f)
post.init_extras()
# extend redirects # extend redirects
for r in post.redirects: for r in post.redirects:

View file

@ -82,22 +82,12 @@ if __name__ == '__main__':
wdb.queue(source,target) wdb.queue(source,target)
# telegram notification, if set # telegram notification, if set
if shared.config.has_section('api_telegram'): shared.notify(
url = "https://api.telegram.org/bot%s/sendMessage" % ( 'incoming webmention from %s to %s' % (
shared.config.get('api_telegram', 'api_token')
)
data = {
'chat_id': shared.config.get('api_telegram', 'chat_id'),
'text': 'incoming webmention from %s to %s' % (
source, source,
target target
) )
} )
# fire and forget
try:
requests.post(url, data=data)
except:
pass
response = sanic.response.text("Accepted", status=202) response = sanic.response.text("Accepted", status=202)
return response return response

View file

@ -6,6 +6,7 @@ import logging
import subprocess import subprocess
import json import json
import sqlite3 import sqlite3
import requests
from slugify import slugify from slugify import slugify
import jinja2 import jinja2
@ -26,6 +27,34 @@ class CMDLine(object):
return None return None
class XRay(CMDLine):
    """ XRay (p3k) call through the PHP command line; parses a URL """

    xraypath = '/usr/local/lib/php/xray'

    # The XRay path and the URL reach PHP as arguments ($argv), they are
    # never pasted into the code: the URL comes from the outside, and a
    # crafted one could otherwise inject PHP. Raw strings keep the
    # backslash of the p3k\XRay namespace intact.
    phpcode = (
        r'chdir($argv[1]); include("vendor/autoload.php"); '
        r'$xray = new p3k\XRay(); '
        r'echo(json_encode($xray->parse($argv[2])));'
    )

    def __init__(self, url):
        """
        Args:
            url: the URL to parse
        """
        super().__init__('php')
        self.url = url

    def parse(self):
        """ Run XRay on the URL and return the decoded JSON output.

        Anything PHP writes to stderr is logged as an error.

        Raises:
            json.JSONDecodeError: when the output of PHP is not valid JSON.
        """
        cmd = (
            self.executable,
            '-r',
            self.phpcode,
            # everything after -- is handed to the script as $argv[1..]
            '--',
            self.xraypath,
            self.url
        )
        logging.debug('pulling %s with XRay', self.url)
        p = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout, stderr = p.communicate()
        if stderr:
            logging.error("Error with XRay: %s", stderr)
        return json.loads(stdout.decode('utf-8').strip())
class Pandoc(CMDLine): class Pandoc(CMDLine):
""" Pandoc command line call with piped in- and output """ """ Pandoc command line call with piped in- and output """
@ -244,6 +273,25 @@ def __setup_sitevars():
return SiteVars return SiteVars
def notify(msg):
    """ Send msg as a Telegram message, if the api_telegram section is
    present in the config; best effort, a failed request never raises.

    Args:
        msg: text of the message
    """
    # NOTE(review): other files call this as shared.notify(), so this is
    # the shared module itself and no `shared` name is imported here; the
    # config is therefore addressed as the module-level `config` - confirm
    # that it is defined under that name in this file
    if not config.has_section('api_telegram'):
        return

    url = "https://api.telegram.org/bot%s/sendMessage" % (
        config.get('api_telegram', 'api_token')
    )
    data = {
        'chat_id': config.get('api_telegram', 'chat_id'),
        'text': msg
    }
    # fire and forget, but with a timeout, so an unreachable API can not
    # block the caller forever
    try:
        requests.post(url, data=data, timeout=10)
    except requests.exceptions.RequestException as e:
        # only the exception type is logged: the message may contain the
        # URL, and the URL contains the API token
        logging.warning(
            'telegram notification failed: %s',
            type(e).__name__
        )
ARROWFORMAT = { ARROWFORMAT = {
'iso': 'YYYY-MM-DDTHH:mm:ssZ', 'iso': 'YYYY-MM-DDTHH:mm:ssZ',
'display': 'YYYY-MM-DD HH:mm', 'display': 'YYYY-MM-DD HH:mm',