adding XRay as parser for webmentions; processing incoming webmentions; moving the Telegram notify helper to shared

Peter Molnar 2017-10-29 19:11:01 +00:00
parent 2711276e08
commit e5518ba4a1
4 changed files with 180 additions and 63 deletions

45
db.py

@@ -199,20 +199,16 @@ class WebmentionQueue(object):
)
cursor = self.db.cursor()
cursor.execute('''CREATE TABLE IF NOT EXISTS `archive` (
`id` INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE,
`received` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
`processed` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
`source` TEXT NOT NULL,
`target` TEXT NOT NULL
);''');
cursor.execute('''CREATE TABLE IF NOT EXISTS `queue` (
`id` INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE,
`timestamp` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
`source` TEXT NOT NULL,
`target` TEXT NOT NULL
);''');
cursor.execute('''
CREATE TABLE IF NOT EXISTS `queue` (
`id` INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE,
`timestamp` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
`source` TEXT NOT NULL,
`target` TEXT NOT NULL,
`status` INTEGER NOT NULL DEFAULT 0,
`mtime` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);
''')
self.db.commit()
def __exit__(self):
@@ -230,3 +226,24 @@ class WebmentionQueue(object):
)
)
self.db.commit()
def get_queued(self, fname=None):
logging.debug('getting queued webmentions for %s', fname)
ret = []
cursor = self.db.cursor()
cursor.execute('''SELECT * FROM queue WHERE target LIKE ? AND status = 0''', ('%'+fname+'%',))
rows = cursor.fetchall()
for r in rows:
ret.append({
'id': r[0],
'dt': r[1],
'source': r[2],
'target': r[3],
})
return ret
def entry_done(self, id):
logging.debug('setting %s webmention to done', id)
cursor = self.db.cursor()
cursor.execute("UPDATE queue SET status = 1 where ID=?", (id,))
self.db.commit()
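
Not part of the diff: a minimal sketch of how the two new queue methods are meant to be consumed; the URL is hypothetical, and finish() is assumed to commit and close, as nasg.py calls it after draining the queue.

    import db

    wdb = db.WebmentionQueue()
    for entry in wdb.get_queued('https://example.net/some-post/'):
        # each entry carries id, dt, source and target in queue-column order
        print(entry['id'], entry['source'], '->', entry['target'])
        wdb.entry_done(entry['id'])   # status = 1, so the row is skipped on the next run
    wdb.finish()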

130
nasg.py

@@ -9,7 +9,6 @@ import glob
import argparse
import shutil
from urllib.parse import urlparse
#from urllib.parse import urljoin
import asyncio
from math import ceil
import csv
@@ -20,9 +19,6 @@ import frontmatter
import arrow
import langdetect
import wand.image
#import requests
#from bs4 import BeautifulSoup
from emoji import UNICODE_EMOJI
import shared
@@ -31,7 +27,7 @@ import db
from pprint import pprint
class MagicPHP(object):
name = 'magic.php'
name = 'index.php'
def __init__(self):
# init 'gone 410' array
@@ -330,6 +326,26 @@ class Singular(object):
self.photo.cssclass = 'u-photo'
def init_extras(self):
self.process_webmentions()
c = self.comments
# TODO this should be async
def process_webmentions(self):
wdb = db.WebmentionQueue()
queued = wdb.get_queued(self.url)
for incoming in queued:
wm = Webmention(
incoming.get('id'),
incoming.get('source'),
incoming.get('target'),
incoming.get('dt')
)
wm.run()
wdb.entry_done(incoming.get('id'))
wdb.finish()
@property
def redirects(self):
@@ -380,6 +396,10 @@ class Singular(object):
)
cfiles = [*cfiles, *maybe]
for cpath in cfiles:
cmtime = os.path.getmtime(cpath)
if cmtime > self.mtime:
self.mtime = cmtime
c = Comment(cpath)
comments.append((c.mtime, c))
return comments
@@ -853,7 +873,6 @@ class WebImage(object):
def _copy(self):
fname = "%s%s" % (self.fname, self.fext)
logging.info("copying %s to build dir", fname)
fpath = os.path.join(
shared.config.get('common', 'build'),
shared.config.get('common', 'files'),
@@ -863,6 +882,7 @@ class WebImage(object):
mtime = os.path.getmtime(fpath)
if self.mtime <= mtime:
return
logging.info("copying %s to build dir", fname)
shutil.copy(self.fpath, fpath)
def _intermediate_dimension(self, size, width, height, crop=False):
@@ -878,7 +898,7 @@ class WebImage(object):
return (w, h)
def _intermediate(self, img, size, target, crop=False):
if img.width <= size and img.height <= size:
if img.width < size and img.height < size:
return False
with img.clone() as thumb:
@@ -1044,40 +1064,81 @@ class Comment(object):
return shared.j2.get_template(tmplfile).render({'comment': self.tmplvars})
#class SendWebmention(object):
## TODO def __init__(self, source, target):
## check in gone.tsv?
## discover endpoint
## send webmention
## add to DB on return
class Webmention(object):
def __init__ (self, id, source, target, dt):
self.source = source
self.target = target
self.id = id
self.dt = arrow.get(dt).to('utc')
logging.info(
"processing webmention %s => %s",
self.source,
self.target
)
#def run(self):
#return
def _fetch(self):
self._source = shared.XRay(self.source).parse()
def _save(self):
fm = frontmatter.loads('')
fm.content = self.content
fm.metadata = self.meta
with open(self.fpath, 'wt') as f:
f.write(frontmatter.dumps(fm))
return
#class ReceiveWebmention(object):
## TODO def __init__(self, source, target):
## pull remote
## validate if page links to X anywhere
## find h-entry or use root as SOURCE
## find author in SOURCE
## find content in SOURCE
## save under comments/[target slug]/mtime-[from-slufigied-url].md
##
def run(self):
self._fetch()
self._save()
## add to DB on return
#def run(self):
#return
@property
def relation(self):
r = 'webmention'
k = self._source.get('data').keys()
for maybe in ['in-reply-to', 'repost-of', 'bookmark-of', 'like-of']:
if maybe in k:
r = maybe
break
return r
#def parse_received_queue():
# iterate over DB received
@property
def meta(self):
if not hasattr(self, '_meta'):
self._meta = {
'author': self._source.get('data').get('author'),
'type': self.relation,
'target': self.target,
'source': self.source,
'date': self._source.get('data').get('published'),
}
return self._meta
#def parse_send_queue():
# iterate over DB needs sending
@property
def content(self):
return shared.Pandoc('html').convert(
self._source.get('data').get('content').get('html')
)
@property
def fname(self):
return "%d-%s.md" % (
self.dt.timestamp,
shared.slugfname(self.source)
)
@property
def fpath(self):
tdir = os.path.join(
shared.config.get('dirs', 'comment'),
self.target.rstrip('/').strip('/').split('/')[-1]
)
if not os.path.isdir(tdir):
os.makedirs(tdir)
return os.path.join(
tdir,
self.fname
)
#def webmentions(target_slug):
# find all webmentions in the relevant directory
# return mtime => Webmention hash
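
Not part of the diff: put together, the new Webmention class turns one queued row into a comment file. A rough sketch with hypothetical values; the destination and frontmatter fields follow fname, fpath and meta above.

    wm = Webmention(
        1,                                       # queue row id
        'https://example.com/a-reply/',          # source
        'https://petermolnar.net/some-post/',    # target
        '2017-10-29 19:11:01'                    # dt, as stored by SQLite
    )
    wm.run()  # _fetch() parses the source with shared.XRay, _save() writes the file
    # writes roughly: <dirs.comment>/some-post/1509304261-<slugified source>.md
    # frontmatter: author, type (in-reply-to / repost-of / bookmark-of / like-of),
    # target, source, date; body: the source HTML converted to markdown via Pandoc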
def setup():
""" parse input parameters and add them as params section to config """
@@ -1161,6 +1222,7 @@ def build():
for f, post in content:
logging.info("PARSING %s", f)
post.init_extras()
# extend redirects
for r in post.redirects:

View file

@@ -82,22 +82,12 @@ if __name__ == '__main__':
wdb.queue(source,target)
# telegram notification, if set
if shared.config.has_section('api_telegram'):
url = "https://api.telegram.org/bot%s/sendMessage" % (
shared.config.get('api_telegram', 'api_token')
shared.notify(
'incoming webmention from %s to %s' % (
source,
target
)
data = {
'chat_id': shared.config.get('api_telegram', 'chat_id'),
'text': 'incoming webmention from %s to %s' % (
source,
target
)
}
# fire and forget
try:
requests.post(url, data=data)
except:
pass
)
response = sanic.response.text("Accepted", status=202)
return response
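
For context, since the handler file's name is not preserved in this extract: a minimal sketch of the kind of sanic handler this hunk sits in. The route and variable names are assumptions; only the queue, notify and 202 steps come from the diff.

    import sanic
    import db
    import shared

    async def receive_webmention(request):
        source = request.form.get('source')
        target = request.form.get('target')
        wdb = db.WebmentionQueue()
        wdb.queue(source, target)
        # telegram notification, if set
        if shared.config.has_section('api_telegram'):
            shared.notify('incoming webmention from %s to %s' % (source, target))
        return sanic.response.text("Accepted", status=202)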

shared.py

@@ -6,6 +6,7 @@ import logging
import subprocess
import json
import sqlite3
import requests
from slugify import slugify
import jinja2
@@ -26,6 +27,34 @@ class CMDLine(object):
return None
class XRay(CMDLine):
xraypath = '/usr/local/lib/php/xray'
def __init__(self, url):
super().__init__('php')
self.url = url
def parse(self):
cmd = (
self.executable,
'-r',
'''chdir("%s"); include("vendor/autoload.php"); $xray = new p3k\XRay(); echo(json_encode($xray->parse("%s")));''' % (self.xraypath, self.url)
)
logging.debug('pulling %s with XRay', self.url)
p = subprocess.Popen(
cmd,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
stdout, stderr = p.communicate()
if stderr:
logging.error("Error with XRay: %s", stderr)
return json.loads(stdout.decode('utf-8').strip())
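
Not part of the diff: parse() shells out to PHP and returns XRay's decoded JSON. A hedged sketch of the payload shape the Webmention properties in nasg.py read from it; the values are hypothetical and real XRay output carries more keys.

    parsed = XRay('https://example.com/a-reply/').parse()
    # expected shape, roughly:
    # {
    #     'data': {
    #         'type': 'entry',
    #         'author': {'name': 'Example Author', 'url': 'https://example.com/'},
    #         'published': '2017-10-29T19:11:01+00:00',
    #         'in-reply-to': ['https://petermolnar.net/some-post/'],
    #         'content': {'html': '<p>good point</p>'}
    #     }
    # }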
class Pandoc(CMDLine):
""" Pandoc command line call with piped in- and output """
@@ -244,6 +273,25 @@ def __setup_sitevars():
return SiteVars
def notify(msg):
# telegram notification, if set
if not shared.config.has_section('api_telegram'):
return
url = "https://api.telegram.org/bot%s/sendMessage" % (
shared.config.get('api_telegram', 'api_token')
)
data = {
'chat_id': shared.config.get('api_telegram', 'chat_id'),
'text': msg
}
# fire and forget
try:
requests.post(url, data=data)
except:
pass
ARROWFORMAT = {
'iso': 'YYYY-MM-DDTHH:mm:ssZ',
'display': 'YYYY-MM-DD HH:mm',