adding XRay as parser for webmentions; processing incoming webmentions; moved notify via telegram to shared
This commit is contained in:
parent
2711276e08
commit
e5518ba4a1
4 changed files with 180 additions and 63 deletions
45
db.py
45
db.py
|
@ -199,20 +199,16 @@ class WebmentionQueue(object):
|
|||
)
|
||||
|
||||
cursor = self.db.cursor()
|
||||
cursor.execute('''CREATE TABLE IF NOT EXISTS `archive` (
|
||||
`id` INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE,
|
||||
`received` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`processed` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`source` TEXT NOT NULL,
|
||||
`target` TEXT NOT NULL
|
||||
);''');
|
||||
|
||||
cursor.execute('''CREATE TABLE IF NOT EXISTS `queue` (
|
||||
`id` INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE,
|
||||
`timestamp` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`source` TEXT NOT NULL,
|
||||
`target` TEXT NOT NULL
|
||||
);''');
|
||||
cursor.execute('''
|
||||
CREATE TABLE IF NOT EXISTS `queue` (
|
||||
`id` INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE,
|
||||
`timestamp` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`source` TEXT NOT NULL,
|
||||
`target` TEXT NOT NULL,
|
||||
`status` INTEGER NOT NULL DEFAULT 0,
|
||||
`mtime` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
''')
|
||||
self.db.commit()
|
||||
|
||||
def __exit__(self):
|
||||
|
@ -230,3 +226,24 @@ class WebmentionQueue(object):
|
|||
)
|
||||
)
|
||||
self.db.commit()
|
||||
|
||||
def get_queued(self, fname=None):
    """Return the unprocessed (status = 0) webmentions from the queue.

    Args:
        fname: text the `target` URL of an entry has to contain. When it
            is None or empty, every unprocessed entry is returned.

    Returns:
        A list of dicts with the keys `id`, `dt`, `source` and `target`,
        one per matching row of the `queue` table.
    """
    logging.debug('getting queued webmentions for %s', fname)

    # the columns are named instead of using SELECT * so that the mapping
    # below does not depend on the physical column order of the table
    query = 'SELECT id, timestamp, source, target FROM queue WHERE status = 0'
    params = ()
    if fname:
        # `_` and `%` are legal in URLs but are wildcards for LIKE; they
        # are escaped so they only match themselves
        needle = (
            fname.replace('\\', '\\\\')
            .replace('%', '\\%')
            .replace('_', '\\_')
        )
        query += " AND target LIKE ? ESCAPE '\\'"
        params = ('%' + needle + '%',)

    cursor = self.db.cursor()
    cursor.execute(query, params)
    return [
        {
            'id': row[0],
            'dt': row[1],
            'source': row[2],
            'target': row[3],
        }
        for row in cursor.fetchall()
    ]
|
||||
|
||||
def entry_done(self, id):
    """Flag the queue entry with the given row id as processed.

    Sets `status` to 1 for that row of the `queue` table and commits the
    change immediately.
    """
    logging.debug('setting %s webmention to done', id)
    params = (id,)
    self.db.cursor().execute("UPDATE queue SET status = 1 where ID=?", params)
    self.db.commit()
|
||||
|
|
130
nasg.py
130
nasg.py
|
@ -9,7 +9,6 @@ import glob
|
|||
import argparse
|
||||
import shutil
|
||||
from urllib.parse import urlparse
|
||||
#from urllib.parse import urljoin
|
||||
import asyncio
|
||||
from math import ceil
|
||||
import csv
|
||||
|
@ -20,9 +19,6 @@ import frontmatter
|
|||
import arrow
|
||||
import langdetect
|
||||
import wand.image
|
||||
|
||||
#import requests
|
||||
#from bs4 import BeautifulSoup
|
||||
from emoji import UNICODE_EMOJI
|
||||
|
||||
import shared
|
||||
|
@ -31,7 +27,7 @@ import db
|
|||
from pprint import pprint
|
||||
|
||||
class MagicPHP(object):
|
||||
name = 'magic.php'
|
||||
name = 'index.php'
|
||||
|
||||
def __init__(self):
|
||||
# init 'gone 410' array
|
||||
|
@ -330,6 +326,26 @@ class Singular(object):
|
|||
self.photo.cssclass = 'u-photo'
|
||||
|
||||
|
||||
def init_extras(self):
    """Process the queued webmentions of this post, then read its comments."""
    self.process_webmentions()
    # The value itself is not used; the attribute is only accessed.
    # NOTE(review): presumably this is done for the side effects of the
    # `comments` property (it appears to refresh self.mtime) - confirm.
    _ = self.comments
|
||||
|
||||
|
||||
# TODO this should be async
|
||||
def process_webmentions(self):
    """Fetch and store every queued webmention that targets this post.

    Every pending queue entry returned for `self.url` is turned into a
    Webmention and run; once that succeeded the entry is marked as done.
    An exception raised while running a webmention still propagates to
    the caller, but the queue's finish() is called in every case.
    """
    wdb = db.WebmentionQueue()
    try:
        for incoming in wdb.get_queued(self.url):
            wm = Webmention(
                incoming.get('id'),
                incoming.get('source'),
                incoming.get('target'),
                incoming.get('dt')
            )
            wm.run()
            # only reached when run() did not raise, so an entry that
            # failed is not marked as done
            wdb.entry_done(incoming.get('id'))
    finally:
        # previously skipped whenever processing an entry raised
        wdb.finish()
|
||||
|
||||
@property
|
||||
def redirects(self):
|
||||
|
@ -380,6 +396,10 @@ class Singular(object):
|
|||
)
|
||||
cfiles = [*cfiles, *maybe]
|
||||
for cpath in cfiles:
|
||||
cmtime = os.path.getmtime(cpath)
|
||||
if cmtime > self.mtime:
|
||||
self.mtime = cmtime
|
||||
|
||||
c = Comment(cpath)
|
||||
comments.append(c.mtime, c)
|
||||
return comments
|
||||
|
@ -853,7 +873,6 @@ class WebImage(object):
|
|||
|
||||
def _copy(self):
|
||||
fname = "%s%s" % (self.fname, self.fext)
|
||||
logging.info("copying %s to build dir", fname)
|
||||
fpath = os.path.join(
|
||||
shared.config.get('common', 'build'),
|
||||
shared.config.get('common', 'files'),
|
||||
|
@ -863,6 +882,7 @@ class WebImage(object):
|
|||
mtime = os.path.getmtime(fpath)
|
||||
if self.mtime <= mtime:
|
||||
return
|
||||
logging.info("copying %s to build dir", fname)
|
||||
shutil.copy(self.fpath, fpath)
|
||||
|
||||
def _intermediate_dimension(self, size, width, height, crop=False):
|
||||
|
@ -878,7 +898,7 @@ class WebImage(object):
|
|||
return (w, h)
|
||||
|
||||
def _intermediate(self, img, size, target, crop=False):
|
||||
if img.width <= size and img.height <= size:
|
||||
if img.width < size and img.height < size:
|
||||
return False
|
||||
|
||||
with img.clone() as thumb:
|
||||
|
@ -1044,40 +1064,81 @@ class Comment(object):
|
|||
return shared.j2.get_template(tmplfile).render({'comment': self.tmplvars})
|
||||
|
||||
|
||||
#class SendWebmention(object):
|
||||
## TODO def __init__(self, source, target):
|
||||
## check in gone.tsv?
|
||||
## discover endpoint
|
||||
## send webmention
|
||||
## add to DB on return
|
||||
class Webmention(object):
    """An incoming webmention that is fetched with XRay and saved to disk.

    run() parses the source URL with shared.XRay and writes the result as
    a frontmatter file into a per-target directory below the configured
    comment directory.

    Args:
        id: id of the entry in the webmention queue.
        source: URL of the page that mentions the target.
        target: URL of the mentioned page.
        dt: time the webmention was received; anything arrow.get() accepts.
    """

    # TODO: validate that the source page really links to the target
    # before saving it.

    def __init__(self, id, source, target, dt):
        self.source = source
        self.target = target
        self.id = id
        self.dt = arrow.get(dt).to('utc')
        logging.info(
            "processing webmention %s => %s",
            self.source,
            self.target
        )

    def _fetch(self):
        """Parse the source URL with XRay and keep the result in _source."""
        self._source = shared.XRay(self.source).parse()

    def _save(self):
        """Write content and metadata as a frontmatter file to fpath."""
        fm = frontmatter.loads('')
        fm.content = self.content
        fm.metadata = self.meta
        # explicit encoding: remote content is arbitrary unicode and must
        # not depend on the locale of the process
        with open(self.fpath, 'wt', encoding='utf-8') as f:
            f.write(frontmatter.dumps(fm))

    def run(self):
        """Fetch the source and save it."""
        self._fetch()
        self._save()

    @property
    def _data(self):
        """The `data` dict of the XRay result, or {} when there is none.

        An XRay result without a usable `data` member (for example an
        error response) must not break the properties below.
        """
        data = (self._source or {}).get('data')
        return data if isinstance(data, dict) else {}

    @property
    def relation(self):
        """Type of the mention: the first known relation key found in the
        parsed data, or 'webmention' when none of them is present."""
        for maybe in ('in-reply-to', 'repost-of', 'bookmark-of', 'like-of'):
            if maybe in self._data:
                return maybe
        return 'webmention'

    @property
    def meta(self):
        """Metadata dict for the saved file; built once, then cached."""
        if not hasattr(self, '_meta'):
            self._meta = {
                'author': self._data.get('author'),
                'type': self.relation,
                'target': self.target,
                'source': self.source,
                'date': self._data.get('published'),
            }
        return self._meta

    @property
    def content(self):
        """Content of the source, converted with shared.Pandoc('html').

        Falls back to the plain text when the parsed content has no
        `html` member and returns an empty string when there is no
        content at all.
        """
        content = self._data.get('content')
        if isinstance(content, dict):
            markup = content.get('html') or content.get('text')
        else:
            markup = content
        if not markup:
            return ''
        return shared.Pandoc('html').convert(markup)

    @property
    def fname(self):
        """File name: '<unix timestamp of dt>-<slug of the source>.md'."""
        ts = self.dt.timestamp
        if callable(ts):
            # newer arrow releases expose `timestamp` as a method instead
            # of a property
            ts = ts()
        return "%d-%s.md" % (
            ts,
            shared.slugfname(self.source)
        )

    @property
    def fpath(self):
        """Full path of the file to save; creates the directory if needed.

        The directory is named after the last path segment of the target
        URL and lives below the `comment` entry of the `dirs` config
        section.
        """
        tdir = os.path.join(
            shared.config.get('dirs', 'comment'),
            self.target.strip('/').split('/')[-1]
        )
        # exist_ok avoids the check-then-create race of isdir() + makedirs()
        os.makedirs(tdir, exist_ok=True)
        return os.path.join(
            tdir,
            self.fname
        )
|
||||
|
||||
#def webmentions(target_slug):
|
||||
# find all webmentions in the relevant directory
|
||||
# return mtime => Webmention hash
|
||||
|
||||
def setup():
|
||||
""" parse input parameters and add them as params section to config """
|
||||
|
@ -1161,6 +1222,7 @@ def build():
|
|||
|
||||
for f, post in content:
|
||||
logging.info("PARSING %s", f)
|
||||
post.init_extras()
|
||||
|
||||
# extend redirects
|
||||
for r in post.redirects:
|
||||
|
|
20
router.py
20
router.py
|
@ -82,22 +82,12 @@ if __name__ == '__main__':
|
|||
wdb.queue(source,target)
|
||||
|
||||
# telegram notification, if set
|
||||
if shared.config.has_section('api_telegram'):
|
||||
url = "https://api.telegram.org/bot%s/sendMessage" % (
|
||||
shared.config.get('api_telegram', 'api_token')
|
||||
shared.notify(
|
||||
'incoming webmention from %s to %s' % (
|
||||
source,
|
||||
target
|
||||
)
|
||||
data = {
|
||||
'chat_id': shared.config.get('api_telegram', 'chat_id'),
|
||||
'text': 'incoming webmention from %s to %s' % (
|
||||
source,
|
||||
target
|
||||
)
|
||||
}
|
||||
# fire and forget
|
||||
try:
|
||||
requests.post(url, data=data)
|
||||
except:
|
||||
pass
|
||||
)
|
||||
response = sanic.response.text("Accepted", status=202)
|
||||
return response
|
||||
|
||||
|
|
48
shared.py
48
shared.py
|
@ -6,6 +6,7 @@ import logging
|
|||
import subprocess
|
||||
import json
|
||||
import sqlite3
|
||||
import requests
|
||||
|
||||
from slugify import slugify
|
||||
import jinja2
|
||||
|
@ -26,6 +27,34 @@ class CMDLine(object):
|
|||
return None
|
||||
|
||||
|
||||
class XRay(CMDLine):
    """ XRay (PHP library) command line call to parse a remote URL """

    xraypath = '/usr/local/lib/php/xray'

    # The XRay directory and the URL reach PHP as command line arguments
    # ($argv[1] and $argv[2]) instead of being pasted into the PHP source:
    # the URL comes from an incoming webmention, so a quote in it could
    # otherwise inject PHP code. Raw string, because of the backslash in
    # the p3k\XRay class name.
    _phpcode = r'''chdir($argv[1]); include("vendor/autoload.php"); $xray = new p3k\XRay(); echo(json_encode($xray->parse($argv[2])));'''

    def __init__(self, url):
        super().__init__('php')
        self.url = url

    def parse(self):
        """Run XRay on self.url and return its decoded JSON output.

        Anything PHP prints on stderr is logged as an error. Output that
        is not valid JSON raises the exception of json.loads().
        """
        cmd = (
            self.executable,
            '-r',
            self._phpcode,
            # `--` ends the options of php itself; what follows is passed
            # to the code as $argv
            '--',
            self.xraypath,
            self.url,
        )
        logging.debug('pulling %s with XRay', self.url)
        p = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        stdout, stderr = p.communicate()
        if stderr:
            logging.error(
                "Error with XRay: %s",
                stderr.decode('utf-8', errors='replace').strip()
            )

        return json.loads(stdout.decode('utf-8').strip())
|
||||
|
||||
|
||||
class Pandoc(CMDLine):
|
||||
""" Pandoc command line call with piped in- and output """
|
||||
|
||||
|
@ -244,6 +273,25 @@ def __setup_sitevars():
|
|||
return SiteVars
|
||||
|
||||
|
||||
def notify(msg):
    """Send msg as a Telegram message, if Telegram is configured.

    Does nothing when the configuration has no `api_telegram` section.
    Sending is best effort: a failed request is logged and otherwise
    ignored.

    Args:
        msg: text of the message to send.
    """
    # This function is part of the shared module itself, so the
    # configuration is referenced as `config`; no name `shared` is visible
    # here.
    # NOTE(review): assumes the module level name is `config`, as other
    # modules access it as shared.config - confirm.
    if not config.has_section('api_telegram'):
        return

    url = "https://api.telegram.org/bot%s/sendMessage" % (
        config.get('api_telegram', 'api_token')
    )
    data = {
        'chat_id': config.get('api_telegram', 'chat_id'),
        'text': msg
    }
    # fire and forget
    try:
        # requests waits without limit unless a timeout is given
        requests.post(url, data=data, timeout=10)
    except requests.exceptions.RequestException as e:
        # only the exception type is logged: the message of a requests
        # exception can contain the request URL and with it the bot token
        logging.warning(
            'telegram notification failed: %s',
            type(e).__name__
        )
|
||||
|
||||
|
||||
ARROWFORMAT = {
|
||||
'iso': 'YYYY-MM-DDTHH:mm:ssZ',
|
||||
'display': 'YYYY-MM-DD HH:mm',
|
||||
|
|
Loading…
Reference in a new issue