adding XRay as parser for webmentions; processing incoming webmentions; moved notify via telegram to shared
This commit is contained in:
parent
2711276e08
commit
e5518ba4a1
4 changed files with 180 additions and 63 deletions
39
db.py
39
db.py
|
@ -199,20 +199,16 @@ class WebmentionQueue(object):
|
||||||
)
|
)
|
||||||
|
|
||||||
cursor = self.db.cursor()
|
cursor = self.db.cursor()
|
||||||
cursor.execute('''CREATE TABLE IF NOT EXISTS `archive` (
|
cursor.execute('''
|
||||||
`id` INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE,
|
CREATE TABLE IF NOT EXISTS `queue` (
|
||||||
`received` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
|
||||||
`processed` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
|
||||||
`source` TEXT NOT NULL,
|
|
||||||
`target` TEXT NOT NULL
|
|
||||||
);''');
|
|
||||||
|
|
||||||
cursor.execute('''CREATE TABLE IF NOT EXISTS `queue` (
|
|
||||||
`id` INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE,
|
`id` INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE,
|
||||||
`timestamp` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
`timestamp` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
`source` TEXT NOT NULL,
|
`source` TEXT NOT NULL,
|
||||||
`target` TEXT NOT NULL
|
`target` TEXT NOT NULL,
|
||||||
);''');
|
`status` INTEGER NOT NULL DEFAULT 0,
|
||||||
|
`mtime` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
''')
|
||||||
self.db.commit()
|
self.db.commit()
|
||||||
|
|
||||||
def __exit__(self):
|
def __exit__(self):
|
||||||
|
@ -230,3 +226,24 @@ class WebmentionQueue(object):
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
self.db.commit()
|
self.db.commit()
|
||||||
|
|
||||||
|
def get_queued(self, fname=None):
    """ Return the unprocessed (``status = 0``) rows of the ``queue`` table.

    :param fname: text that must occur somewhere in a row's ``target``;
        when ``None`` (the default) every unprocessed row is returned.
    :returns: list of dicts with the keys ``id``, ``dt``, ``source`` and
        ``target``.
    """
    logging.debug('getting queued webmentions for %s', fname)

    # Columns are named explicitly so the result does not depend on the
    # physical column order of the table.
    query = 'SELECT id, timestamp, source, target FROM queue WHERE status = 0'
    params = ()
    if fname is not None:
        # Escape the LIKE wildcards so the value is matched literally;
        # "_" is common in URLs and would otherwise match any character.
        needle = (
            fname
            .replace('\\', '\\\\')
            .replace('%', '\\%')
            .replace('_', '\\_')
        )
        query += " AND target LIKE ? ESCAPE '\\'"
        params = ('%' + needle + '%',)

    cursor = self.db.cursor()
    cursor.execute(query, params)
    return [
        {'id': row[0], 'dt': row[1], 'source': row[2], 'target': row[3]}
        for row in cursor.fetchall()
    ]
|
||||||
|
|
||||||
|
def entry_done(self, id):
    """ Flag the ``queue`` row with the given id as processed (``status = 1``). """
    logging.debug('setting %s webmention to done', id)
    statement = "UPDATE queue SET status = 1 where ID=?"
    cur = self.db.cursor()
    cur.execute(statement, (id,))
    self.db.commit()
|
||||||
|
|
130
nasg.py
130
nasg.py
|
@ -9,7 +9,6 @@ import glob
|
||||||
import argparse
|
import argparse
|
||||||
import shutil
|
import shutil
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
#from urllib.parse import urljoin
|
|
||||||
import asyncio
|
import asyncio
|
||||||
from math import ceil
|
from math import ceil
|
||||||
import csv
|
import csv
|
||||||
|
@ -20,9 +19,6 @@ import frontmatter
|
||||||
import arrow
|
import arrow
|
||||||
import langdetect
|
import langdetect
|
||||||
import wand.image
|
import wand.image
|
||||||
|
|
||||||
#import requests
|
|
||||||
#from bs4 import BeautifulSoup
|
|
||||||
from emoji import UNICODE_EMOJI
|
from emoji import UNICODE_EMOJI
|
||||||
|
|
||||||
import shared
|
import shared
|
||||||
|
@ -31,7 +27,7 @@ import db
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
|
|
||||||
class MagicPHP(object):
|
class MagicPHP(object):
|
||||||
name = 'magic.php'
|
name = 'index.php'
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
# init 'gone 410' array
|
# init 'gone 410' array
|
||||||
|
@ -330,6 +326,26 @@ class Singular(object):
|
||||||
self.photo.cssclass = 'u-photo'
|
self.photo.cssclass = 'u-photo'
|
||||||
|
|
||||||
|
|
||||||
|
def init_extras(self):
    """ Process queued webmentions for this post, then touch its comments.

    Webmentions are processed first so that anything they produce already
    exists when ``comments`` is read.
    """
    self.process_webmentions()
    # The value is not needed here; the attribute is read only to trigger
    # whatever loading ``comments`` performs (presumably a property that
    # scans the comment files - confirm against its definition).
    self.comments
|
||||||
|
|
||||||
|
|
||||||
|
# TODO this should be async
|
||||||
|
def process_webmentions(self):
    """ Process every queued webmention whose target matches this post's URL.

    Each queued row is turned into a ``Webmention``, run, and then marked
    as done in the queue. The queue is always finished, even when
    processing one of the webmentions raises; the exception itself still
    propagates to the caller.
    """
    wdb = db.WebmentionQueue()
    try:
        for incoming in wdb.get_queued(self.url):
            Webmention(
                incoming.get('id'),
                incoming.get('source'),
                incoming.get('target'),
                incoming.get('dt')
            ).run()
            # only reached when run() succeeded, so a failed entry stays
            # queued
            wdb.entry_done(incoming.get('id'))
    finally:
        wdb.finish()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def redirects(self):
|
def redirects(self):
|
||||||
|
@ -380,6 +396,10 @@ class Singular(object):
|
||||||
)
|
)
|
||||||
cfiles = [*cfiles, *maybe]
|
cfiles = [*cfiles, *maybe]
|
||||||
for cpath in cfiles:
|
for cpath in cfiles:
|
||||||
|
cmtime = os.path.getmtime(cpath)
|
||||||
|
if cmtime > self.mtime:
|
||||||
|
self.mtime = cmtime
|
||||||
|
|
||||||
c = Comment(cpath)
|
c = Comment(cpath)
|
||||||
comments.append(c.mtime, c)
|
comments.append(c.mtime, c)
|
||||||
return comments
|
return comments
|
||||||
|
@ -853,7 +873,6 @@ class WebImage(object):
|
||||||
|
|
||||||
def _copy(self):
|
def _copy(self):
|
||||||
fname = "%s%s" % (self.fname, self.fext)
|
fname = "%s%s" % (self.fname, self.fext)
|
||||||
logging.info("copying %s to build dir", fname)
|
|
||||||
fpath = os.path.join(
|
fpath = os.path.join(
|
||||||
shared.config.get('common', 'build'),
|
shared.config.get('common', 'build'),
|
||||||
shared.config.get('common', 'files'),
|
shared.config.get('common', 'files'),
|
||||||
|
@ -863,6 +882,7 @@ class WebImage(object):
|
||||||
mtime = os.path.getmtime(fpath)
|
mtime = os.path.getmtime(fpath)
|
||||||
if self.mtime <= mtime:
|
if self.mtime <= mtime:
|
||||||
return
|
return
|
||||||
|
logging.info("copying %s to build dir", fname)
|
||||||
shutil.copy(self.fpath, fpath)
|
shutil.copy(self.fpath, fpath)
|
||||||
|
|
||||||
def _intermediate_dimension(self, size, width, height, crop=False):
|
def _intermediate_dimension(self, size, width, height, crop=False):
|
||||||
|
@ -878,7 +898,7 @@ class WebImage(object):
|
||||||
return (w, h)
|
return (w, h)
|
||||||
|
|
||||||
def _intermediate(self, img, size, target, crop=False):
|
def _intermediate(self, img, size, target, crop=False):
|
||||||
if img.width <= size and img.height <= size:
|
if img.width < size and img.height < size:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
with img.clone() as thumb:
|
with img.clone() as thumb:
|
||||||
|
@ -1044,40 +1064,81 @@ class Comment(object):
|
||||||
return shared.j2.get_template(tmplfile).render({'comment': self.tmplvars})
|
return shared.j2.get_template(tmplfile).render({'comment': self.tmplvars})
|
||||||
|
|
||||||
|
|
||||||
#class SendWebmention(object):
|
class Webmention(object):
    """ One incoming webmention: fetched with XRay, saved as a comment file.

    ``run()`` parses the source URL with ``shared.XRay`` and writes the
    result as a frontmatter document below the configured comment
    directory, in a subdirectory named after the last path segment of the
    target URL.
    """

    def __init__(self, id, source, target, dt):
        """
        :param id: id of the queue row this webmention came from
        :param source: URL of the page that mentions ``target``
        :param target: URL that was mentioned
        :param dt: time the webmention was received; anything ``arrow.get``
            accepts, converted to UTC
        """
        self.source = source
        self.target = target
        self.id = id
        self.dt = arrow.get(dt).to('utc')
        logging.info(
            "processing webmention %s => %s",
            self.source,
            self.target
        )

    def _fetch(self):
        """ Parse the source URL with XRay and keep the decoded result. """
        self._source = shared.XRay(self.source).parse()

    def _save(self):
        """ Write content and metadata as a frontmatter file to ``fpath``. """
        fm = frontmatter.loads('')
        fm.content = self.content
        fm.metadata = self.meta
        # explicit encoding: remote content is routinely non-ASCII and must
        # not depend on the locale of the build machine
        with open(self.fpath, 'wt', encoding='utf-8') as f:
            f.write(frontmatter.dumps(fm))

    def run(self):
        """ Fetch the source, then store it. """
        self._fetch()
        self._save()

    @property
    def _data(self):
        """ The ``data`` dict of the XRay result, or ``{}`` when it is absent
        (for example when XRay returned only an error). """
        data = self._source.get('data')
        return data if isinstance(data, dict) else {}

    @property
    def relation(self):
        """ Kind of mention: the first of in-reply-to, repost-of, bookmark-of
        and like-of present in the parsed data, else ``'webmention'``. """
        data = self._data
        for maybe in ('in-reply-to', 'repost-of', 'bookmark-of', 'like-of'):
            if maybe in data:
                return maybe
        return 'webmention'

    @property
    def meta(self):
        """ Metadata dict for the frontmatter header; built once and cached. """
        if not hasattr(self, '_meta'):
            data = self._data
            self._meta = {
                'author': data.get('author'),
                'type': self.relation,
                'target': self.target,
                'source': self.source,
                'date': data.get('published'),
            }
        return self._meta

    @property
    def content(self):
        """ Body of the comment.

        The HTML content is converted with Pandoc when present. Otherwise
        the plain text is used, or an empty string when the source has no
        content at all.
        """
        content = self._data.get('content')
        if not isinstance(content, dict):
            return ''
        html = content.get('html')
        if html:
            return shared.Pandoc('html').convert(html)
        return content.get('text') or ''

    @property
    def fname(self):
        """ File name: ``<received unix timestamp>-<slug of source>.md`` """
        stamp = self.dt.timestamp
        # ``timestamp`` is a property in older arrow releases and a method
        # in newer ones; support both
        if callable(stamp):
            stamp = stamp()
        return "%d-%s.md" % (
            stamp,
            shared.slugfname(self.source)
        )

    @property
    def fpath(self):
        """ Full path of the comment file; creates its directory if needed. """
        tdir = os.path.join(
            shared.config.get('dirs', 'comment'),
            self.target.strip('/').split('/')[-1]
        )
        os.makedirs(tdir, exist_ok=True)
        return os.path.join(
            tdir,
            self.fname
        )
|
||||||
|
|
||||||
#def webmentions(target_slug):
|
|
||||||
# find all webmentions in the relevant directory
|
|
||||||
# return mtime => Webmention hash
|
|
||||||
|
|
||||||
def setup():
|
def setup():
|
||||||
""" parse input parameters and add them as params section to config """
|
""" parse input parameters and add them as params section to config """
|
||||||
|
@ -1161,6 +1222,7 @@ def build():
|
||||||
|
|
||||||
for f, post in content:
|
for f, post in content:
|
||||||
logging.info("PARSING %s", f)
|
logging.info("PARSING %s", f)
|
||||||
|
post.init_extras()
|
||||||
|
|
||||||
# extend redirects
|
# extend redirects
|
||||||
for r in post.redirects:
|
for r in post.redirects:
|
||||||
|
|
16
router.py
16
router.py
|
@ -82,22 +82,12 @@ if __name__ == '__main__':
|
||||||
wdb.queue(source,target)
|
wdb.queue(source,target)
|
||||||
|
|
||||||
# telegram notification, if set
|
# telegram notification, if set
|
||||||
if shared.config.has_section('api_telegram'):
|
shared.notify(
|
||||||
url = "https://api.telegram.org/bot%s/sendMessage" % (
|
'incoming webmention from %s to %s' % (
|
||||||
shared.config.get('api_telegram', 'api_token')
|
|
||||||
)
|
|
||||||
data = {
|
|
||||||
'chat_id': shared.config.get('api_telegram', 'chat_id'),
|
|
||||||
'text': 'incoming webmention from %s to %s' % (
|
|
||||||
source,
|
source,
|
||||||
target
|
target
|
||||||
)
|
)
|
||||||
}
|
)
|
||||||
# fire and forget
|
|
||||||
try:
|
|
||||||
requests.post(url, data=data)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
response = sanic.response.text("Accepted", status=202)
|
response = sanic.response.text("Accepted", status=202)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
48
shared.py
48
shared.py
|
@ -6,6 +6,7 @@ import logging
|
||||||
import subprocess
|
import subprocess
|
||||||
import json
|
import json
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
import requests
|
||||||
|
|
||||||
from slugify import slugify
|
from slugify import slugify
|
||||||
import jinja2
|
import jinja2
|
||||||
|
@ -26,6 +27,34 @@ class CMDLine(object):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
class XRay(CMDLine):
    """ Parse a URL with the XRay PHP library through the ``php`` command line """

    # directory of the XRay checkout; its vendor/autoload.php is included
    xraypath = '/usr/local/lib/php/xray'

    def __init__(self, url):
        """
        :param url: the URL XRay should fetch and parse
        """
        super().__init__('php')
        self.url = url

    def parse(self):
        """ Run XRay on the URL and return its JSON output, decoded.

        The URL comes from incoming webmentions and is therefore untrusted:
        it is handed to PHP as a command line argument (``$argv``) and is
        never pasted into the PHP source, so it cannot inject code.

        :raises ValueError: when PHP printed nothing that decodes as JSON
        """
        # raw strings: "p3k\XRay" contains a backslash that must reach PHP
        # untouched
        code = (
            r'chdir($argv[1]); '
            r'include("vendor/autoload.php"); '
            r'$xray = new p3k\XRay(); '
            r'echo(json_encode($xray->parse($argv[2])));'
        )
        cmd = (
            self.executable,
            '-r',
            code,
            # everything after "--" goes to the script as $argv[1], $argv[2]
            '--',
            self.xraypath,
            self.url,
        )
        logging.debug('pulling %s with XRay', self.url)
        p = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        stdout, stderr = p.communicate()
        if stderr:
            logging.error("Error with XRay: %s", stderr)

        return json.loads(stdout.decode('utf-8').strip())
|
||||||
|
|
||||||
|
|
||||||
class Pandoc(CMDLine):
|
class Pandoc(CMDLine):
|
||||||
""" Pandoc command line call with piped in- and output """
|
""" Pandoc command line call with piped in- and output """
|
||||||
|
|
||||||
|
@ -244,6 +273,25 @@ def __setup_sitevars():
|
||||||
return SiteVars
|
return SiteVars
|
||||||
|
|
||||||
|
|
||||||
|
def notify(msg):
    """ Send ``msg`` as a Telegram message, if Telegram is configured.

    Does nothing when the configuration has no ``api_telegram`` section.
    Delivery is best effort: a failed request is logged and otherwise
    ignored, it never raises to the caller.

    :param msg: text of the message
    """
    # This function is part of the ``shared`` module itself, so the
    # module-level ``config`` is used directly; ``shared`` is not a name
    # inside this module.
    if not config.has_section('api_telegram'):
        return

    url = "https://api.telegram.org/bot%s/sendMessage" % (
        config.get('api_telegram', 'api_token')
    )
    data = {
        'chat_id': config.get('api_telegram', 'chat_id'),
        'text': msg
    }
    # fire and forget
    try:
        # the timeout keeps an unreachable API from blocking the caller
        requests.post(url, data=data, timeout=10)
    except requests.exceptions.RequestException as err:
        # only the exception type is logged: the message of a requests
        # exception can contain the URL, which includes the API token
        logging.warning(
            'telegram notification failed: %s',
            type(err).__name__
        )
|
||||||
|
|
||||||
|
|
||||||
ARROWFORMAT = {
|
ARROWFORMAT = {
|
||||||
'iso': 'YYYY-MM-DDTHH:mm:ssZ',
|
'iso': 'YYYY-MM-DDTHH:mm:ssZ',
|
||||||
'display': 'YYYY-MM-DD HH:mm',
|
'display': 'YYYY-MM-DD HH:mm',
|
||||||
|
|
Loading…
Reference in a new issue