From f84088f311cdb36dacf973411c9731446c67104b Mon Sep 17 00:00:00 2001
From: Peter Molnar
Date: Thu, 29 Mar 2018 16:07:53 +0000
Subject: [PATCH] v2.1.0

- changing version numbering
- automated webmentions sending
---
 README.md |  2 +-
 nasg.py   | 97 +++++++++++++++++++++++++++++++++++++------------------
 router.py | 19 +++--------
 shared.py | 26 ++++++++++-----
 4 files changed, 88 insertions(+), 56 deletions(-)

diff --git a/README.md b/README.md
index 65d8216..5b9abca 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ It is most probably not suitable for anyone else, but feel free to use it for id
 
 ## Why not [insert static generator here]?
 
-- DRY - Don't Repeat Yourself - is good, so instead of sidefiles for images, I'm using XMP metadata, which most of the ones availabe don't handle well;
+- I'm using embedded XMP metadata in photos, which most of the ones availabe don't handle well;
 - writing plugins to existing generators - Pelican, Nicola, etc - might have taken longer and I wanted to extend my Python knowledge
 - I wanted to use the best available utilities for some tasks, like `Pandoc` and `exiftool` instead of Python libraries trying to achive the same
 - I needed to handle webmentions and comments
diff --git a/nasg.py b/nasg.py
index ce2448c..b602bd9 100644
--- a/nasg.py
+++ b/nasg.py
@@ -3,16 +3,16 @@
 # vim: set fileencoding=utf-8 :
 
 __author__ = "Peter Molnar"
-__copyright__ = "Copyright 2017, Peter Molnar"
+__copyright__ = "Copyright 2017-2018, Peter Molnar"
 __license__ = "GPLv3"
-__version__ = "2.0"
+__version__ = "2.1.0"
 __maintainer__ = "Peter Molnar"
 __email__ = "hello@petermolnar.eu"
 __status__ = "Production"
 
 """
     silo archiver module of NASG
-    Copyright (C) 2017 Peter Molnar
+    Copyright (C) 2017-2018 Peter Molnar
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -242,7 +242,6 @@ class Category(NoDupeContainer):
                 )
             )
 
-
     @property
     def url(self):
         if self.name:
@@ -280,7 +279,6 @@ class Category(NoDupeContainer):
             out.write(content)
         os.utime(path, (self.mtime, self.mtime))
 
-
     async def render(self):
         if self.is_altrender:
             self.render_onepage()
@@ -490,20 +488,32 @@ class Singular(object):
             wdb.entry_done(incoming.get('id'))
         wdb.finish()
 
-    # note: due to SQLite locking, this will not be async for now
-    def send_webmentions(self):
-        if not self.is_reply:
-            return
+    def queue_webmentions(self):
         wdb = shared.WebmentionQueue()
-        wid = wdb.queue(self.url, self.is_reply)
-        wm = Webmention(
-            self.url,
-            self.is_reply
-        )
-        wm.send()
-        wdb.entry_done(wid)
+        for target in self.urls_to_ping:
+            if not wdb.exists(self.url, target, self.published):
+                wdb.queue(self.url, target)
+            else:
+                logging.debug("not queueing - webmention already queued from %s to %s", self.url, target)
         wdb.finish()
 
+    @property
+    def urls_to_ping(self):
+        urls = [x.strip() for x in shared.REGEX.get('urls').findall(self.content)]
+        if self.is_reply:
+            urls.append(self.is_reply)
+        for url in self.syndicate:
+            urls.append(url)
+        r = {}
+        for link in urls:
+            parsed = urlparse(link)
+            if parsed.netloc in shared.config.get('site', 'domains'):
+                continue
+            if link in r:
+                continue
+            r.update({link: True})
+        return r.keys()
+
     @property
     def redirects(self):
         r = self.meta.get('redirect', [])
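
The queueing logic added above boils down to: collect every candidate target (links found in the body, the reply target, the syndication URLs), drop links that point back at the site itself, de-duplicate while keeping order, and only queue source/target pairs that are not already in the webmention database. A minimal, self-contained sketch of that collection step, with a stand-in regex and domain list where the patch relies on shared.REGEX.get('urls') and the site.domains config value:

import re
from urllib.parse import urlparse

# stand-ins for shared.REGEX.get('urls') and shared.config.get('site', 'domains')
URL_RE = re.compile(r'https?://\S+')
OWN_DOMAINS = ('example.net',)

def collect_targets(content, reply_url=None, syndicate_urls=()):
    """Return outbound webmention targets: body links, the reply target and
    the syndication URLs, minus own-domain links and duplicates."""
    candidates = [u.strip() for u in URL_RE.findall(content)]
    if reply_url:
        candidates.append(reply_url)
    candidates.extend(syndicate_urls)
    seen = {}
    for link in candidates:
        if urlparse(link).netloc in OWN_DOMAINS:
            continue                    # never ping our own pages
        seen.setdefault(link, True)     # dict keeps insertion order, drops dupes
    return list(seen.keys())
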
@@ -684,7 +694,7 @@ class Singular(object):
 
     @property
     def url(self):
-        return "%s/%s" % (shared.config.get('site', 'url'), self.fname)
+        return "%s/%s/" % (shared.config.get('site', 'url'), self.fname)
 
     @property
     def body(self):
@@ -732,7 +742,7 @@ class Singular(object):
 
     @property
     def syndicate(self):
-        urls = []
+        urls = self.meta.get('syndicate', [])
         if self.photo and self.photo.is_photo:
             urls.append("https://brid.gy/publish/flickr")
         return urls
@@ -1300,27 +1310,42 @@ class Webmention(object):
         fm.content = self.content
         fm.metadata = self.meta
         with open(self.fpath, 'wt') as f:
+            logging.info("Saving webmention to %s", self.fpath)
             f.write(frontmatter.dumps(fm))
         return
 
     def send(self):
-        rels = shared.XRay(self.source).set_discover().parse()
+        rels = shared.XRay(self.target).set_discover().parse()
         endpoint = False
         if 'rels' not in rels:
-            return
+            logging.debug("no rel found for %s", self.target)
+            return True
         for k in rels.get('rels').keys():
             if 'webmention' in k:
-                endpoint = rels.get('rels').get(k)
+                endpoint = rels.get('rels').get(k).pop()
                 break
         if not endpoint:
-            return
-        requests.post(
+            logging.debug("no endpoint found for %s", self.target)
+            return True
+        logging.info(
+            "Sending webmention to endpoint: %s, source: %s, target: %s",
+            endpoint,
+            self.source,
             self.target,
-            data={
-                'source': self.source,
-                'target': self.target
-            }
         )
+        try:
+            p = requests.post(
+                endpoint,
+                data={
+                    'source': self.source,
+                    'target': self.target
+                }
+            )
+            if p.status_code == requests.codes.ok:
+                return True
+        except Exception as e:
+            logging.error("sending webmention failed: %s", e)
+        return False
 
     def receive(self):
         self._fetch()
@@ -1464,6 +1489,7 @@ def build():
     for f, post in content:
         logging.info("PARSING %s", f)
         post.init_extras()
+        post.queue_webmentions()
 
         # add to sitemap
         sitemap.update({ post.url: post.mtime })
@@ -1525,17 +1551,24 @@ def build():
         if not c.is_uptodate or shared.config.getboolean('params', 'force'):
             worker.append(c.render())
 
-    # TODO move ping to separate function and add it as a task
-    # TODO separate an aiohttpworker?
-
     # add magic.php rendering
     worker.append(magic.render())
 
-    # TODO: send webmentions
-
     # do all the things!
     worker.run()
 
+    # send webmentions - this is synchronous due to the SQLite locking
+    wdb = shared.WebmentionQueue()
+    for out in wdb.get_outbox():
+        wm = Webmention(
+            out.get('source'),
+            out.get('target'),
+            out.get('dt')
+        )
+        if wm.send():
+            wdb.entry_done(out.get('id'))
+    wdb.finish()
+
     # copy static
     logging.info('copying static files')
     src = shared.config.get('dirs', 'static')
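
Webmention.send() now reports an outcome instead of firing and forgetting: no rel data or no discoverable endpoint counts as handled (True), a failed POST yields False, and the loop at the end of build() only calls entry_done() for True, so failed targets stay in the queue and are retried on the next run. That contract can be summarised with a hypothetical send_one() helper (the status check mirrors the patch; real endpoints may also answer 201 or 202, which requests.codes.ok alone would not accept):

import logging
import requests

def send_one(endpoint, source, target):
    """POST a single webmention; True means the queue entry may be marked done."""
    if not endpoint:
        return True                     # nothing to notify, treat as handled
    try:
        r = requests.post(endpoint, data={'source': source, 'target': target})
        return r.status_code == requests.codes.ok
    except requests.RequestException as exc:
        logging.error("sending webmention failed: %s", exc)
        return False
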
diff --git a/router.py b/router.py
index 61db587..ee989e4 100644
--- a/router.py
+++ b/router.py
@@ -3,16 +3,16 @@
 # vim: set fileencoding=utf-8 :
 
 __author__ = "Peter Molnar"
-__copyright__ = "Copyright 2017, Peter Molnar"
+__copyright__ = "Copyright 2017-2018, Peter Molnar"
 __license__ = "GPLv3"
-__version__ = "2.0"
+__version__ = "2.1.0"
 __maintainer__ = "Peter Molnar"
 __email__ = "hello@petermolnar.eu"
 __status__ = "Production"
 
 """
     silo archiver module of NASG
-    Copyright (C) 2017 Peter Molnar
+    Copyright (C) 2017-2018 Peter Molnar
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -39,21 +39,10 @@ import envelope
 import socket
 
 if __name__ == '__main__':
-    #logging_format = "[%(asctime)s] %(process)d-%(levelname)s "
-    #logging_format += "%(module)s::%(funcName)s():l%(lineno)d: "
-    #logging_format += "%(message)s"
-
-    #logging.basicConfig(
-        #format=logging_format,
-        #level=logging.DEBUG
-    #)
-    #log = logging.getLogger()
-
     # log_config=None prevents creation of access_log and error_log files
     # since I'm running this from systemctl it already goes into syslog
     app = Sanic('router')
-    #app = Sanic('router', log_config=None)
-    # this is ok to be read-only
+    # this is read only this way!
    sdb = shared.SearchDB()
 
     @app.route("/oauth1", methods=["GET"])
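
The "read only this way" comment above refers to the router only ever querying the search index, never writing it. If genuinely read-only access is wanted, SQLite can enforce that at the connection level through a URI; a sketch that is independent of how shared.SearchDB actually opens its database file:

import sqlite3

def open_search_readonly(path):
    """Open an SQLite database read-only, so the web process can query
    the search index but can never write to or create the file."""
    return sqlite3.connect('file:{}?mode=ro'.format(path), uri=True)
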
diff --git a/shared.py b/shared.py
index 7accf6c..6180b48 100644
--- a/shared.py
+++ b/shared.py
@@ -3,16 +3,16 @@
 # vim: set fileencoding=utf-8 :
 
 __author__ = "Peter Molnar"
-__copyright__ = "Copyright 2017, Peter Molnar"
+__copyright__ = "Copyright 2017-2018, Peter Molnar"
 __license__ = "GPLv3"
-__version__ = "2.0"
+__version__ = "2.1.0"
 __maintainer__ = "Peter Molnar"
 __email__ = "hello@petermolnar.eu"
 __status__ = "Production"
 
 """
     silo archiver module of NASG
-    Copyright (C) 2017 Peter Molnar
+    Copyright (C) 2017-2018 Peter Molnar
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -42,6 +42,7 @@ from slugify import slugify
 import jinja2
 from inspect import getsourcefile
 import sys
+import arrow
 
 class CMDLine(object):
     def __init__(self, executable):
@@ -485,6 +486,8 @@ class SearchDB(BaseDB):
 
 
 class WebmentionQueue(BaseDB):
+    tsform = 'YYYY-MM-DD HH:mm:ss'
+
     def __init__(self):
         self.fpath = "%s" % config.get('var', 'webmentiondb')
         super().__init__(self.fpath)
@@ -507,7 +510,7 @@ class WebmentionQueue(BaseDB):
     def finish(self):
         self.db.close()
 
-    def exists(self, source, target):
+    def exists(self, source, target, dt=arrow.now()):
         logging.debug(
             'checking webmention existence for source: %s ; target: %s',
             source,
             target
         )
         cursor = self.db.cursor()
         cursor.execute(
-            '''SELECT id FROM queue WHERE source=? AND target=? LIMIT 1''',
+            '''SELECT id,timestamp FROM queue WHERE source=? AND target=? ORDER BY timestamp DESC LIMIT 1''',
             (source,target)
         )
+
         rows = cursor.fetchall()
         if not rows:
             return False
-        return int(rows.pop()[0])
+
+        row = rows.pop()
+        if arrow.get(row[1], self.tsform).timestamp >= dt.timestamp:
+            return int(row[0])
+        else:
+            return False
 
     def queue(self, source, target):
+        logging.debug("Queueing webmention: %s to %s", source, target)
         cursor = self.db.cursor()
         cursor.execute(
             '''INSERT INTO queue (source,target) VALUES (?,?);''', (
@@ -585,14 +595,14 @@ class WebmentionQueue(BaseDB):
         cursor = self.db.cursor()
         ret = []
         cursor.execute(
-            '''SELECT * FROM queue WHERE source LIKE ? AND status = 0''',
+            '''SELECT id,timestamp,source,target FROM queue WHERE source LIKE ? AND status = 0''',
             ('%' + config.get('common', 'domain') + '%',)
         )
         rows = cursor.fetchall()
         for r in rows:
             ret.append({
                 'id': r[0],
-                'dt': r[1],
+                'dt': arrow.get(r[1], self.tsform).timestamp,
                 'source': r[2],
                 'target': r[3],
             })
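
The reworked exists() treats a queued row as current only if it was stored at or after the post's publication date, which is why queue_webmentions() passes self.published explicitly; the dt=arrow.now() default is evaluated once when the method is defined, not per call, so callers should always supply a timestamp. A minimal sketch of that comparison, comparing arrow objects directly to stay independent of whether .timestamp is a property or a method in the installed arrow version:

import arrow

TSFORM = 'YYYY-MM-DD HH:mm:ss'   # same format string as WebmentionQueue.tsform

def still_queued(row_timestamp, published):
    """True if the stored queue entry is at least as new as the post date,
    meaning the webmention does not need to be queued again."""
    queued_at = arrow.get(row_timestamp, TSFORM)
    return queued_at >= published    # published is an arrow object
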