From ed89984c3fecb0d43b67aa627563cdabe43b70f9 Mon Sep 17 00:00:00 2001 From: Peter Molnar Date: Wed, 21 Mar 2018 15:42:36 +0000 Subject: [PATCH] readme updates, extra functions around photo inserts --- README.md | 2 +- nasg.py | 108 ++++++++++++++++++++++++++++++++++++++++++++---------- router.py | 11 ++++-- 3 files changed, 97 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index ee5e6a6..65d8216 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Don't expect anything fancy: my Python Fu has much to learn. ### External dependencies -PHP is in order to use [XRay](https://github.com/aaronpk/XRay/) +PHP is in order to use [XRay](https://github.com/aaronpk/XRay/). Besides that, the rest is for `pandoc` and `exiftool`. ``` apt-get install pandoc exiftool php7.0-bcmath php7.0-bz2 php7.0-cli php7.0-common php7.0-curl php7.0-gd php7.0-imap php7.0-intl php7.0-json php7.0-mbstring php7.0-mcrypt php7.0-mysql php7.0-odbc php7.0-opcache php7.0-readline php7.0-sqlite3 php7.0-xml php7.0-zip python3 python3-pip python3-dev diff --git a/nasg.py b/nasg.py index 5169234..9ac75dc 100644 --- a/nasg.py +++ b/nasg.py @@ -40,6 +40,7 @@ from urllib.parse import urlparse import asyncio from math import ceil import csv +import html import frontmatter import requests import arrow @@ -366,6 +367,7 @@ class Category(NoDupeContainer): tmplfile = "%s.html" % (self.__class__.__name__) r = shared.j2.get_template(tmplfile).render(tmplvars) self.write_html(o, r) + # render feed if page == 1: self.write_feed(posttmpls) @@ -437,12 +439,13 @@ class Singular(object): @property def is_uptodate(self): - if not os.path.isfile(self.htmlfile): - return False - mtime = os.path.getmtime(self.htmlfile) - if mtime >= self.stime: - return True - return False + for f in [self.htmlfile]: + if not os.path.isfile(f): + return False + mtime = os.path.getmtime(f) + if mtime < self.stime: + return False + return True @property def htmlfile(self): @@ -562,7 +565,7 @@ class Singular(object): ]) if self.photo: - corpus = corpus + "\n".join(self.meta.get('tags', [])) + corpus = corpus + "\n".join(self.tags) return corpus @@ -630,9 +633,9 @@ class Singular(object): def html(self): html = "%s" % (self.body) - # add photo - if self.photo: - html = "%s\n%s" % (str(self.photo), html) + ## add photo + #if self.photo: + #html = "%s\n%s" % (str(self.photo), html) return shared.Pandoc().convert(html) @@ -650,7 +653,9 @@ class Singular(object): s = self.meta.get('summary', '') if not s: return s - return shared.Pandoc().convert(s) + if not hasattr(self, '_summary'): + self._summary = shared.Pandoc().convert(s) + return self._summary @property def shortslug(self): @@ -663,6 +668,14 @@ class Singular(object): urls.append("https://brid.gy/publish/flickr") return urls + @property + def tags(self): + return self.meta.get('tags', []) + + @property + def description(self): + return html.escape(self.meta.get('summary', '')) + @property def tmplvars(self): # very simple caching because we might use this 4 times: @@ -688,10 +701,17 @@ class Singular(object): 'is_reply': self.is_reply, 'age': int(self.published.format('YYYY')) - int(arrow.utcnow().format('YYYY')), 'summary': self.summary, + 'description': self.description, 'replies': self.replies, 'reactions': self.reactions, - 'syndicate': self.syndicate + 'syndicate': self.syndicate, + 'tags': self.tags, + 'photo': False } + if self.photo: + self._tmplvars.update({ + 'photo': str(self.photo) + }) return self._tmplvars async def render(self): @@ -1062,10 +1082,29 @@ class WebImage(object): downsized['crop'] ) + @property + def src_size(self): + width = int(self.meta.get('ImageWidth')) + height = int(self.meta.get('ImageHeight')) + + if not self.is_downsizeable: + return width, height + + return self._intermediate_dimension( + shared.config.getint('photo', 'default'), + width, + height + ) + + @property def tmplvars(self): + src_width, src_height = self.src_size + return { 'src': self.src, + 'width': src_width, + 'height': src_height, 'target': self.href, 'css': self.cssclass, 'title': self.title, @@ -1116,14 +1155,23 @@ class Comment(object): @property def author(self): - url = self.meta.get('author').get('url', self.source) - name = self.meta.get('author').get('name', urlparse(url).hostname) - - return { - 'name': name, - 'url': url + r = { + 'name': urlparse(self.source).hostname, + 'url': self.source } + author = self.meta.get('author') + if not author: + return r + + if 'name' in author: + r.update({ 'name': self.meta.get('author').get('name')}) + + if 'url' in author: + r.update({ 'name': self.meta.get('author').get('url')}) + + return r + @property def type(self): # caching, because calling Pandoc is expensive @@ -1341,11 +1389,16 @@ def build(): collector_front = Category() collector_categories = NoDupeContainer() + sitemap = {} + for f, post in content: logging.info("PARSING %s", f) post.init_extras() + # add to sitemap + sitemap.update({ post.url: post.mtime }) + # extend redirects for r in post.redirects: magic.redirects.append((r, post.fname)) @@ -1419,9 +1472,26 @@ def build(): for item in os.listdir(src): s = os.path.join(src, item) d = os.path.join(shared.config.get('common', 'build'), item) - if not os.path.exists(d): + if not os.path.exists(d) or shared.config.getboolean('params', 'force'): logging.debug("copying static file %s to %s", s, d) shutil.copy2(s, d) + if '.html' in item: + url = "%s/%s" % (shared.config.get('site', 'url'), item) + sitemap.update({ + url: os.path.getmtime(s) + }) + + # dump sitemap, if needed + sitemapf = os.path.join(shared.config.get('common', 'build'), 'sitemap.txt') + sitemap_update = True + if os.path.exists(sitemapf): + if int(max(sitemap.values())) <= int(os.path.getmtime(sitemapf)): + sitemap_update = False + + if sitemap_update: + logging.info('writing updated sitemap') + with open(sitemapf, 'wt') as smap: + smap.write("\n".join(sorted(sitemap.keys()))) if __name__ == '__main__': diff --git a/router.py b/router.py index f222b37..eceec34 100644 --- a/router.py +++ b/router.py @@ -30,7 +30,8 @@ __status__ = "Production" """ from sanic import Sanic import sanic.response -from sanic.log import log as logging +#from sanic.log import log as logging +import logging import validators import urllib.parse import shared @@ -48,7 +49,8 @@ if __name__ == '__main__': # log_config=None prevents creation of access_log and error_log files # since I'm running this from systemctl it already goes into syslog - app = Sanic('router', log_config=None) + app = Sanic('router') + #app = Sanic('router', log_config=None) # this is ok to be read-only sdb = shared.SearchDB() @@ -100,7 +102,7 @@ if __name__ == '__main__': # otherwise it'll become read-only for reasons I'm yet to grasp # the actual parsing will be done at site generation time wdb = shared.WebmentionQueue() - wdb.queue(source, target) + wdb.maybe_queue(source, target) # telegram notification, if set shared.notify( @@ -112,4 +114,5 @@ if __name__ == '__main__': response = sanic.response.text("Accepted", status=202) return response - app.run(host="127.0.0.1", port=8008, log_config=None) + #app.run(host="127.0.0.1", port=9002, log_config=None) + app.run(host="127.0.0.1", port=9002)