nasg/nasg.py

2089 lines
59 KiB
Python
Raw Normal View History

2017-05-23 11:14:47 +01:00
#!/usr/bin/env python3
2017-12-17 17:37:32 +00:00
__author__ = "Peter Molnar"
__copyright__ = "Copyright 2017-2019, Peter Molnar"
__license__ = "apache-2.0"
2017-12-17 17:37:32 +00:00
__maintainer__ = "Peter Molnar"
2018-04-30 20:44:04 +01:00
__email__ = "mail@petermolnar.net"
2017-05-23 11:14:47 +01:00
2018-07-20 16:45:42 +01:00
import glob
2017-05-23 11:14:47 +01:00
import os
2018-07-20 16:45:42 +01:00
import time
2017-05-23 11:14:47 +01:00
import re
2018-07-20 16:45:42 +01:00
import imghdr
2017-05-23 11:14:47 +01:00
import asyncio
2018-07-22 11:33:59 +01:00
import sqlite3
import json
import queue
2018-07-20 16:45:42 +01:00
from shutil import copy2 as cp
from math import ceil
2018-07-20 16:45:42 +01:00
from urllib.parse import urlparse
from collections import OrderedDict, namedtuple
import logging
import arrow
2017-05-23 11:14:47 +01:00
import langdetect
import wand.image
2018-07-20 16:45:42 +01:00
import jinja2
import yaml
2019-01-15 21:28:58 +00:00
import frontmatter
from feedgen.feed import FeedGenerator
2018-07-20 16:45:42 +01:00
from bleach import clean
from emoji import UNICODE_EMOJI
from slugify import slugify
import requests
2019-01-15 21:28:58 +00:00
from pandoc import PandocMarkdown
from meta import Exif, GoogleVision, GoogleClassifyText
2018-07-20 16:45:42 +01:00
import settings
import keys
2018-07-20 16:45:42 +01:00
from pprint import pprint
# Logger shared by every helper and class in this module.
logger = logging.getLogger('NASG')

# One Markdown image reference, as captured by RE_MDIMG.
MarkdownImage = namedtuple(
    'MarkdownImage',
    ['match', 'alt', 'fname', 'title', 'css']
)

# Comment types rendered as replies; any other type is grouped as a reaction.
REPLY_TYPES = ['webmention', 'in-reply-to', 'reply']

# Jinja2 environment reading templates from the configured template path.
J2 = jinja2.Environment(
    loader=jinja2.FileSystemLoader(searchpath=settings.paths.get('tmpl')),
    lstrip_blocks=True,
    trim_blocks=True
)

# Markdown image syntax: ![alt](fname "title"){css}
RE_MDIMG = re.compile(
    r'(?P<match>!\[(?P<alt>[^\]]+)?\]\((?P<fname>[^\s]+)'
    r'(?:\s[\'\"](?P<title>[^\"\']+)[\'\"])?\)(?:{(?P<css>[^\}]+)\})?)',
    re.IGNORECASE
)

# A line that opens a fenced code block with something after the fence.
RE_CODE = re.compile(
    r'^(?:[~`]{3}).+$',
    re.MULTILINE
)

# <pre class="..."><code> as rewritten by MarkdownDoc after conversion.
RE_PRECODE = re.compile(
    r'<pre class="([^"]+)"><code>'
)

# URLs pointing at this site. The site URL is escaped so that regex
# metacharacters in it (most notably the dots of the host name) only match
# themselves instead of "any character".
RE_MYURL = re.compile(
    r'"?(?P<url>%s/?[^\"]+)"?' % re.escape(str(settings.site.get('url')))
)
def relurl(txt, baseurl):
    """Jinja2 filter: rewrite URLs of this site in *txt* relative to *baseurl*.

    Every match of RE_MYURL is passed through ``os.path.relpath``; results
    that do not look like they carry a file extension get ``/index.html``
    appended.

    :param txt: text (typically rendered HTML) to rewrite
    :param baseurl: URL the resulting links should be relative to
    :return: *txt* with the matched URLs replaced
    """
    # Replace each distinct URL once, longest first: with plain str.replace a
    # URL that is a prefix of a longer one would otherwise rewrite the start
    # of the longer one before it gets its own turn.
    candidates = sorted(set(RE_MYURL.findall(txt)), key=len, reverse=True)
    for url in candidates:
        logger.debug('found URL candidate %s', url)
        logger.debug('baseurl is %s', baseurl)
        out = os.path.relpath(url, baseurl)
        logger.debug('result is %s', out)
        if not re.match(r'.*\.[a-z]{2,4}', out):
            out = "%s/index.html" % out
        txt = txt.replace(url, out)
    return txt


# Expose the helper to templates as the ``relurl`` filter.
J2.filters['relurl'] = relurl
def utfyamldump(data):
    """Dump *data* as block-style YAML with a 4-space indent, leaving
    non-ASCII characters unescaped.

    :param data: any structure ``yaml.dump`` can serialise
    :return: the YAML document as a string
    """
    dump_options = {
        'default_flow_style': False,
        'indent': 4,
        'allow_unicode': True,
    }
    return yaml.dump(data, **dump_options)
2018-11-19 16:16:52 +00:00
def url2slug(url, limit=200):
    """Turn a URL into a lowercase, ASCII-only slug usable as a file name.

    The scheme and a leading ``www.`` label are dropped before slugifying.
    Only the complete ``www.`` label is removed: hosts that merely start
    with the letters "www" (``www2.example.com``) keep them, so they can no
    longer collide with ``2.example.com``.

    :param url: URL to convert
    :param limit: maximum length of the returned slug
    :return: the slug, cut to at most *limit* characters
    """
    stripped = re.sub(r"^https?://(?:www\.)?", "", url)
    return slugify(stripped, only_ascii=True, lower=True)[:limit]
2018-11-19 16:16:52 +00:00
def writepath(fpath, content, mtime=0):
    """Write *content* to *fpath*, creating parent directories as needed.

    :param fpath: destination file path
    :param content: ``str`` is written as UTF-8 text, anything else is
        written in binary mode
    :param mtime: accepted for backward compatibility; not used
    """
    d = os.path.dirname(fpath)
    # dirname is '' for a bare file name and makedirs('') raises; exist_ok
    # covers the directory appearing between the check and the call.
    if d and not os.path.isdir(d):
        logger.debug('creating directory tree %s', d)
        os.makedirs(d, exist_ok=True)
    if isinstance(content, str):
        # explicit encoding so the output does not depend on the locale
        mode, encoding = 'wt', 'utf-8'
    else:
        mode, encoding = 'wb', None
    with open(fpath, mode, encoding=encoding) as f:
        logger.info('writing file %s', fpath)
        f.write(content)
class cached_property(object):
    """ Extremely simple cached_property decorator.

    The first time the attribute is read on an instance the wrapped method
    is called and its result is stored on the instance under the same name,
    so later reads find the stored value instead of calling the method again.
    """

    def __init__(self, method, name=None):
        self.method = method
        self.name = method.__name__ if not name else name

    def __get__(self, inst, cls):
        if inst is not None:
            value = self.method(inst)
            # stored on the instance under the attribute's own name
            setattr(inst, self.name, value)
            return value
        # accessed on the class itself: hand back the descriptor
        return self
class AQ:
    """Minimal helper that schedules coroutines and runs them to completion.

    ``put`` schedules a coroutine on the loop and queues the resulting
    future; ``run`` drives the loop until every queued future has finished.
    """

    def __init__(self):
        try:
            self.loop = asyncio.get_event_loop()
        except RuntimeError:
            # no current event loop in this thread: create and install one
            self.loop = asyncio.new_event_loop()
            asyncio.set_event_loop(self.loop)
        # Queue() no longer accepts a ``loop`` argument on Python 3.10+
        self.queue = asyncio.Queue()

    def put(self, task):
        """Schedule the coroutine *task* and remember its future."""
        # Queue.put() is a coroutine; calling it without awaiting never
        # stored anything. put_nowait() is the synchronous equivalent and
        # cannot block because the queue is unbounded.
        self.queue.put_nowait(asyncio.ensure_future(task, loop=self.loop))

    async def consume(self):
        """Await every queued future, logging failures instead of raising."""
        while not self.queue.empty():
            item = await self.queue.get()
            try:
                await item
            except Exception:
                # keep the remaining tasks going when one of them fails
                logger.exception('queued task failed')
            finally:
                self.queue.task_done()

    def run(self):
        """Block until all queued futures are done."""
        consumer = asyncio.ensure_future(self.consume(), loop=self.loop)
        self.loop.run_until_complete(consumer)
class Webmention(object):
    """An outgoing webmention, sent through telegraph.p3k.io.

    The response of a successful send is stored in a ``.ping`` file inside
    *dpath*; that file later serves as the marker that the mention was sent.
    """

    # seconds to wait for telegraph before giving up on a request
    TIMEOUT = 30

    def __init__(self, source, target, dpath, mtime=0):
        """
        :param source: URL the mention is sent from
        :param target: URL the mention is sent to
        :param dpath: directory that holds the ``.ping`` marker file
        :param mtime: timestamp the marker must be newer than; a falsy
            value is replaced with the current UTC timestamp
        """
        self.dpath = dpath
        self.source = source
        self.target = target
        if not mtime:
            mtime = arrow.utcnow().timestamp
        self.mtime = mtime

    @property
    def fpath(self):
        """Path of the ``.ping`` marker file for this target."""
        return os.path.join(
            self.dpath,
            '%s.ping' % (
                url2slug(self.target, 200)
            )
        )

    @property
    def exists(self):
        """True when the marker file exists and is newer than ``mtime``."""
        return (
            os.path.isfile(self.fpath)
            and os.path.getmtime(self.fpath) > self.mtime
        )

    def save(self, content):
        """Store *content* in the marker file."""
        writepath(self.fpath, content)

    async def send(self):
        """Send the webmention unless an up-to-date marker already exists.

        Network failures and non-2xx answers are logged and leave no marker
        behind, so the mention is retried on the next run.
        """
        if self.exists:
            return
        telegraph_url = 'https://telegraph.p3k.io/webmention'
        telegraph_params = {
            'token': '%s' % (keys.telegraph.get('token')),
            'source': '%s' % (self.source),
            'target': '%s' % (self.target)
        }
        try:
            # NOTE: requests is blocking; the timeout keeps an unresponsive
            # endpoint from stalling the whole run.
            r = requests.post(
                telegraph_url,
                data=telegraph_params,
                timeout=self.TIMEOUT
            )
        except requests.exceptions.RequestException as e:
            logger.error(
                'sending webmention from %s to %s failed: %s',
                self.source,
                self.target,
                e
            )
            return
        logger.info(
            "sent webmention to telegraph from %s to %s",
            self.source,
            self.target
        )
        if r.status_code not in [200, 201, 202]:
            logger.error('sending failed: %s %s', r.status_code, r.text)
        else:
            self.save(r.text)
2018-07-20 16:45:42 +01:00
class MarkdownDoc(object):
    """Base class for files made of YAML front matter followed by Markdown.

    The class has no ``__init__``; every method reads ``self.fpath``, which
    the subclasses in this file set in their own constructors.
    """

    @property
    def regex(self):
        """Pattern splitting a document into ``meta`` and ``content``."""
        return re.compile(
            r'^---\s?[\r\n](?P<meta>.+?)[\r\n]---(?:\s?[\r\n](?P<content>.+))?',
            flags=re.MULTILINE | re.DOTALL
        )

    @property
    def mtime(self):
        """Modification time of the source file."""
        return os.path.getmtime(self.fpath)

    @property
    def dt(self):
        """Latest of the file mtime and the ``published`` / ``date`` meta
        values, as a timestamp. Unparsable dates are logged and skipped."""
        maybe = self.mtime
        for key in ['published', 'date']:
            t = self.meta.get(key, None)
            if t and 'null' != t:
                try:
                    t = arrow.get(t)
                    if t.timestamp > maybe:
                        maybe = t.timestamp
                except Exception:
                    logger.error(
                        'failed to parse date: %s for key %s in %s',
                        t,
                        key,
                        self.fpath
                    )
        return maybe

    @cached_property
    def _parsed(self):
        """``(meta, text)`` as returned by ``frontmatter.parse``."""
        with open(self.fpath, mode='rt') as f:
            logger.debug('parsing YAML+MD file %s', self.fpath)
            meta, txt = frontmatter.parse(f.read())
        return (meta, txt)

    @cached_property
    def _reparsed(self):
        """``(meta, text)`` parsed with the class' own regex and PyYAML.

        :raises ValueError: when the file does not match the front matter
            pattern
        """
        logger.debug('parsing file %s', self.fpath)
        with open(self.fpath, mode='r') as f:
            txt = f.read()
        # the pattern is exposed as ``regex``; ``mdregex`` is not defined
        txt = self.regex.match(txt)
        if not txt:
            logger.error('failed to match YAML + MD doc: %s', self.fpath)
            # without a match there is nothing to read the groups from
            raise ValueError(
                'failed to match YAML + MD doc: %s' % self.fpath
            )
        if txt.group('content'):
            t = txt.group('content').strip()
        else:
            t = ''
        return (yaml.safe_load(txt.group('meta')), t)

    @property
    def meta(self):
        """Front matter of the document."""
        return self._parsed[0]

    @property
    def content(self):
        """Markdown body of the document."""
        return self._parsed[1]

    def __pandoc(self, c):
        """Convert Markdown *c* with PandocMarkdown and rewrite
        ``<pre class="x"><code>`` so the class ends up on ``<code>``.
        Empty input is returned untouched."""
        if c and len(c):
            c = PandocMarkdown(c)
            # raw string: "\g" is not a valid escape in a normal literal
            c = RE_PRECODE.sub(
                r'<pre><code lang="\g<1>" class="language-\g<1>">', c)
        return c

    @cached_property
    def html_content(self):
        """Content converted to HTML, with every known image reference
        replaced by the string form of its image object."""
        c = "%s" % (self.content)
        if hasattr(self, 'images') and len(self.images):
            for match, img in self.images.items():
                c = c.replace(match, str(img))
        return self.__pandoc(c)

    @cached_property
    def html_content_noimg(self):
        """Content converted to HTML, with every known image reference
        removed."""
        c = "%s" % (self.content)
        if hasattr(self, 'images') and len(self.images):
            for match in self.images:
                c = c.replace(match, '')
        return self.__pandoc(c)
2017-11-10 16:04:05 +00:00
2018-07-20 16:45:42 +01:00
class Comment(MarkdownDoc):
    """A single comment / webmention stored as a YAML + Markdown file."""

    def __init__(self, fpath):
        self.fpath = fpath

    @property
    def dt(self):
        """Arrow object from the ``date`` meta value, or from the file
        mtime when the value is missing or the string 'null'."""
        stamp = self.meta.get('date')
        if not stamp or 'null' == stamp:
            return arrow.get(os.path.getmtime(self.fpath))
        return arrow.get(stamp)

    @property
    def target(self):
        """Last path segment of the ``target`` URL."""
        parsed = urlparse(self.meta.get('target'))
        segments = parsed.path.strip('/').split('/')
        return segments[-1]

    @property
    def source(self):
        """The ``source`` meta value."""
        return self.meta.get('source')

    @property
    def author(self):
        """Dict with ``name`` and ``url``. The URL is always the source;
        the name comes from the author's name, else from the host of the
        author's URL, else from the host of the source."""
        info = {
            'name': urlparse(self.source).hostname,
            'url': self.source
        }
        author = self.meta.get('author')
        if author:
            if 'name' in author:
                info['name'] = author.get('name')
            elif 'url' in author:
                info['name'] = urlparse(author.get('url')).hostname
        return info

    @property
    def type(self):
        """The cleaned content itself when it is a key of UNICODE_EMOJI,
        otherwise the ``type`` meta value (default 'webmention')."""
        if len(self.content):
            stripped = clean(self.content, strip=True)
            if stripped in UNICODE_EMOJI:
                return stripped
        return self.meta.get('type', 'webmention')

    @property
    def tmplvars(self):
        """Variables handed to the templates for this comment."""
        iso = settings.dateformat.get('iso')
        display = settings.dateformat.get('display')
        return {
            'author': self.author,
            'source': self.source,
            'pubtime': self.dt.format(iso),
            'pubdate': self.dt.format(display),
            'html': self.html_content,
            'type': self.type
        }
2018-07-20 16:45:42 +01:00
class Gone(object):
    """
    Marker object for a deleted entry, backed by a file on disk.
    """

    def __init__(self, fpath):
        self.fpath = fpath
        self.mtime = os.path.getmtime(fpath)

    @property
    def source(self):
        """File name of the marker without directory and last extension."""
        basename = os.path.basename(self.fpath)
        return os.path.splitext(basename)[0]
2018-04-30 20:44:04 +01:00
class Redirect(Gone):
    """
    Marker object for an entry that moved; the file content is the target.
    """

    @cached_property
    def target(self):
        """Whitespace-stripped content of the marker file."""
        with open(self.fpath, 'rt') as handle:
            return handle.read().strip()
2017-05-23 11:14:47 +01:00
2018-07-20 16:45:42 +01:00
class Singular(MarkdownDoc):
    """
    A Singular object: a complete representation of a post, including
    all its comments, files, images, etc
    """

    def __init__(self, fpath):
        """
        :param fpath: path of the post's Markdown file; the parent directory
            name becomes the post name, the grandparent's the category
        """
        self.fpath = fpath
        n = os.path.dirname(fpath)
        self.name = os.path.basename(n)
        self.category = os.path.basename(os.path.dirname(n))

    @cached_property
    def files(self):
        """
        A list of files present at the same directory level as
        the Singular object, excluding hidden (starting with .) files and
        the .md, .ping, .url and .del ones
        """
        skipped = ('.md', '.ping', '.url', '.del')
        return [
            k
            for k in glob.glob(
                os.path.join(os.path.dirname(self.fpath), '*.*')
            )
            # test the file name, not the path: a content root such as
            # "./content" would otherwise make every file look hidden
            if not os.path.basename(k).startswith('.')
            and not k.endswith(skipped)
        ]

    @property
    def updated(self):
        """Timestamp of the latest change: the post's own ``dt`` or the
        newest comment, whichever is later."""
        maybe = self.dt
        for c in self.comments.values():
            # Comment.dt is an Arrow object while self.dt is a plain
            # timestamp, so compare on timestamps
            stamp = c.dt.timestamp
            if stamp > maybe:
                maybe = stamp
        return maybe

    @cached_property
    def comments(self):
        """
        A dict of Comment objects keyed by their timestamp, populated from
        the same directory level as the Singular object
        """
        comments = {}
        files = [
            k
            for k in glob.glob(
                os.path.join(os.path.dirname(self.fpath), '*.md')
            )
            if os.path.basename(k) != 'index.md'
        ]
        for f in files:
            c = Comment(f)
            comments[c.dt.timestamp] = c
        return comments

    @cached_property
    def images(self):
        """
        A dict of WebImage objects keyed by the matched Markdown, populated by:
        - images that are present in the Markdown content
        - and have an actual image file at the same directory level as
        the Singular object
        """
        images = {}
        for match, alt, fname, title, css in RE_MDIMG.findall(self.content):
            mdimg = MarkdownImage(match, alt, fname, title, css)
            imgpath = os.path.join(
                os.path.dirname(self.fpath),
                fname
            )
            if imgpath in self.files:
                if imghdr.what(imgpath):
                    images.update({match: WebImage(imgpath, mdimg, self)})
        return images

    @property
    def is_page(self):
        """True when the category name starts with an underscore."""
        return self.category.startswith('_')

    @property
    def is_front(self):
        """
        Returns if the post should be displayed on the front
        """
        if self.category in settings.site.get('on_front'):
            return True
        return False

    @property
    def is_photo(self):
        """
        True when the post has exactly one image and that image is the
        .jpg named after the entry's directory - so, its slug.
        """
        if len(self.images) != 1:
            return False
        photo = next(iter(self.images.values()))
        maybe = self.fpath.replace("index.md", "%s.jpg" % (self.name))
        return photo.fpath == maybe

    @property
    def photo(self):
        """The single image of a photo post, None for other posts."""
        if not self.is_photo:
            return None
        return next(iter(self.images.values()))

    @property
    def enclosure(self):
        """mime / size / url of the photo, None for non-photo posts."""
        if not self.is_photo:
            return None
        return {
            'mime': self.photo.mime_type,
            'size': self.photo.mime_size,
            'url': self.photo.href
        }

    @property
    def summary(self):
        """The ``summary`` meta value, '' when missing."""
        return self.meta.get('summary', '')

    @cached_property
    def html_summary(self):
        """Summary converted with PandocMarkdown; empty summaries are
        returned as they are."""
        c = self.summary
        if c and len(c):
            c = PandocMarkdown(self.summary)
        return c

    @property
    def title(self):
        """'RE: <url>' for replies, else the ``title`` meta value, else
        the formatted publish date."""
        if self.is_reply:
            return "RE: %s" % self.is_reply
        return self.meta.get(
            'title',
            arrow.get(
                self.published).format(
                settings.dateformat.get('display'))
        )

    @property
    def tags(self):
        """The ``tags`` meta value, [] when missing."""
        return self.meta.get('tags', [])

    @property
    def syndicate(self):
        """Syndication URLs from the meta plus the brid.gy endpoints."""
        # copy first: appending to the list held in meta would add the
        # brid.gy URLs again on every access
        urls = list(self.meta.get('syndicate') or [])
        if self.is_photo:
            urls.append("https://brid.gy/publish/flickr")
        urls.append("https://fed.brid.gy/")
        return urls

    def baseN(self, num, b=36,
              numerals="0123456789abcdefghijklmnopqrstuvwxyz"):
        """
        Creates short, lowercase slug for a number (an epoch) passed

        :param num: non-negative number, converted with ``int()``
        :param b: base to convert to
        :param numerals: digits to use, lowest value first
        :raises ValueError: for negative numbers
        """
        num = int(num)
        if num < 0:
            raise ValueError('cannot create a slug for a negative number')
        if num == 0:
            return numerals[0]
        digits = []
        while num:
            num, remainder = divmod(num, b)
            digits.append(numerals[remainder])
        return ''.join(reversed(digits))

    @property
    def shortslug(self):
        """Base-36 slug of the publish timestamp."""
        return self.baseN(self.published.timestamp)

    @property
    def published(self):
        """Arrow object built from the ``published`` meta value."""
        return arrow.get(self.meta.get('published'))

    @property
    def is_reply(self):
        """The ``in-reply-to`` meta value, False when missing."""
        return self.meta.get('in-reply-to', False)

    @property
    def is_future(self):
        """True when the publish time is later than now."""
        return self.published.timestamp > arrow.utcnow().timestamp

    @property
    def to_ping(self):
        """Webmention objects to send for this post: always fed.brid.gy,
        plus the reply target for replies or brid.gy's flickr endpoint
        for photos."""
        targets = ['https://fed.brid.gy']
        if self.is_reply:
            targets.append(self.is_reply)
        elif self.is_photo:
            targets.append('https://brid.gy/publish/flickr')
        dpath = os.path.dirname(self.fpath)
        return [
            Webmention(self.url, target, dpath, self.dt)
            for target in targets
        ]

    @property
    def licence(self):
        """Category specific licence if configured, else the site's."""
        if self.category in settings.licence:
            return settings.licence[self.category]
        return settings.site.get('licence')

    @property
    def lang(self):
        """Language detected from title and content, 'en' on failure."""
        lang = 'en'
        try:
            lang = langdetect.detect("\n".join([
                self.meta.get('title', ''),
                self.content
            ]))
        except Exception:
            # best effort: keep the default, but no longer swallow
            # KeyboardInterrupt / SystemExit the way BaseException did
            logger.debug('language detection failed for %s', self.fpath)
        return lang

    # @property
    # def classification(self):
    #     c = GoogleClassifyText(self.fpath, self.content, self.lang)
    #     k = '/Arts & Entertainment/Visual Art & Design/Photographic & Digital Arts'
    #     if self.is_photo and k not in c.keys():
    #         c.update({
    #             k : '1.0'
    #         })
    #     return c

    @property
    def url(self):
        """Absolute URL of the post: ``<site url>/<name>/``."""
        return "%s/%s/" % (
            settings.site.get('url'),
            self.name
        )

    @property
    def replies(self):
        """Template variables of the comments whose type is a reply type,
        keyed like ``comments``."""
        r = OrderedDict()
        for mtime, c in self.comments.items():
            if c.type not in REPLY_TYPES:
                continue
            r[mtime] = c.tmplvars
        return r

    @property
    def reactions(self):
        """Template variables of the non-reply comments, grouped by type."""
        r = OrderedDict()
        for mtime, c in self.comments.items():
            if c.type in REPLY_TYPES:
                continue
            t = "%s" % (c.type)
            if t not in r:
                r[t] = OrderedDict()
            r[t][mtime] = c.tmplvars
        return r

    @property
    def has_code(self):
        """True when the content has a line matching RE_CODE."""
        return bool(RE_CODE.search(self.content))

    @property
    def event(self):
        """The ``event`` meta value extended with formatted start and end
        dates, False when the post has no event."""
        if 'event' not in self.meta:
            return False
        display = settings.dateformat.get('display')
        iso = settings.dateformat.get('iso')
        event = self.meta.get('event', {})
        event.update({
            'startdate': arrow.get(event.get('start')).format(display),
            'starttime': arrow.get(event.get('start')).format(iso),
            'enddate': arrow.get(event.get('end')).format(display),
            'endtime': arrow.get(event.get('end')).format(iso),
        })
        return event

    @cached_property
    def jsonld(self):
        """schema.org BlogPosting description of the post, as JSON text."""
        r = {
            "@context": "http://schema.org",
            "@type": "BlogPosting",
            "headline": self.title,
            "url": self.url,
            "mainEntityOfPage": self.url,
            "articleBody": self.html_content,
            "description": self.html_summary,
            "dateModified": self.published.format(settings.dateformat.get('iso')),
            "datePublished": self.published.format(settings.dateformat.get('iso')),
            "license": self.licence,
            "image": settings.author.get('avatar'),
            "author": {
                "@context": "http://schema.org",
                "@type": "Person",
                "image": settings.author.get('avatar'),
                "url": settings.author.get('url'),
                "name": settings.author.get('name'),
                # NOTE(review): "email" is filled from the author URL -
                # confirm whether settings.author has a dedicated key
                "email": settings.author.get('url'),
            },
            "publisher": {
                "@context": "http://schema.org",
                "@type": "Organization",
                "logo": {
                    "@context": "http://schema.org",
                    "@type": "ImageObject",
                    "url": settings.author.get('avatar'),
                },
                "url": settings.author.get('url'),
                "name": settings.author.get('name'),
                "email": settings.author.get('url'),
            },
        }
        if (self.is_photo):
            r.update({
                'image': self.enclosure.get('url'),
            })
        return json.dumps(r)

    @cached_property
    def tmplvars(self):
        """Variables handed to the templates for this post."""
        v = {
            'title': self.title,
            'category': self.category,
            'lang': self.lang,
            'slug': self.name,
            'is_reply': self.is_reply,
            'is_page': self.is_page,
            'summary': self.summary,
            'html_summary': self.html_summary,
            'html_content': self.html_content,
            'mtime': self.dt,
            'pubtime': self.published.format(settings.dateformat.get('iso')),
            'pubdate': self.published.format(settings.dateformat.get('display')),
            'year': int(self.published.format('YYYY')),
            'licence': self.licence,
            'replies': self.replies,
            'reactions': self.reactions,
            'syndicate': self.syndicate,
            'url': self.url,
            'review': self.review,
            'has_code': self.has_code,
            'event': self.event,
            'is_photo': self.is_photo,
        }
        if (self.is_photo):
            v.update({
                'enclosure': self.enclosure,
                'photo': self.photo
            })
        return v

    @property
    def review(self):
        """The ``review`` meta value with its 'x/y' rating split into
        ``rated`` and ``outof``, False when the post has no review."""
        if 'review' not in self.meta:
            return False
        r = self.meta.get('review')
        rated, outof = r.get('rating').split('/')
        r.update({
            'rated': rated,
            'outof': outof
        })
        return r

    @property
    def template(self):
        """Template file name, derived from the class name."""
        return "%s.j2.html" % (self.__class__.__name__)

    @property
    def renderdir(self):
        """Directory the post is rendered into."""
        return os.path.dirname(self.renderfile)

    @property
    def renderfile(self):
        """Path of the rendered HTML: ``<build>/<name>/index.html``."""
        return os.path.join(
            settings.paths.get('build'),
            self.name,
            'index.html'
        )

    @property
    def exists(self):
        """True when the rendered file is present and up to date, i.e.
        when rendering can be skipped."""
        if settings.args.get('force'):
            logger.debug('rendering required: force mode on')
            return False
        elif not os.path.exists(self.renderfile):
            logger.debug('rendering required: no html yet')
            return False
        elif self.dt > os.path.getmtime(self.renderfile):
            logger.debug('rendering required: self.dt > html mtime')
            return False
        else:
            logger.debug('rendering not required')
            return True

    @property
    def corpus(self):
        """Title, name, summary and content joined with newlines."""
        return "\n".join([
            self.title,
            self.name,
            self.summary,
            self.content,
        ])

    async def copyfiles(self):
        """Copy the post's non-Markdown, non-image files into the build
        directory, skipping files whose copy is already up to date."""
        exclude = ['.md', '.jpg', '.png', '.gif', '.ping']
        files = glob.glob(os.path.join(
            os.path.dirname(self.fpath),
            '*.*'
        ))
        for f in files:
            fname, fext = os.path.splitext(f)
            if fext.lower() in exclude:
                continue
            t = os.path.join(
                settings.paths.get('build'),
                self.name,
                os.path.basename(f)
            )
            if os.path.exists(t) and os.path.getmtime(
                    f) <= os.path.getmtime(t):
                continue
            # the target directory is missing when nothing was rendered
            # for this post yet
            os.makedirs(os.path.dirname(t), exist_ok=True)
            logger.info("copying '%s' to '%s'", f, t)
            cp(f, t)

    async def render(self):
        """Render the post with its template unless it is up to date."""
        if self.exists:
            return
        logger.info("rendering %s", self.name)
        r = J2.get_template(self.template).render({
            'baseurl': self.url,
            'post': self.tmplvars,
            'site': settings.site,
            'menu': settings.menu,
            'author': settings.author,
            'meta': settings.meta,
            'licence': settings.licence,
            'tips': settings.tips,
        })
        writepath(self.renderfile, r)
        #writepath(self.renderfile.replace('.html', '.json'), self.jsonld)
2017-06-12 15:40:30 +01:00
2019-01-15 21:28:58 +00:00
class Home(Singular):
    """The front page: a Singular rendered to the build root, with a list
    of (category, post) template variables attached."""

    def __init__(self, fpath):
        super().__init__(fpath)
        self.elements = []

    def add(self, category, post):
        """Attach the template variables of *category* and *post*."""
        pair = (category.ctmplvars, post.tmplvars)
        self.elements.append(pair)

    @property
    def renderdir(self):
        """The build directory itself."""
        return settings.paths.get('build')

    @property
    def renderfile(self):
        """``<build>/index.html``."""
        return os.path.join(settings.paths.get('build'), 'index.html')

    @property
    def dt(self):
        """Latest of the page's own ``dt`` and the ``mtime`` of every
        attached post."""
        stamps = [super().dt]
        stamps.extend(post['mtime'] for _, post in self.elements)
        # max() returns the first of equal items, so the page's own value
        # wins ties exactly like a strict ">" comparison would
        return max(stamps)

    async def render(self):
        """Render the front page unless it is up to date."""
        if self.exists:
            return
        logger.info("rendering %s", self.name)
        context = {
            'baseurl': settings.site.get('url'),
            'post': self.tmplvars,
            'site': settings.site,
            'menu': settings.menu,
            'author': settings.author,
            'meta': settings.meta,
            'licence': settings.licence,
            'tips': settings.tips,
            'elements': self.elements
        }
        html = J2.get_template(self.template).render(context)
        writepath(self.renderfile, html)
2018-07-20 16:45:42 +01:00
class WebImage(object):
    """An image referenced from a post, together with its resized copies."""

    def __init__(self, fpath, mdimg, parent):
        """
        :param fpath: path of the original image file
        :param mdimg: MarkdownImage the image was referenced with
        :param parent: the post object the image belongs to
        """
        logger.debug("loading image: %s", fpath)
        self.mdimg = mdimg
        self.fpath = fpath
        self.parent = parent
        self.mtime = os.path.getmtime(self.fpath)
        self.fname, self.fext = os.path.splitext(os.path.basename(fpath))
        longest = max(self.width, self.height)
        # only configured sizes smaller than the image's longer side
        self.resized_images = [
            (k, self.Resized(self, k))
            for k in settings.photo.get('sizes').keys()
            if k < longest
        ]
        if not len(self.resized_images):
            # smaller than every configured size: keep one copy at its own
            # longer side
            self.resized_images.append((longest, self.Resized(self, longest)))

    @property
    def is_mainimg(self):
        """True when the file is named after the parent post."""
        return self.fname == self.parent.name

    @cached_property
    def tmplvars(self):
        """Variables handed to the image template."""
        return {
            'src': self.src,
            'href': self.href,
            'width': self.displayed.width,
            'height': self.displayed.height,
            'title': self.title,
            'caption': self.caption,
            'exif': self.exif,
            'is_photo': self.is_photo,
            #'is_mainimg': self.is_mainimg,
        }

    def __str__(self):
        """The original Markdown when it carries css, else the rendered
        image template."""
        if len(self.mdimg.css):
            return self.mdimg.match
        template_name = "%s.j2.html" % (self.__class__.__name__)
        return J2.get_template(template_name).render(self.tmplvars)

    # @cached_property
    # def visionapi(self):
    #     return GoogleVision(self.fpath, self.src)

    # @property
    # def onlinecopies(self):
    #     copies = {}
    #     for m in self.visionapi.onlinecopies:
    #         if settings.site.get('domain') not in m:
    #             copies[m] = True
    #     return copies.keys()

    @cached_property
    def meta(self):
        """Metadata of the image, read through ``Exif``."""
        return Exif(self.fpath)

    @property
    def caption(self):
        """Markdown alt text, else the 'Description' metadata."""
        if not len(self.mdimg.alt):
            return self.meta.get('Description', '')
        return self.mdimg.alt

    @property
    def title(self):
        """Markdown title, else the 'Headline' metadata, else the name."""
        if not len(self.mdimg.title):
            return self.meta.get('Headline', self.fname)
        return self.mdimg.title

    @property
    def tags(self):
        """Unique values of the 'Subject' metadata."""
        return list(set(self.meta.get('Subject', [])))

    @property
    def published(self):
        """Arrow object from 'ReleaseDate', falling back to 'ModifyDate'."""
        stamp = self.meta.get('ReleaseDate', self.meta.get('ModifyDate'))
        return arrow.get(stamp)

    @property
    def width(self):
        """'ImageWidth' metadata as int."""
        return int(self.meta.get('ImageWidth'))

    @property
    def height(self):
        """'ImageHeight' metadata as int."""
        return int(self.meta.get('ImageHeight'))

    @property
    def mime_type(self):
        """'MIMEType' metadata, 'image/jpeg' when missing."""
        return str(self.meta.get('MIMEType', 'image/jpeg'))

    @property
    def mime_size(self):
        """Size in bytes of the linked (largest) resized file."""
        return os.path.getsize(self.linked.fpath)

    @property
    def displayed(self):
        """The resized copy of the configured default size, or the first
        copy when that size is not available."""
        default_size = settings.photo.get('default')
        chosen = self.resized_images[0][1]
        for size, candidate in self.resized_images:
            if size == default_size:
                chosen = candidate
        return chosen

    @property
    def linked(self):
        """The largest resized copy."""
        biggest = 0
        chosen = self.resized_images[0][1]
        for size, candidate in self.resized_images:
            if size > biggest:
                biggest, chosen = size, candidate
        return chosen

    @property
    def src(self):
        """URL of the displayed copy."""
        return self.displayed.url

    @property
    def href(self):
        """URL of the linked copy."""
        return self.linked.url

    @property
    def is_photo(self):
        """True when the configured author regex matches the 'Copyright'
        or the 'Artist' metadata."""
        pattern = settings.photo.get('re_author', None)
        if not pattern:
            return False
        cpr = self.meta.get('Copyright', '')
        art = self.meta.get('Artist', '')
        # both Artist and Copyright missing from EXIF
        if not (cpr or art):
            return False
        # we have a regex and at least one of the fields: try matching
        return bool(pattern.search(cpr) or pattern.search(art))

    @property
    def exif(self):
        """Selected EXIF values; every value is '' for non-photos and for
        fields that are not present."""
        exif = {
            'camera': '',
            'aperture': '',
            'shutter_speed': '',
            'focallength': '',
            'iso': '',
            'lens': '',
            'geo_latitude': '',
            'geo_longitude': '',
        }
        if not self.is_photo:
            return exif

        # output key -> metadata fields to try, in order of preference
        mapping = {
            'camera': ['Model'],
            'aperture': ['FNumber', 'Aperture'],
            'shutter_speed': ['ExposureTime'],
            # 'focallength': ['FocalLengthIn35mmFormat', 'FocalLength'],
            'focallength': ['FocalLength'],
            'iso': ['ISO'],
            'lens': ['LensID', 'LensSpec', 'Lens'],
            'geo_latitude': ['GPSLatitude'],
            'geo_longitude': ['GPSLongitude'],
        }

        for ekey, candidates in mapping.items():
            for candidate in candidates:
                found = self.meta.get(candidate, None)
                if not found:
                    continue
                # coordinates are rounded to 5 decimals, the rest kept as is
                exif[ekey] = round(float(found), 5) if 'geo_' in ekey else found
                break
        return exif

    def _maybe_watermark(self, img):
        """Composite the configured watermark onto *img* when the image is
        a photo and the watermark file exists; returns *img* either way."""
        if not self.is_photo:
            return img

        wmarkfile = settings.paths.get('watermark')
        if not os.path.exists(wmarkfile):
            return img

        with wand.image.Image(filename=wmarkfile) as wmark:
            landscape = self.width > self.height
            if landscape:
                w = self.width * 0.3
                h = wmark.height * (w / wmark.width)
                x = self.width - w - (self.width * 0.01)
                y = self.height - h - (self.height * 0.01)
            else:
                # the watermark gets rotated below, so its width and height
                # trade places in the offsets
                w = self.height * 0.24
                h = wmark.height * (w / wmark.width)
                x = self.width - h - (self.width * 0.01)
                y = self.height - w - (self.height * 0.01)

            w, h, x, y = round(w), round(h), round(x), round(y)

            wmark.resize(w, h)
            if not landscape:
                wmark.rotate(-90)
            img.composite(image=wmark, left=x, top=y)
        return img

    async def downsize(self):
        """Create every missing or outdated resized copy; all of them when
        the 'regenerate' argument is set."""
        pending = any(
            not resized.exists or settings.args.get('regenerate')
            for _, resized in self.resized_images
        )
        if not pending:
            return

        with wand.image.Image(filename=self.fpath) as img:
            img.auto_orient()
            img = self._maybe_watermark(img)
            for size, resized in self.resized_images:
                if resized.exists and not settings.args.get('regenerate'):
                    continue
                logger.info(
                    "resizing image: %s to size %d",
                    os.path.basename(self.fpath),
                    size
                )
                await resized.make(img)

    class Resized:
        """One resized copy of a WebImage."""

        def __init__(self, parent, size, crop=False):
            """
            :param parent: the WebImage this is a copy of
            :param size: target size, also the key into the configured sizes
            :param crop: whether the copy is squared
            """
            self.parent = parent
            self.size = size
            self.crop = crop

        @property
        def suffix(self):
            """File name suffix configured for the size, '' when none."""
            return settings.photo.get('sizes').get(self.size, '')

        @property
        def fname(self):
            """File name: original name + suffix + original extension."""
            return "%s%s%s" % (
                self.parent.fname,
                self.suffix,
                self.parent.fext
            )

        @property
        def fpath(self):
            """Path of the copy inside the post's render directory."""
            return os.path.join(self.parent.parent.renderdir, self.fname)

        @property
        def url(self):
            """Absolute URL of the copy."""
            return "%s/%s/%s" % (
                settings.site.get('url'),
                self.parent.parent.name,
                self.fname
            )

        @property
        def relpath(self):
            """Path of the copy with the build directory removed."""
            reldir = self.parent.parent.renderdir.replace(
                settings.paths.get('build'), ''
            )
            return "%s/%s" % (reldir, self.fname)

        @property
        def exists(self):
            """True when the file exists and is not older than the
            original image."""
            if not os.path.isfile(self.fpath):
                return False
            return os.path.getmtime(self.fpath) >= self.parent.mtime

        @property
        def width(self):
            """Width of the copy."""
            return self.dimensions[0]

        @property
        def height(self):
            """Height of the copy."""
            return self.dimensions[1]

        @property
        def dimensions(self):
            """(width, height) of the copy, keeping the aspect ratio."""
            width = self.parent.width
            height = self.parent.height
            size = self.size

            ratio = max(width, height) / min(width, height)
            horizontal = (width / height) >= 1

            # panorama: reverse "horizontal" because the limit should be on
            # the shorter side, not the longer, and make it a bit smaller,
            # than the actual limit
            # 2.39 is the wide angle cinematic view: anything wider, than
            # that is panorama land
            if ratio > 2.4 and not self.crop:
                size = int(size * 0.6)
                horizontal = not horizontal

            # the size limit goes on the width exactly when "horizontal"
            # and "crop" disagree
            if horizontal != bool(self.crop):
                w = size
                h = int(float(size / width) * height)
            else:
                h = size
                w = int(float(size / height) * width)
            return (w, h)

        async def make(self, original):
            """Write the resized copy of the wand image *original*."""
            target_dir = os.path.dirname(self.fpath)
            if not os.path.isdir(target_dir):
                os.makedirs(target_dir)

            with original.clone() as thumb:
                thumb.resize(self.width, self.height)

                if self.crop:
                    thumb.liquid_rescale(self.size, self.size, 1, 1)

                is_jpeg = 'jpeg' == self.parent.meta.get(
                    'FileType', 'jpeg').lower()
                if is_jpeg:
                    thumb.compression_quality = 88
                    thumb.unsharp_mask(
                        radius=1,
                        sigma=0.5,
                        amount=0.7,
                        threshold=0.5
                    )
                    thumb.format = 'pjpeg'

                # this is to make sure pjpeg happens
                with open(self.fpath, 'wb') as f:
                    logger.info("writing %s", self.fpath)
                    thumb.save(file=f)
2017-10-27 15:56:05 +01:00
class PHPFile(object):
    """Base class for PHP files rendered from a template.

    Subclasses provide ``renderfile``, ``templatefile`` and an async
    ``_render``.
    """

    @property
    def exists(self):
        """True when the rendered file is present and not older than its
        template; always False in force mode."""
        if settings.args.get('force') or not os.path.exists(self.renderfile):
            return False
        return self.mtime <= os.path.getmtime(self.renderfile)

    @property
    def mtime(self):
        """Modification time of the template file."""
        template_path = os.path.join(
            settings.paths.get('tmpl'),
            self.templatefile
        )
        return os.path.getmtime(template_path)

    @property
    def renderfile(self):
        """Path of the rendered file; to be provided by subclasses."""
        raise ValueError('Not implemented')

    @property
    def templatefile(self):
        """Name of the template; to be provided by subclasses."""
        raise ValueError('Not implemented')

    async def render(self):
        """Render through ``_render`` unless the output is up to date."""
        if not self.exists:
            await self._render()
class Search(PHPFile):
    """Full text search: an SQLite FTS4 index in the build directory plus
    the PHP page rendered from ``Search.j2.php``.

    Usable as a context manager; leaving the block commits pending changes
    and closes the database. Calling ``__exit__()`` by hand still works.
    """

    def __init__(self):
        self.fpath = os.path.join(
            settings.paths.get('build'),
            'search.sqlite'
        )
        self.db = sqlite3.connect(self.fpath)
        self.db.execute('PRAGMA auto_vacuum = INCREMENTAL;')
        self.db.execute('PRAGMA journal_mode = MEMORY;')
        self.db.execute('PRAGMA temp_store = MEMORY;')
        self.db.execute('PRAGMA locking_mode = NORMAL;')
        self.db.execute('PRAGMA synchronous = FULL;')
        self.db.execute('PRAGMA encoding = "UTF-8";')
        self.db.execute('''
            CREATE VIRTUAL TABLE IF NOT EXISTS data USING fts4(
                url,
                mtime,
                name,
                title,
                category,
                content,
                notindexed=category,
                notindexed=url,
                notindexed=mtime,
                tokenize=porter
            )'''
        )
        # set by append(); decides whether a commit is needed on exit
        self.is_changed = False

    def __enter__(self):
        return self

    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        """Commit pending changes and close the database.

        All parameters are optional so the method serves both the ``with``
        protocol and a direct argument-less call.
        """
        if self.is_changed:
            self.db.commit()
            # NOTE(review): this only reads the auto_vacuum setting; if
            # reclaiming space was the intent, confirm whether
            # "PRAGMA incremental_vacuum;" was meant
            self.db.execute('PRAGMA auto_vacuum;')
        self.db.close()

    def check(self, name):
        """Return the indexed mtime of *name* as int, 0 when the name is
        not in the index."""
        maybe = self.db.execute('''
            SELECT
                mtime
            FROM
                data
            WHERE
                name = ?
        ''', (name,)).fetchone()
        return int(maybe[0]) if maybe else 0

    def append(self, post):
        """Index *post*, replacing an entry that is older than the post;
        an up-to-date entry is left alone.

        :param post: object providing mtime, name, url, title, category
            and content
        """
        mtime = int(post.mtime)
        check = self.check(post.name)
        if (check and check < mtime):
            self.db.execute('''
                DELETE
                FROM
                    data
                WHERE
                    name=?''', (post.name,))
            check = False
        if not check:
            self.db.execute('''
                INSERT INTO
                    data
                    (url, mtime, name, title, category, content)
                VALUES
                    (?,?,?,?,?,?);
            ''', (
                post.url,
                mtime,
                post.name,
                post.title,
                post.category,
                post.content
            ))
            self.is_changed = True

    @property
    def renderfile(self):
        """``<build>/search.php``."""
        return os.path.join(
            settings.paths.get('build'),
            'search.php'
        )

    @property
    def templatefile(self):
        """Template of the search page."""
        return 'Search.j2.php'

    async def _render(self):
        """Render the search page."""
        r = J2.get_template(self.templatefile).render({
            'baseurl': settings.site.get('search'),
            'post': {},
            'site': settings.site,
            'menu': settings.menu,
            'author': settings.author,
            'meta': settings.meta,
            'licence': settings.licence,
            'tips': settings.tips,
        })
        writepath(self.renderfile, r)
class IndexPHP(PHPFile):
    """Fallback ``index.php`` holding the "gone" and redirect tables."""

    def __init__(self):
        self.gone = {}
        self.redirect = {}

    def add_gone(self, uri):
        """Mark `uri` as gone."""
        self.gone[uri] = True

    def add_redirect(self, source, target):
        """Register a redirect from `source` to `target`.

        A redirect pointing at a gone URI makes the source gone as well;
        a target without a scheme is prefixed with the site URL.
        """
        if target in self.gone:
            self.add_gone(source)
            return
        if '://' not in target:
            target = "%s/%s" % (settings.site.get('url'), target)
        self.redirect[source] = target

    @property
    def renderfile(self):
        build_dir = settings.paths.get('build')
        return os.path.join(build_dir, 'index.php')

    @property
    def templatefile(self):
        return '404.j2.php'

    async def _render(self):
        """Render the template with the collected tables and write it."""
        template = J2.get_template(self.templatefile)
        rendered = template.render({
            'post': {},
            'site': settings.site,
            'menu': settings.menu,
            'gones': self.gone,
            'redirects': self.redirect
        })
        writepath(self.renderfile, rendered)
2017-10-27 15:56:05 +01:00
class WebhookPHP(PHPFile):
    """``webhook.php`` rendered from the ``Webhook.j2.php`` template."""

    @property
    def renderfile(self):
        build_dir = settings.paths.get('build')
        return os.path.join(build_dir, 'webhook.php')

    @property
    def templatefile(self):
        return 'Webhook.j2.php'

    async def _render(self):
        """Render the template with author and API key data and write it."""
        template = J2.get_template(self.templatefile)
        rendered = template.render({
            'author': settings.author,
            'webmentionio': keys.webmentionio,
            'zapier': keys.zapier,
        })
        writepath(self.renderfile, rendered)
class MicropubPHP(PHPFile):
    """``micropub.php`` rendered from the ``Micropub.j2.php`` template."""

    @property
    def renderfile(self):
        build_dir = settings.paths.get('build')
        return os.path.join(build_dir, 'micropub.php')

    @property
    def templatefile(self):
        return 'Micropub.j2.php'

    async def _render(self):
        """Render the template with site, menu and path data and write it."""
        template = J2.get_template(self.templatefile)
        rendered = template.render({
            'site': settings.site,
            'menu': settings.menu,
            'paths': settings.paths
        })
        writepath(self.renderfile, rendered)
2018-07-20 16:45:42 +01:00
class Category(dict):
    """Posts of one category, keyed by a sortable timestamp.

    Keys are compared against year boundaries expressed as timestamps, so
    they are expected to be publish timestamps. An empty `name` denotes the
    site-wide (front page) collection.
    """

    def __init__(self, name=''):
        self.name = name
        # arrow format string used to bucket posts into archive pages
        self.trange = 'YYYY'

    def __setitem__(self, key, value):
        """Store a post; raise LookupError if `key` is already taken."""
        if key in self:
            raise LookupError(
                "key '%s' already exists, colliding posts are: %s vs %s" % (
                    key,
                    self[key].fpath,
                    value.fpath,
                )
            )
        dict.__setitem__(self, key, value)

    def _keyslice(self, start=0, end=-1):
        """Return ``sortedkeys[start:end]``; ``end=-1`` means "to the end".

        A literal ``[start:-1]`` slice would silently drop the oldest post
        (and yield nothing at all for a single-post category).
        """
        if end == -1:
            end = None
        return self.sortedkeys[start:end]

    @property
    def sortedkeys(self):
        """Keys in descending order, i.e. newest first."""
        return sorted(self, reverse=True)

    @property
    def display(self):
        return settings.categorydisplay.get(self.name, '')

    @property
    def title(self):
        if self.name:
            return "%s - %s" % (self.name, settings.site.get('domain'))
        return settings.site.get('title')

    @property
    def url(self):
        """Public URL of the category; always ends with a slash."""
        if self.name:
            return "%s/category/%s/" % (settings.site.get('url'), self.name)
        return '%s/' % (settings.site.get('url'))

    @property
    def feedurl(self):
        return "%sfeed/" % (self.url)

    @property
    def template(self):
        return "%s.j2.html" % (self.__class__.__name__)

    @property
    def dpath(self):
        """Directory below the build path that holds this category."""
        if self.name:
            return os.path.join(
                settings.paths.get('build'),
                'category',
                self.name
            )
        return settings.paths.get('build')

    @property
    def newest_year(self):
        return int(self[self.sortedkeys[0]].published.format(self.trange))

    @property
    def years(self):
        """Map every year that has posts to its archive URL.

        The newest year maps to the category URL itself.
        """
        years = {}
        keys = self.sortedkeys
        if not keys:
            return years
        # computed once instead of re-sorting the keys for every new year
        newest_year = int(self[keys[0]].published.format(self.trange))
        base = self.url
        for k in keys:
            y = int(self[k].published.format(self.trange))
            if y not in years:
                if y == newest_year:
                    years[y] = base
                else:
                    years[y] = "%s%d/" % (base, y)
        return years

    @property
    def mtime(self):
        return self[self.sortedkeys[0]].published.timestamp

    @property
    def rssfeedfpath(self):
        return os.path.join(
            self.dpath,
            'feed',
            'index.xml'
        )

    @property
    def atomfeedfpath(self):
        return os.path.join(
            self.dpath,
            'feed',
            'atom.xml'
        )

    def get_posts(self, start=0, end=-1):
        """Return template variables of the posts in ``[start:end]``.

        Posts are ordered newest first; `end` is exclusive and the default
        of -1 selects everything from `start` onwards.
        """
        return [
            self[k].tmplvars
            for k in self._keyslice(start, end)
        ]

    def is_uptodate(self, fpath, ts):
        """Tell whether `fpath` exists and is not older than `ts`.

        Always False when the ``force`` argument is set.
        """
        if settings.args.get('force'):
            return False
        if not os.path.exists(fpath):
            return False
        if os.path.getmtime(fpath) >= ts:
            return True
        return False

    def newest(self, start=0, end=-1):
        """Return the largest ``dt`` among the posts in ``[start:end]``.

        The default `end` of -1, as well as ``start == end``, selects
        everything from `start` onwards. Raises IndexError when no post is
        selected.
        """
        if start == end:
            end = -1
        s = sorted(
            [self[k].dt for k in self._keyslice(start, end)],
            reverse=True
        )
        return s[0]

    @property
    def ctmplvars(self):
        """Template variables describing the category itself."""
        return {
            'name': self.name,
            'display': self.display,
            'url': self.url,
            'feed': self.feedurl,
            'title': self.title,
        }

    def tmplvars(self, posts=None, year=False):
        """Build the template context for an index page.

        `posts` is the list of post template variables to show (empty when
        omitted); `year` selects a yearly archive page, False otherwise.
        """
        baseurl = self.url
        if year:
            # self.url already ends with '/', same as in the years property
            baseurl = '%s%s/' % (baseurl, year)
        category = dict(self.ctmplvars, year=year, years=self.years)
        return {
            'baseurl': baseurl,
            'site': settings.site,
            'menu': settings.menu,
            'author': settings.author,
            'meta': settings.meta,
            'licence': settings.licence,
            'tips': settings.tips,
            'category': category,
            'posts': [] if posts is None else posts,
        }

    def indexfpath(self, subpath=None):
        """Path of ``index.html``, optionally inside `subpath`."""
        if subpath:
            return os.path.join(
                self.dpath,
                subpath,
                'index.html'
            )
        return os.path.join(
            self.dpath,
            'index.html'
        )

    async def render_feed(self, xmlformat):
        """Write the feed of the newest posts; `xmlformat` is rss or atom.

        The number of entries is limited by the ``pagination`` site
        setting.
        """
        logger.info(
            'rendering category "%s" %s feed',
            self.name,
            xmlformat
        )
        start = 0
        end = int(settings.site.get('pagination'))

        fg = FeedGenerator()
        fg.id(self.feedurl)
        fg.title(self.title)
        fg.author({
            'name': settings.author.get('name'),
            'email': settings.author.get('email')
        })
        fg.logo('%s/favicon.png' % settings.site.get('url'))
        fg.updated(arrow.get(self.mtime).to('utc').datetime)
        fg.description(settings.site.get('title'))

        for post in reversed(self.get_posts(start, end)):
            dt = arrow.get(post.get('pubtime'))
            mtime = arrow.get(post.get('mtime'))
            fe = fg.add_entry()
            fe.id(post.get('url'))
            fe.title(post.get('title'))
            fe.author({
                'name': settings.author.get('name'),
                'email': settings.author.get('email')
            })
            fe.category({
                'term': post.get('category'),
                'label': post.get('category'),
                'scheme': "%s/category/%s/" % (
                    settings.site.get('url'),
                    post.get('category')
                )
            })
            fe.published(dt.datetime)
            fe.updated(mtime.datetime)
            fe.rights('%s %s %s' % (
                post.get('licence').upper(),
                settings.author.get('name'),
                dt.format('YYYY')
            ))
            if xmlformat == 'rss':
                fe.link(href=post.get('url'))
                fe.content(post.get('html_content'), type='CDATA')
                if 'enclosure' in post:
                    enc = post.get('enclosure')
                    fe.enclosure(
                        enc.get('url'),
                        "%d" % enc.get('size'),
                        enc.get('mime')
                    )
            elif xmlformat == 'atom':
                fe.link(
                    href=post.get('url'),
                    rel='alternate',
                    type='text/html'
                )
                fe.content(src=post.get('url'), type='text/html')
                fe.summary(post.get('summary'))

        if xmlformat == 'rss':
            fg.link(href=self.feedurl)
            writepath(self.rssfeedfpath, fg.rss_str(pretty=True))
        elif xmlformat == 'atom':
            fg.link(href=self.feedurl, rel='self')
            fg.link(href=settings.meta.get('hub'), rel='hub')
            writepath(self.atomfeedfpath, fg.atom_str(pretty=True))

    async def render_flat(self):
        """Render every post of the category onto a single index page."""
        r = J2.get_template(self.template).render(
            self.tmplvars(self.get_posts())
        )
        writepath(self.indexfpath(), r)

    async def render_archives(self):
        """Render one index page per year; the newest year is the root."""
        keys = self.sortedkeys
        if not keys:
            return
        newest_year = self.newest_year
        for year in self.years:
            if year == newest_year:
                fpath = self.indexfpath()
            else:
                fpath = self.indexfpath("%d" % (year))
            y = arrow.get("%d" % year, self.trange).to('utc')
            tsmin = y.floor('year').timestamp
            tsmax = y.ceil('year').timestamp
            # keys are newest first: `start` becomes the first index that
            # is not after the year, `end` the last one not before it
            start = len(keys)
            end = 0
            for index, value in enumerate(keys):
                if value <= tsmax and index < start:
                    start = index
                if value >= tsmin and index > end:
                    end = index
            if self.is_uptodate(fpath, self[keys[start]].dt):
                logger.info("%s / %d is up to date", self.name, year)
            else:
                logger.info("updating %s / %d", self.name, year)
                logger.info("getting posts from %d to %d", start, end)
                r = J2.get_template(self.template).render(
                    self.tmplvars(
                        # `end` is the index of the last post of the year
                        # and slice ends are exclusive, hence the +1
                        self.get_posts(start, end + 1),
                        year
                    )
                )
                writepath(fpath, r)

    async def render_feeds(self):
        """Regenerate the RSS and Atom feeds that are out of date."""
        newest = self.newest()
        feeds = (
            ('rss', 'RSS', self.rssfeedfpath),
            ('atom', 'ATOM', self.atomfeedfpath),
        )
        for xmlformat, label, fpath in feeds:
            if not self.is_uptodate(fpath, newest):
                logger.info(
                    '%s %s feed outdated, generating new',
                    self.name,
                    label
                )
                await self.render_feed(xmlformat)
            else:
                logger.info(
                    '%s %s feed up to date',
                    self.name,
                    label
                )

    async def render(self):
        """Render the feeds, then a flat index or the yearly archives."""
        await self.render_feeds()
        if self.display == 'flat':
            if not self.is_uptodate(self.indexfpath(), self.newest()):
                logger.info(
                    '%s flat index outdated, generating new',
                    self.name
                )
                await self.render_flat()
            else:
                logger.info(
                    '%s flat index is up to date',
                    self.name
                )
            return
        await self.render_archives()
class Sitemap(dict):
    """Plain-text sitemap: maps post URLs to their modification time."""

    @property
    def mtime(self):
        """Modification time of the sitemap file, 0 if it does not exist."""
        if os.path.exists(self.renderfile):
            return os.path.getmtime(self.renderfile)
        return 0

    def append(self, post):
        """Register `post` by its ``url`` and ``mtime``."""
        self[post.url] = post.mtime

    @property
    def renderfile(self):
        return os.path.join(settings.paths.get('build'), 'sitemap.txt')

    async def render(self):
        """Write the sorted URL list unless the file is already current.

        Nothing is written for an empty sitemap; previously this case
        raised IndexError.
        """
        if not self:
            return
        if self.mtime >= max(self.values()):
            return
        with open(self.renderfile, 'wt') as f:
            f.write("\n".join(sorted(self.keys())))
class WebmentionIO(object):
    """Fetch incoming webmentions from webmention.io and store them.

    Each mention is written as a markdown file with YAML front matter into
    the directory of the post it targets.
    """

    def __init__(self):
        self.params = {
            'token': '%s' % (keys.webmentionio.get('token')),
            'since': '%s' % self.since.format(settings.dateformat.get('iso')),
            'domain': '%s' % (keys.webmentionio.get('domain'))
        }
        self.url = 'https://webmention.io/api/mentions'

    @property
    def since(self):
        """Return the moment one second after the newest stored mention.

        Scans the content tree for mention files and reads the epoch from
        their names; files without a leading integer are logged and
        skipped.
        """
        newest = 0
        content = settings.paths.get('content')
        for e in glob.glob(os.path.join(content, '*', '*', '*.md')):
            if os.path.basename(e) == 'index.md':
                continue
            # filenames are like [received epoch]-[slugified source url].md
            try:
                mtime = int(os.path.basename(e).split('-')[0])
            except ValueError as exc:
                logger.error(
                    'int conversation failed: %s, file was: %s',
                    exc,
                    e
                )
                continue
            if mtime > newest:
                newest = mtime
        return arrow.get(newest+1)

    def makecomment(self, webmention):
        """Write one webmention (a dict from the API) next to its post.

        Mentions that target the site root, that match no post or several
        posts, or that carry no author information are skipped.
        """
        data = webmention.get('data') or {}
        published = data.get('published')
        if 'published_ts' in data and published and published != 'None':
            dt = arrow.get(published)
        else:
            # also covers payloads without 'published_ts', which used to
            # leave dt unbound and crash further down
            dt = arrow.get(webmention.get('verified_date'))

        slug = webmention.get('target').strip('/').split('/')[-1]
        # ignore selfpings
        if slug == settings.site.get('domain'):
            return

        fdir = glob.glob(os.path.join(settings.paths.get('content'), '*', slug))
        if not fdir:
            logger.error(
                "couldn't find post for incoming webmention: %s",
                webmention
            )
            return
        elif len(fdir) > 1:
            logger.error(
                "multiple posts found for incoming webmention: %s",
                webmention
            )
            return
        fdir = fdir.pop()

        fpath = os.path.join(
            fdir,
            "%d-%s.md" % (
                dt.timestamp,
                url2slug(webmention.get('source'))
            )
        )

        author = data.get('author', None)
        if not author:
            logger.error('missing author info on webmention; skipping')
            return

        # 'activity' may be absent; fall back to the generic type then
        activity = webmention.get('activity') or {}
        meta = {
            'author': {
                'name': author.get('name', ''),
                'url': author.get('url', ''),
                'photo': author.get('photo', '')
            },
            'date': dt.format(settings.dateformat.get('iso')),
            'source': webmention.get('source'),
            'target': webmention.get('target'),
            'type': activity.get('type', 'webmention')
        }
        r = "---\n%s\n---\n\n%s\n" % (
            utfyamldump(meta),
            (data.get('content') or '').strip()
        )
        writepath(fpath, r)

    def run(self):
        """Query webmention.io and store every mention it returns.

        A non-OK HTTP status is ignored silently; a ValueError raised
        while decoding or storing the mentions is logged.
        """
        webmentions = requests.get(self.url, params=self.params)
        logger.info("queried webmention.io with: %s", webmentions.url)
        if webmentions.status_code != requests.codes.ok:
            return
        try:
            mentions = webmentions.json()
            # 'links' may be missing or null in the response
            for webmention in mentions.get('links') or []:
                self.makecomment(webmention)
        except ValueError as e:
            logger.error('failed to query webmention.io: %s', e)
def make():
    """Build the whole site and, unless ``nosync`` is set, publish it.

    Steps: fetch incoming webmentions, collect and render posts, fill the
    search index and sitemap, render the PHP helpers, categories and home
    page, copy static files, then rsync the build directory and send the
    outgoing webmentions.
    """
    start = int(round(time.time() * 1000))

    # this needs to be before collecting the 'content' itself
    if not settings.args.get('nosync'):
        incoming = WebmentionIO()
        incoming.run()

    # named `tasks` so the imported `queue` module is not shadowed
    tasks = AQ()
    send = []

    content = settings.paths.get('content')
    rules = IndexPHP()

    micropub = MicropubPHP()
    tasks.put(micropub.render())

    webhook = WebhookPHP()
    tasks.put(webhook.render())

    sitemap = Sitemap()
    search = Search()
    categories = {}
    frontposts = Category()
    home = Home(settings.paths.get('home'))

    for e in sorted(glob.glob(os.path.join(content, '*', '*', 'index.md'))):
        post = Singular(e)
        # deal with images, if needed
        for i in post.images.values():
            tasks.put(i.downsize())
        for i in post.to_ping:
            send.append(i)

        # render and arbitrary file copy tasks for this very post
        tasks.put(post.render())
        tasks.put(post.copyfiles())

        # skip draft posts from anything further
        if post.is_future:
            logger.info('%s is for the future', post.name)
            continue

        # add post to search database
        search.append(post)

        # start populating sitemap
        sitemap.append(post)

        # populate redirects, if any
        rules.add_redirect(post.shortslug, post.url)

        # any category starting with '_' are special: they shouldn't have a
        # category archive page
        if post.is_page:
            continue

        # populate the category with the post
        if post.category not in categories:
            categories[post.category] = Category(post.category)
        categories[post.category][post.published.timestamp] = post

        # add to front, if allowed
        if post.is_front:
            frontposts[post.published.timestamp] = post

    # commit to search database - this saves quite a few disk writes
    search.__exit__()

    # render search and sitemap
    tasks.put(search.render())
    tasks.put(sitemap.render())

    # make gone and redirect arrays for PHP
    for e in glob.glob(os.path.join(content, '*', '*.del')):
        post = Gone(e)
        rules.add_gone(post.source)
    for e in glob.glob(os.path.join(content, '*', '*.url')):
        post = Redirect(e)
        rules.add_redirect(post.source, post.target)

    # render 404 fallback PHP
    tasks.put(rules.render())

    # render categories
    for category in categories.values():
        home.add(category, category.get(category.sortedkeys[0]))
        tasks.put(category.render())

    tasks.put(frontposts.render_feeds())
    tasks.put(home.render())

    # actually run all the render & copy tasks
    tasks.run()

    # copy static files
    for e in glob.glob(os.path.join(content, '*.*')):
        if e.endswith('.md'):
            continue
        t = os.path.join(settings.paths.get('build'), os.path.basename(e))
        if os.path.exists(t) and os.path.getmtime(e) <= os.path.getmtime(t):
            continue
        cp(e, t)

    end = int(round(time.time() * 1000))
    logger.info('process took %d ms', end - start)

    if not settings.args.get('nosync'):
        # upload site
        logger.info('starting syncing')
        os.system(
            "rsync -avuhH --delete-after %s/ %s/" % (
                settings.paths.get('build'),
                '%s/%s' % (settings.syncserver,
                           settings.paths.get('remotewww'))
            )
        )
        logger.info('syncing finished')

        # outgoing webmentions go out only after the site is uploaded
        logger.info('sending webmentions')
        for wm in send:
            tasks.put(wm.send())
        tasks.run()
        logger.info('sending webmentions finished')
2017-05-23 11:13:35 +01:00
# build the site only when executed as a script, not on import
if __name__ == '__main__':
    make()