nasg/nasg.py

1304 lines
38 KiB
Python
Raw Normal View History

2017-05-23 11:14:47 +01:00
#!/usr/bin/env python3
import os
import re
2017-05-23 11:13:35 +01:00
import logging
import configparser
2017-05-23 11:14:47 +01:00
import json
import glob
import argparse
import shutil
from urllib.parse import urlparse
2017-05-23 11:14:47 +01:00
import asyncio
from math import ceil
import csv
import sqlite3
2017-10-28 19:08:40 +01:00
import magic
2017-05-23 11:14:47 +01:00
import frontmatter
import arrow
2017-05-23 11:14:47 +01:00
import langdetect
import wand.image
2017-10-27 15:56:05 +01:00
from emoji import UNICODE_EMOJI
2017-06-12 15:40:30 +01:00
import shared
from pprint import pprint
2017-05-23 11:14:47 +01:00
class MagicPHP(object):
    """Renders index.php for the site from '410 gone' and redirect CSV rules.

    The gone / redirect source files are space-delimited CSVs whose paths
    come from the [var] section of the shared config.
    """
    name = 'index.php'

    def __init__(self):
        # entries that should answer HTTP 410 Gone: first CSV column only
        self.gones = [
            row[0]
            for row in self._csvrows(shared.config.get('var', 'gone'))
        ]
        # manual redirects: (from, to) pairs from the first two CSV columns
        self.redirects = [
            (row[0], row[1])
            for row in self._csvrows(shared.config.get('var', 'redirects'))
        ]

    @staticmethod
    def _csvrows(fpath):
        # yield the rows of a space-delimited CSV file; yields nothing
        # when the file does not exist
        if not os.path.isfile(fpath):
            return
        with open(fpath) as csvfile:
            for row in csv.reader(csvfile, delimiter=' '):
                yield row

    @property
    def phpfile(self):
        # target path of the rendered index.php inside the build dir
        return os.path.join(
            shared.config.get('common', 'build'),
            self.name
        )

    async def render(self):
        """Render the MagicPHP template and write it to the build dir."""
        logging.info('saving %s' % (self.name))
        target = self.phpfile
        tmplfile = "%s.html" % (__class__.__name__)
        html = shared.j2.get_template(tmplfile).render({
            'site': shared.site,
            'redirects': self.redirects,
            'gones': self.gones
        })
        with open(target, 'wt') as out:
            logging.debug('writing file %s' % (target))
            out.write(html)
class NoDupeContainer(object):
    """ Base class to hold key => data dicts with errors on dupes.

    On a duplicate key the first value is kept and the conflict is
    logged as an error instead of raising or overwriting.
    """

    def __init__(self):
        self.data = {}
        # value returned by __getitem__ for missing keys
        self.default = None

    def append(self, key, value):
        """Insert key => value; keep the first value and log on duplicates."""
        # all clear
        if key not in self.data:
            self.data[key] = value
            return
        # problem
        logging.error(
            "duplicate key error when populating %s: %s",
            self.__class__.__name__,
            key
        )
        logging.error(
            "current: %s",
            self.data.get(key)
        )
        logging.error(
            "problem: %s",
            value
        )
        return

    # TODO: return ordered version of data

    def __getitem__(self, key):
        return self.data.get(key, self.default)

    def __setitem__(self, key, value):
        return self.append(key, value)

    def __contains__(self, key):
        return key in self.data

    def __len__(self):
        return len(self.data)

    def __next__(self):
        # the container is not an iterator itself; iteration goes through
        # __iter__. The original body called the nonexistent dict.next()
        # inside a bare except, which always ended up here anyway.
        raise StopIteration()

    def __iter__(self):
        # iterate as (key, value) tuples
        yield from self.data.items()
2017-06-12 15:40:30 +01:00
class FContainer(NoDupeContainer):
    """ A container that holds a list of files; based on NoDupeContainer,
    so it errors on duplicate basenames, and is populated with recursive
    glob over the given directories and extensions. """

    def __init__(self, dirs=None, extensions=None):
        """Collect files matching *.<ext> under each dir.

        The original signature used mutable list defaults (dirs=[''],
        extensions=['*']), which are shared between calls; None sentinels
        preserve the same effective defaults safely.
        """
        super().__init__()
        if dirs is None:
            dirs = ['']
        if extensions is None:
            extensions = ['*']
        # a set eliminates duplicates from overlapping patterns
        files = set()
        for ext in extensions:
            for p in dirs:
                files.update(glob.iglob(
                    os.path.join(p, '*.%s' % (ext)),
                    recursive=True
                ))
        for fpath in files:
            # keyed by basename: append() logs an error on collisions
            self.append(os.path.basename(fpath), fpath)
class Content(FContainer):
    """ A container that holds markdown files that are parsed when the
    container is populated on the fly; based on FContainer which is a
    Container """

    def __init__(self):
        content_root = os.path.join(
            shared.config.get('dirs', 'content'),
            "**"
        )
        super().__init__([content_root], ['md', 'jpg'])
        # replace each collected path with its parsed Singular object
        for fname in list(self.data.keys()):
            self.data[fname] = Singular(self.data[fname])
class Category(NoDupeContainer):
    """ A Category which holds pubtime (int) => Singular data """
    indexfile = 'index.html'
    feedfile = 'index.xml'
    feeddir = 'feed'
    pagedir = 'page'
    taxonomy = 'category'

    def __init__(self, name=''):
        self.name = name
        super().__init__()

    def append(self, post):
        # posts are keyed by their publish timestamp
        return super().append(post.pubtime, post)

    @property
    def mtime(self):
        # newest pubtime among the posts of this category
        return int(sorted(self.data.keys(), reverse=True)[0])

    @property
    def is_uptodate(self):
        # the rendered index is current when its mtime equals the newest post
        index = os.path.join(self.path_paged(), self.indexfile)
        if not os.path.isfile(index):
            return False
        return os.path.getmtime(index) == self.mtime

    @property
    def title(self):
        # TODO proper title
        return self.name

    @property
    def url(self):
        if not self.name:
            return '/'
        return "/%s/%s/" % (
            self.taxonomy,
            self.name,
        )

    def path_paged(self, page=1, feed=False):
        """Return (creating it if needed) the output dir for a page/feed."""
        target = shared.config.get('common', 'build')
        if self.name:
            target = os.path.join(
                target,
                self.taxonomy,
                self.name,
            )
        if page == 1:
            # page 1 lives at the category root; its feed in a subdir
            if feed:
                target = os.path.join(target, self.feeddir)
        else:
            target = os.path.join(target, self.pagedir, "%s" % page)
        if not os.path.isdir(target):
            os.makedirs(target)
        return target

    def write_html(self, path, content):
        with open(path, 'wt') as out:
            logging.debug('writing file %s' % (path))
            out.write(content)
        # stamp the file with the category mtime so is_uptodate works
        os.utime(path, (self.mtime, self.mtime))

    async def render(self):
        """Render every page of the category, plus the feed for page 1."""
        perpage = shared.config.getint('display', 'pagination')
        pages = ceil(len(self.data) / perpage)
        for page in range(1, pages + 1):
            # slice the relevant post templates, newest first
            start = int((page - 1) * perpage)
            end = int(start + perpage)
            posttmpls = [
                self.data[k].tmplvars
                for k in list(sorted(
                    self.data.keys(),
                    reverse=True
                ))[start:end]
            ]
            # data handed to the templates
            tmplvars = {
                'taxonomy': {
                    'title': self.title,
                    'name': self.name,
                    'page': page,
                    'total': pages,
                    'perpage': perpage,
                    'lastmod': arrow.get(self.mtime).format(shared.ARROWFORMAT['rcf']),
                    'url': self.url,
                    'feed': "%s/%s/" % (
                        self.url,
                        shared.config.get('site', 'feed')
                    ),
                },
                'site': shared.site,
                'posts': posttmpls,
            }
            # render HTML
            o = os.path.join(self.path_paged(page), self.indexfile)
            logging.info("Rendering page %d/%d of category %s to %s", page, pages, self.name, o)
            tmplfile = "%s.html" % (__class__.__name__)
            self.write_html(o, shared.j2.get_template(tmplfile).render(tmplvars))
            # render feed, only for the first page
            if 1 == page:
                o = os.path.join(self.path_paged(page, feed=True), self.feedfile)
                logging.info("Rendering feed of category %s to %s", self.name, o)
                tmplfile = "%s_%s.html" % (__class__.__name__, self.feeddir)
                self.write_html(o, shared.j2.get_template(tmplfile).render(tmplvars))
2017-05-23 11:14:47 +01:00
class Singular(object):
    """ A single entry of the site: a markdown file, or a photo post (jpg)
    whose metadata comes from its EXIF data.

    Renders to build/<slug>/index.html, collects comments/webmentions and
    inline images for the entry.
    """
    indexfile = 'index.html'

    def __init__(self, fpath):
        logging.debug("initiating singular object from %s", fpath)
        self.fpath = fpath
        self.mtime = os.path.getmtime(self.fpath)
        # stime tracks the newest mtime of the post OR any of its comments;
        # is_uptodate compares the rendered HTML against it
        self.stime = self.mtime
        self.fname, self.fext = os.path.splitext(os.path.basename(self.fpath))
        # the parent directory name doubles as the category
        self.category = os.path.basename(os.path.dirname(self.fpath))
        self._images = NoDupeContainer()
        if '.md' == self.fext:
            with open(self.fpath, mode='rt') as f:
                self.fm = frontmatter.parse(f.read())
                self.meta, self.content = self.fm
            self.photo = None
        elif '.jpg' == self.fext:
            # photo post: front-matter equivalents come from EXIF
            self.photo = WebImage(self.fpath)
            self.meta = self.photo.fm_meta
            self.content = self.photo.fm_content
            self.photo.inline = False
            self.photo.cssclass = 'u-photo'

    def init_extras(self):
        """Process queued webmentions and touch comments."""
        self.process_webmentions()
        # accessed for its side effect: .comments bumps self.stime to the
        # newest comment mtime so changed comments trigger a re-render
        c = self.comments

    # TODO this should be async
    def process_webmentions(self):
        """Save queued incoming webmentions for this post as comment files."""
        wdb = shared.WebmentionQueue()
        queued = wdb.get_queued(self.url)
        for incoming in queued:
            wm = Webmention(
                incoming.get('id'),
                incoming.get('source'),
                incoming.get('target'),
                incoming.get('dt')
            )
            wm.run()
            wdb.entry_done(incoming.get('id'))
        wdb.finish()

    @property
    def redirects(self):
        """Deduplicated redirect slugs pointing at this post."""
        # copy the front-matter list: the original appended the shortslug
        # directly to self.meta['redirect'], mutating it on every access
        r = list(self.meta.get('redirect', []))
        r.append(self.shortslug)
        return list(set(r))

    @property
    def is_uptodate(self):
        if not os.path.isfile(self.htmlfile):
            return False
        mtime = os.path.getmtime(self.htmlfile)
        if mtime >= self.stime:
            return True
        return False

    @property
    def htmlfile(self):
        return os.path.join(
            shared.config.get('common', 'build'),
            self.fname,
            self.indexfile
        )

    @property
    def images(self):
        """All images of the post: the photo itself plus inline images."""
        if self.photo:
            self._images.append(self.fname, self.photo)
        # add inline images; _find_image caches them into self._images
        for shortcode, alt, fname, title, css in self.inline_images:
            self._find_image(fname)
        return self._images

    @property
    def comments(self):
        """Collect comment files for this post, looking under its redirect
        slugs too; bumps self.stime to the newest comment mtime."""
        comments = NoDupeContainer()
        cfiles = []
        lookin = [*self.redirects, self.fname]
        for d in lookin:
            maybe = glob.glob(
                os.path.join(
                    shared.config.get('dirs', 'comment'),
                    d,
                    '*.md'
                )
            )
            cfiles = [*cfiles, *maybe]
        for cpath in cfiles:
            cmtime = os.path.getmtime(cpath)
            if cmtime > self.stime:
                self.stime = cmtime
            c = Comment(cpath)
            comments.append(c.mtime, c)
        return comments

    @property
    def replies(self):
        """mtime-sorted template vars of plain webmention replies."""
        r = {}
        for mtime, c in self.comments:
            if 'webmention' == c.type:
                r.update({mtime: c.tmplvars})
        return sorted(r.items())

    @property
    def reactions(self):
        """Non-reply reactions (likes, emoji, ...) grouped by type."""
        r = {}
        for mtime, c in self.comments:
            if 'webmention' == c.type:
                continue
            if c.type not in r:
                r[c.type] = {}
            r[c.type].update({mtime: c.tmplvars})
        for icon, comments in r.items():
            r[icon] = sorted(comments.items())
        return r

    @property
    def exif(self):
        if not self.photo:
            return {}
        return self.photo.exif

    @property
    def published(self):
        # falls back to file mtime when no published date is set
        return arrow.get(self.meta.get('published', self.mtime))

    @property
    def updated(self):
        u = self.meta.get('updated', False)
        if u:
            u = arrow.get(u)
        return u

    @property
    def pubtime(self):
        return int(self.published.timestamp)

    @property
    def is_reply(self):
        # the in-reply-to URL, or False for a non-reply post
        return self.meta.get('in-reply-to', False)

    @property
    def is_future(self):
        now = arrow.utcnow().timestamp
        if self.pubtime > now:
            return True
        return False

    @property
    def licence(self):
        l = shared.config.get('licence', self.category,
            fallback=shared.config.get('licence', 'default',))
        return {
            'text': 'CC %s 4.0' % l.upper(),
            'url': 'https://creativecommons.org/licenses/%s/4.0/' % l,
        }

    @property
    def corpus(self):
        """Text blob of the post for the search index."""
        corpus = "\n".join([
            "%s" % self.meta.get('title', ''),
            "%s" % self.fname,
            "%s" % self.meta.get('summary', ''),
            "%s" % self.content,
        ])
        if self.photo:
            # NOTE(review): no separator between corpus and the first tag;
            # looks like a missing "\n" — kept as-is to preserve behavior
            corpus = corpus + "\n".join(self.meta.get('tags', []))
        return corpus

    @property
    def lang(self):
        # default is English, this will only be changed if the try
        # succeeds and actually detects a language
        lang = 'en'
        try:
            lang = langdetect.detect("\n".join([
                self.fname,
                self.meta.get('title', ''),
                self.content
            ]))
        except Exception:
            # deliberate best-effort: keep 'en' when detection fails
            pass
        return lang

    def _find_image(self, fname):
        """Locate an image by basename under the files dir, wrap it in a
        cached WebImage; returns None when not found."""
        fname = os.path.basename(fname)
        pattern = os.path.join(
            shared.config.get('dirs', 'files'),
            '**',
            fname
        )
        logging.debug('trying to locate image %s in %s', fname, pattern)
        maybe = glob.glob(pattern)
        if not maybe:
            logging.error('image not found: %s', fname)
            return None
        maybe = maybe.pop()
        logging.debug('image found: %s', maybe)
        if fname not in self._images:
            im = WebImage(maybe)
            self._images.append(fname, im)
        return self._images[fname]

    @property
    def inline_images(self):
        # findall -> (shortcode, alt, fname, title, css) tuples
        return shared.REGEX['mdimg'].findall(self.content)

    @property
    def url(self):
        return "%s/%s" % (shared.config.get('site', 'url'), self.fname)

    @property
    def body(self):
        """Markdown body with inline image shortcodes replaced by the
        rendered markup of their downsized versions."""
        body = "%s" % (self.content)
        # get inline images, downsize them and convert them to figures
        for shortcode, alt, fname, title, css in self.inline_images:
            im = self._find_image(fname)
            if not im:
                continue
            im.alt = alt
            im.title = title
            im.cssclass = css
            body = body.replace(shortcode, str(im))
        # TODO if multiple meta images, inline all except the first
        # which will be added at the HTML stage or as enclosure to the feed
        return body

    @property
    def html(self):
        html = "%s" % (self.body)
        # add photo
        if self.photo:
            html = "%s\n%s" % (str(self.photo), html)
        return shared.Pandoc().convert(html)

    @property
    def title(self):
        maybe = self.meta.get('title', False)
        if maybe:
            return maybe
        if self.is_reply:
            return "RE: %s" % self.is_reply
        # fall back to the formatted publish date
        return self.published.format(shared.ARROWFORMAT['display'])

    @property
    def summary(self):
        s = self.meta.get('summary', '')
        if not s:
            return s
        return shared.Pandoc().convert(s)

    @property
    def shortslug(self):
        # base-N encoded pubtime, used as the short URL slug
        return shared.baseN(self.pubtime)

    @property
    def enclosure(self):
        """Feed enclosure data for photo posts."""
        if not self.photo:
            return {}
        return {
            'length': os.path.getsize(self.photo.fpath),
            'url': self.photo.href,
            'mime': magic.Magic(mime=True).from_file(self.photo.fpath),
        }

    @property
    def tmplvars(self):
        # very simple caching because we might use this 4 times:
        # post HTML, category, front posts and atom feed
        if not hasattr(self, '_tmplvars'):
            self._tmplvars = {
                'title': self.title,
                'pubtime': self.published.format(shared.ARROWFORMAT['iso']),
                'pubdate': self.published.format(shared.ARROWFORMAT['display']),
                'pubrfc': self.published.format(shared.ARROWFORMAT['rcf']),
                'category': self.category,
                'html': self.html,
                'lang': self.lang,
                'slug': self.fname,
                'shortslug': self.shortslug,
                'licence': self.licence,
                #'sourceurl': self.sourceurl,
                'is_reply': self.is_reply,
                'age': int(self.published.format('YYYY')) - int(arrow.utcnow().format('YYYY')),
                'summary': self.summary,
                'replies': self.replies,
                'reactions': self.reactions,
                'enclosure': self.enclosure,
            }
        return self._tmplvars

    async def render(self):
        """Render the post template into build/<slug>/index.html."""
        logging.info('rendering %s' % (self.fname))
        o = self.htmlfile
        tmplfile = "%s.html" % (__class__.__name__)
        r = shared.j2.get_template(tmplfile).render({
            'post': self.tmplvars,
            'site': shared.site,
        })
        d = os.path.dirname(o)
        if not os.path.isdir(d):
            logging.debug('creating directory %s' % (d))
            os.makedirs(d)
        with open(o, 'wt') as out:
            logging.debug('writing file %s' % (o))
            out.write(r)
        # stamp the output so is_uptodate can compare against stime
        os.utime(o, (self.mtime, self.mtime))

    def __repr__(self):
        return "%s/%s" % (self.category, self.fname)
2017-06-02 11:19:55 +01:00
2017-06-12 15:40:30 +01:00
class WebImage(object):
    """ An image to be downsized, optionally watermarked, and rendered as a
    figure; EXIF metadata is read via exiftool and cached as JSON. """

    def __init__(self, fpath):
        logging.info("parsing image: %s", fpath)
        self.fpath = fpath
        self.mtime = os.path.getmtime(self.fpath)
        bname = os.path.basename(fpath)
        self.fname, self.fext = os.path.splitext(bname)
        self.title = ''
        self.alt = bname
        self.target = ''
        self.cssclass = ''

    @property
    def fm_content(self):
        # front-matter style content: the EXIF Description
        return self.meta.get('Description', '')

    @property
    def fm_meta(self):
        # front-matter style metadata assembled from EXIF fields
        return {
            'published': self.meta.get('ReleaseDate',
                self.meta.get('ModifyDate')
            ),
            'title': self.meta.get('Headline', self.fname),
            'tags': list(set(self.meta.get('Subject', []))),
        }

    @property
    def href(self):
        if len(self.target):
            return self.target
        if not self.is_downsizeable:
            return False
        # the largest downsized version
        return self.sizes[-1][1]['url']

    @property
    def src(self):
        # if the image is too small to downsize, it will be copied over
        # so the link needs to point at the copy
        src = "/%s/%s" % (
            shared.config.get('common', 'files'),
            "%s%s" % (self.fname, self.fext)
        )
        if self.is_downsizeable:
            try:
                src = [e for e in self.sizes if e[0] == shared.config.getint('photo', 'default')][0][1]['url']
            except Exception:
                # deliberate fallback to the copied original
                pass
        return src

    @property
    def meta(self):
        if not hasattr(self, '_exif'):
            # reading EXIF is expensive enough even with a static generator
            # to consider caching it, so I'll do that here
            cpath = os.path.join(
                shared.config.get('var', 'cache'),
                "%s.exif.json" % self.fname
            )
            if os.path.exists(cpath):
                cmtime = os.path.getmtime(cpath)
                # the cache is only valid if it's newer than the image
                if cmtime >= self.mtime:
                    with open(cpath, 'rt') as f:
                        self._exif = json.loads(f.read())
                        return self._exif
            self._exif = shared.ExifTool(self.fpath).read()
            if not os.path.isdir(shared.config.get('var', 'cache')):
                os.makedirs(shared.config.get('var', 'cache'))
            with open(cpath, 'wt') as f:
                f.write(json.dumps(self._exif))
        return self._exif

    @property
    def is_photo(self):
        """True when EXIF Artist/Copyright match the configured regex —
        only such photos get watermarked."""
        # missing regex from config
        # (the original debug calls below had a '%s' placeholder with no
        # argument, logging the literal format string)
        if 'photo' not in shared.REGEX:
            logging.debug('%s photo regex missing from config', self.fpath)
            return False
        cpr = self.meta.get('Copyright', '')
        art = self.meta.get('Artist', '')
        # both Artist and Copyright missing from EXIF
        if not cpr and not art:
            logging.debug('%s Artist or Copyright missing from EXIF', self.fpath)
            return False
        # we have regex, Artist and Copyright, try matching them
        pattern = re.compile(shared.config.get('photo', 'regex'))
        if pattern.search(cpr) or pattern.search(art):
            return True
        logging.debug('%s patterns did not match', self.fpath)
        return False

    @property
    def exif(self):
        """Human-friendly subset of EXIF for templates (photos only)."""
        exif = {}
        if not self.is_photo:
            return exif
        # template key => EXIF field candidates, in priority order
        mapping = {
            'camera': ['Model'],
            'aperture': ['FNumber', 'Aperture'],
            'shutter_speed': ['ExposureTime'],
            'focallength': ['FocalLengthIn35mmFormat', 'FocalLength'],
            'iso': ['ISO'],
            'lens': ['LensID', 'LensSpec', 'Lens',],
            #'date': ['CreateDate','DateTimeOriginal'],
            'geo_latitude': ['GPSLatitude'],
            'geo_longitude': ['GPSLongitude'],
        }
        for ekey, candidates in mapping.items():
            for candidate in candidates:
                maybe = self.meta.get(candidate, None)
                if not maybe:
                    continue
                elif 'geo_' in ekey:
                    exif[ekey] = round(float(maybe), 5)
                else:
                    exif[ekey] = maybe
                break
        return exif

    @property
    def sizes(self):
        """List of (size, {fpath, exists, url, crop}) tuples, ascending.

        'exists' is False when the downsized file is missing OR older than
        the source image (stale)."""
        sizes = []
        _max = max(
            int(self.meta.get('ImageWidth')),
            int(self.meta.get('ImageHeight'))
        )
        for size in shared.config.options('downsize'):
            if _max < int(size):
                continue
            name = '%s_%s%s' % (
                self.fname,
                shared.config.get('downsize', size),
                self.fext
            )
            fpath = os.path.join(
                shared.config.get('common', 'build'),
                shared.config.get('common', 'files'),
                name
            )
            exists = os.path.isfile(fpath)
            # in case there is a downsized image compare against the main
            # file's mtime and invalidate the existing if it's older
            if exists:
                mtime = os.path.getmtime(fpath)
                if self.mtime > mtime:
                    exists = False
            sizes.append((
                int(size),
                {
                    'fpath': fpath,
                    # use the staleness-aware flag computed above; the
                    # original re-ran os.path.isfile() here, discarding
                    # the invalidation, so stale files never regenerated
                    'exists': exists,
                    'url': "%s/%s/%s" % (
                        shared.config.get('site', 'url'),
                        shared.config.get('common', 'files'),
                        name
                    ),
                    'crop': shared.config.getboolean(
                        'crop',
                        size,
                        fallback=False
                    )
                }
            ))
        return sorted(sizes, reverse=False)

    @property
    def is_downsizeable(self):
        """ Check if the image is large enough to downsize it """
        ftype = self.meta.get('FileType', None)
        if not ftype:
            return False
        elif ftype.lower() != 'jpeg' and ftype.lower() != 'png':
            return False
        _max = max(
            int(self.meta.get('ImageWidth')),
            int(self.meta.get('ImageHeight'))
        )
        _min = shared.config.getint('photo', 'default')
        if _max > _min:
            return True
        return False

    def _maybe_watermark(self, img):
        """ Composite image by adding watermark file over it """
        if not self.is_photo:
            logging.debug("not watermarking: not a photo")
            return img
        wmarkfile = shared.config.get('photo', 'watermark')
        if not os.path.isfile(wmarkfile):
            logging.debug("not watermarking: watermark not found")
            return img
        logging.debug("%s is a photo, applying watermarking", self.fpath)
        with wand.image.Image(filename=wmarkfile) as wmark:
            if img.width > img.height:
                # landscape: watermark 20% of width, bottom-right corner
                w = img.width * 0.2
                h = wmark.height * (w / wmark.width)
                x = img.width - w - (img.width * 0.01)
                y = img.height - h - (img.height * 0.01)
            else:
                # portrait: 16% of height; watermark gets rotated below
                w = img.height * 0.16
                h = wmark.height * (w / wmark.width)
                x = img.width - h - (img.width * 0.01)
                y = img.height - w - (img.height * 0.01)
            w = round(w)
            h = round(h)
            x = round(x)
            y = round(y)
            wmark.resize(w, h)
            if img.width <= img.height:
                wmark.rotate(-90)
            img.composite(image=wmark, left=x, top=y)
        return img

    def _copy(self):
        """Copy the file as-is into the build dir (non-downsizeable images)."""
        fname = "%s%s" % (self.fname, self.fext)
        fpath = os.path.join(
            shared.config.get('common', 'build'),
            shared.config.get('common', 'files'),
            fname
        )
        if os.path.isfile(fpath):
            mtime = os.path.getmtime(fpath)
            # already copied and current
            if self.mtime <= mtime:
                return
        logging.info("copying %s to build dir", fname)
        shutil.copy(self.fpath, fpath)

    def _intermediate_dimension(self, size, width, height, crop=False):
        """ Calculate intermediate resize dimension and return a tuple of width, height """
        size = int(size)
        # fit the longer side to size; when cropping, fit the shorter side
        if (width > height and not crop) \
        or (width < height and crop):
            w = size
            h = int(float(size / width) * height)
        else:
            h = size
            w = int(float(size / height) * width)
        return (w, h)

    def _intermediate(self, img, size, target, crop=False):
        """Resize a clone of img to size (liquid-rescale to a square when
        crop is set) and save it to target."""
        if img.width < size and img.height < size:
            return False
        with img.clone() as thumb:
            width, height = self._intermediate_dimension(
                size,
                img.width,
                img.height,
                crop
            )
            thumb.resize(width, height)
            if crop:
                thumb.liquid_rescale(size, size, 1, 1)
            if self.meta.get('FileType', 'jpeg').lower() == 'jpeg':
                thumb.compression_quality = 86
                thumb.unsharp_mask(
                    radius=0,
                    sigma=0.5,
                    amount=1,
                    threshold=0.03
                )
                thumb.format = 'pjpeg'
            # this is to make sure pjpeg happens
            with open(target, 'wb') as f:
                logging.info("writing %s", target)
                thumb.save(file=f)

    @property
    def needs_downsize(self):
        """True when any downsized variant is missing or stale."""
        needed = False
        for (size, downsized) in self.sizes:
            if downsized.get('exists', False):
                logging.debug("size %d exists: %s", size, downsized.get('fpath'))
                continue
            logging.debug("size %d missing: %s", size, downsized.get('fpath'))
            needed = True
        return needed

    async def downsize(self):
        """Generate all downsized variants, or copy the original over when
        the image is not downsizeable."""
        if not self.is_downsizeable:
            return self._copy()
        if not self.needs_downsize and not shared.config.getboolean('params', 'regenerate'):
            return
        build_files = os.path.join(
            shared.config.get('common', 'build'),
            shared.config.get('common', 'files'),
        )
        if not os.path.isdir(build_files):
            os.makedirs(build_files)
        logging.info("downsizing %s%s", self.fname, self.fext)
        with wand.image.Image(filename=self.fpath) as img:
            img.auto_orient()
            img = self._maybe_watermark(img)
            for (size, downsized) in self.sizes:
                self._intermediate(
                    img,
                    size,
                    downsized['fpath'],
                    downsized['crop']
                )

    @property
    def tmplvars(self):
        return {
            'src': self.src,
            'target': self.href,
            'css': self.cssclass,
            'title': self.title,
            'alt': self.alt,
            'exif': self.exif,
            'is_photo': self.is_photo,
            'author': self.meta.get('Artist', ''),
        }

    def __repr__(self):
        return "Image: %s, photo: %r, EXIF: %s" % (
            self.fname, self.is_photo, self.exif
        )

    def __str__(self):
        tmplfile = "%s.html" % (__class__.__name__)
        return shared.j2.get_template(tmplfile).render({'photo': self.tmplvars})
2017-06-12 15:40:30 +01:00
2017-10-27 15:56:05 +01:00
class Comment(object):
    """A single comment or webmention, stored as a front-matter markdown file."""

    def __init__(self, fpath):
        logging.debug("initiating comment object from %s", fpath)
        self.fpath = fpath
        self.mtime = os.path.getmtime(self.fpath)
        with open(self.fpath, mode='rt') as f:
            self.fm = frontmatter.parse(f.read())
            self.meta, self.content = self.fm

    @property
    def dt(self):
        # the comment's own date from its metadata
        return arrow.get(self.meta.get('date'))

    @property
    def html(self):
        return shared.Pandoc().convert("%s" % (self.content))

    @property
    def target(self):
        # slug of the post this comment belongs to
        parsed = urlparse(self.meta.get('target'))
        return parsed.path.rstrip('/').strip('/').split('/')[-1]

    @property
    def source(self):
        return self.meta.get('source')

    @property
    def author(self):
        author = self.meta.get('author')
        url = author.get('url', self.source)
        return {
            'name': author.get('name', urlparse(url).hostname),
            'url': url
        }

    @property
    def type(self):
        # caching, because calling Pandoc is expensive
        if not hasattr(self, '_type'):
            self._type = 'webmention'
            t = self.meta.get('type', 'webmention')
            if 'webmention' != t:
                self._type = ''
            if len(self.content):
                maybe = shared.Pandoc('plain').convert(self.content)
                if maybe in UNICODE_EMOJI:
                    self._type = maybe
        return self._type

    @property
    def tmplvars(self):
        # cached: built once, used by several templates
        if not hasattr(self, '_tmplvars'):
            self._tmplvars = {
                'author': self.author,
                'source': self.source,
                'pubtime': self.dt.format(shared.ARROWFORMAT['iso']),
                'pubdate': self.dt.format(shared.ARROWFORMAT['display']),
                'html': self.html,
                'type': self.type
            }
        return self._tmplvars

    def __repr__(self):
        return "Comment from %s for %s" % (
            self.source, self.target
        )

    def __str__(self):
        tmplfile = "%s.html" % (__class__.__name__)
        return shared.j2.get_template(tmplfile).render({'comment': self.tmplvars})
class Webmention(object):
    """An incoming webmention: fetched via XRay and saved as a comment file."""

    def __init__(self, id, source, target, dt):
        self.source = source
        self.target = target
        self.id = id
        self.dt = arrow.get(dt).to('utc')
        logging.info(
            "processing webmention %s => %s",
            self.source,
            self.target
        )

    def _fetch(self):
        # parse the remote source URL with XRay
        self._source = shared.XRay(self.source).parse()

    def _save(self):
        # persist as a front-matter markdown comment file
        doc = frontmatter.loads('')
        doc.content = self.content
        doc.metadata = self.meta
        with open(self.fpath, 'wt') as f:
            f.write(frontmatter.dumps(doc))
        return

    def run(self):
        """Fetch the source; save it only when XRay returned usable data."""
        self._fetch()
        if 'data' not in self._source:
            return
        self._save()

    @property
    def relation(self):
        # microformats relation of the mention, defaulting to plain webmention
        present = self._source.get('data').keys()
        for candidate in ['in-reply-to', 'repost-of', 'bookmark-of', 'like-of']:
            if candidate in present:
                return candidate
        return 'webmention'

    @property
    def meta(self):
        if not hasattr(self, '_meta'):
            data = self._source.get('data')
            self._meta = {
                'author': data.get('author'),
                'type': self.relation,
                'target': self.target,
                'source': self.source,
                'date': data.get('published'),
            }
        return self._meta

    @property
    def content(self):
        data = self._source.get('data')
        if 'content' not in data:
            return ''
        content = data.get('content')
        if 'html' in content:
            return shared.Pandoc('html').convert(content.get('html'))
        if 'text' in content:
            return shared.Pandoc('html').convert(content.get('text'))
        return ''

    @property
    def fname(self):
        # <utc timestamp>-<slugified source url>.md
        return "%d-%s.md" % (
            self.dt.timestamp,
            shared.slugfname(self.source)
        )

    @property
    def fpath(self):
        # comment dir named after the target post's slug; created on demand
        tdir = os.path.join(
            shared.config.get('dirs', 'comment'),
            self.target.rstrip('/').strip('/').split('/')[-1]
        )
        if not os.path.isdir(tdir):
            os.makedirs(tdir)
        return os.path.join(
            tdir,
            self.fname
        )
2017-10-27 15:56:05 +01:00
def setup():
    """ parse input parameters and add them as params section to config """
    parser = argparse.ArgumentParser(description='Parameters for NASG')
    for flag, description in {
        'regenerate': 'force downsizing images',
        'force': 'force rendering HTML',
    }.items():
        parser.add_argument(
            '--%s' % (flag),
            action='store_true',
            default=False,
            help=description
        )
    parser.add_argument(
        '--loglevel',
        default='warning',
        help='change loglevel'
    )
    if not shared.config.has_section('params'):
        shared.config.add_section('params')
    # mirror the parsed CLI arguments into the shared config
    for key, value in vars(parser.parse_args()).items():
        shared.config.set('params', key, str(value))
    # remove the rest of the potential loggers so basicConfig takes effect
    while len(logging.root.handlers) > 0:
        logging.root.removeHandler(logging.root.handlers[-1])
    logging.basicConfig(
        level=shared.LLEVEL[shared.config.get('params', 'loglevel')],
        format='%(asctime)s - %(levelname)s - %(message)s'
    )
2017-10-28 19:08:40 +01:00
def youngest_mtime(root):
    """Return the most recent modification time found under root (0 when empty)."""
    entries = glob.glob(os.path.join(root, '**'), recursive=True)
    return max((os.path.getmtime(entry) for entry in entries), default=0)
def build():
    """Run a full site build: parse content, render posts / categories /
    feeds / index.php, downsize images and copy static files."""
    setup()
    # TODO re-enable the short-circuit that skipped the whole build when
    # nothing under the content/comment dirs was newer than the build dir
    # (compare youngest_mtime of sources vs build, honouring params.force)
    loop = asyncio.get_event_loop()
    tasks = []
    content = Content()
    sdb = shared.SearchDB()
    # named phpmagic: the original `magic = MagicPHP()` shadowed the
    # imported `magic` module (python-magic), used by Singular.enclosure
    phpmagic = MagicPHP()
    collector_front = Category()
    collector_categories = NoDupeContainer()
    for f, post in content:
        logging.info("PARSING %s", f)
        post.init_extras()
        # extend redirects
        for r in post.redirects:
            phpmagic.redirects.append((r, post.fname))
        # add post to search, if needed
        if not sdb.is_uptodate(post.fname, post.mtime):
            sdb.append(
                post.fname,
                post.corpus,
                post.mtime,
                post.url,
                post.category,
                post.title
            )
        # add render task, if needed
        if not post.is_uptodate or shared.config.getboolean('params', 'force'):
            tasks.append(loop.create_task(post.render()))
        # collect images to downsize
        for fname, im in post.images:
            tasks.append(loop.create_task(im.downsize()))
        # skip adding future posts to any category
        if post.is_future:
            continue
        # skip categories starting with _
        if post.category.startswith('_'):
            continue
        # get (or create) the category otherwise
        elif post.category not in collector_categories:
            c = Category(post.category)
            collector_categories.append(post.category, c)
        else:
            c = collector_categories[post.category]
        # add post to category
        c.append(post)
        # add post to front
        collector_front.append(post)
    # write search db
    sdb.finish()
    # render front
    if not collector_front.is_uptodate or shared.config.getboolean('params', 'force'):
        tasks.append(loop.create_task(collector_front.render()))
    # render categories
    for name, c in collector_categories:
        if not c.is_uptodate or shared.config.getboolean('params', 'force'):
            tasks.append(loop.create_task(c.render()))
    # add magic.php rendering
    tasks.append(loop.create_task(phpmagic.render()))
    # TODO: send webmentions to any url
    # TODO: comments
    # TODO: ping websub?
    # do all the things!
    loop.run_until_complete(asyncio.wait(tasks))
    loop.close()
    # copy static
    logging.info('copying static files')
    src = shared.config.get('dirs', 'static')
    for item in os.listdir(src):
        s = os.path.join(src, item)
        d = os.path.join(shared.config.get('common', 'build'), item)
        if not os.path.exists(d):
            logging.debug("copying static file %s to %s", s, d)
            shutil.copy2(s, d)
2017-05-23 11:13:35 +01:00
# CLI entry point: run a full site build
if __name__ == '__main__':
    build()