nasg/nasg.py

1188 lines
34 KiB
Python
Raw Normal View History

2017-05-23 11:14:47 +01:00
#!/usr/bin/env python3
import os
import re
2017-05-23 11:13:35 +01:00
import logging
import configparser
2017-05-23 11:14:47 +01:00
import json
import glob
import argparse
import shutil
from urllib.parse import urlparse
2017-10-27 15:56:05 +01:00
#from urllib.parse import urljoin
2017-05-23 11:14:47 +01:00
import asyncio
from math import ceil
import csv
import sqlite3
2017-05-23 11:14:47 +01:00
import frontmatter
import arrow
2017-05-23 11:14:47 +01:00
import langdetect
import wand.image
2017-10-27 15:56:05 +01:00
#import requests
#from bs4 import BeautifulSoup
from emoji import UNICODE_EMOJI
2017-06-12 15:40:30 +01:00
import shared
import db
from pprint import pprint
2017-05-23 11:14:47 +01:00
class MagicPHP(object):
    """ Collects '410 gone' entries and redirects and renders them into magic.php """

    name = 'magic.php'

    def __init__(self):
        # init 'gone 410' array: the first column of every row
        self.gones = [
            row[0]
            for row in self._read_rows(shared.config.get('var', 'gone'), 1)
        ]
        # init manual redirects array: (first column, second column) pairs
        self.redirects = [
            (row[0], row[1])
            for row in self._read_rows(shared.config.get('var', 'redirects'), 2)
        ]

    @staticmethod
    def _read_rows(fpath, mincols):
        """ Read a space delimited file and return its rows as lists of columns

        Rows with fewer than `mincols` columns are dropped, so blank lines
        and truncated entries can not raise IndexError in the callers.
        A missing file yields an empty list.
        """
        if not os.path.isfile(fpath):
            return []
        rows = []
        # newline='' is what the csv module expects from its input files
        with open(fpath, newline='') as csvfile:
            for row in csv.reader(csvfile, delimiter=' '):
                if len(row) >= mincols:
                    rows.append(row)
                elif row:
                    logging.warning(
                        "skipping malformed row in %s: %s", fpath, row
                    )
        return rows

    @property
    def phpfile(self):
        """ Path of the rendered file inside the build directory """
        return os.path.join(
            shared.config.get('common', 'build'),
            self.name
        )

    async def render(self):
        """ Render the template named after this class and write it to phpfile """
        logging.info('saving %s', self.name)
        o = self.phpfile
        tmplfile = "%s.html" % (__class__.__name__)
        r = shared.j2.get_template(tmplfile).render({
            'site': shared.site,
            'redirects': self.redirects,
            'gones': self.gones
        })
        with open(o, 'wt') as out:
            logging.debug('writing file %s', o)
            out.write(r)
class NoDupeContainer(object):
    """ Base class to hold keys => data dicts with errors on dupes """

    def __init__(self):
        self.data = {}
        # value returned by [] lookups for keys that are not stored
        self.default = None

    def append(self, key, value):
        """ Store value under key; a key that already exists is kept as it
        is and the attempt is logged as an error instead of raising """
        # all clear
        if key not in self.data:
            self.data[key] = value
            return

        # problem
        logging.error(
            "duplicate key error when populating %s: %s",
            self.__class__.__name__,
            key
        )
        logging.error(
            "current: %s",
            self.data.get(key)
        )
        logging.error(
            "problem: %s",
            value
        )
        return

    # TODO: return ordered version of data

    def __getitem__(self, key):
        """ Return the stored value, or self.default for unknown keys """
        return self.data.get(key, self.default)

    #def __delitem__(self, key):
        #return del(self.data[key])

    def __setitem__(self, key, value):
        return self.append(key, value)

    def __contains__(self, key):
        return key in self.data

    def __len__(self):
        return len(self.data)

    def __next__(self):
        # The container keeps no iteration cursor: iteration is served by
        # the generator in __iter__. The previous body called the
        # non-existent dict.next() inside a bare except, which always ended
        # in StopIteration; that outcome is kept, without the hidden error.
        raise StopIteration()

    def __iter__(self):
        """ Iterate over (key, value) pairs """
        yield from self.data.items()

    #def __repr__(self):
        #return json.dumps(self.data)

    #def __str__(self):
        #return "iteration container with %s items" % (len(self.data.keys()))
2017-06-12 15:40:30 +01:00
class FContainer(NoDupeContainer):
    """ This is a container that holds a list of files, keyed by file name;
    based on NoDupeContainer, so it errors on duplicate file names, and it
    is populated with recursive glob """

    def __init__(self, dirs=('',), extensions=('*',)):
        """ Glob '*.<extension>' in every entry of dirs and store the hits

        dirs -- iterable of directory patterns ('**' is honoured because the
                glob runs with recursive=True)
        extensions -- iterable of file extensions without the leading dot
        """
        super().__init__()
        found = set()
        for ext in extensions:
            for p in dirs:
                found.update(glob.iglob(
                    os.path.join(p, '*.%s' % (ext)),
                    recursive=True
                ))

        # the set eliminates duplicate paths; sorting makes it deterministic
        # which path is kept when two files share the same base name
        for fpath in sorted(found):
            self.append(os.path.basename(fpath), fpath)
class Content(FContainer):
    """ Container of every post source file (*.md and *.jpg) found below the
    configured content directory; each collected path is replaced by a
    Singular object parsed from it while the container is being built """

    def __init__(self):
        content_glob = os.path.join(shared.config.get('dirs', 'content'), "**")
        super().__init__([content_glob], ['md', 'jpg'])
        # swap the stored file paths for the parsed posts, key by key
        for fname in list(self.data):
            self.data[fname] = Singular(self.data[fname])
class Category(NoDupeContainer):
    """ A Category which holds pubtime (int) => Singular data """

    indexfile = 'index.html'
    feedfile = 'index.atom'
    feeddir = 'feed'
    pagedir = 'page'
    taxonomy = 'category'

    def __init__(self, name=''):
        # an empty name is used for the front collector, see build()
        self.name = name
        super().__init__()

    def append(self, post):
        """ Add a post, keyed by its publish time """
        return super().append(post.pubtime, post)

    @property
    def mtime(self):
        """ Publish time of the newest post as int; 0 for an empty category """
        return int(max(self.data.keys(), default=0))

    @property
    def is_uptodate(self):
        """ True if the first page index exists and carries the mtime of the
        newest post, which is what write_html stamps on rendered files """
        index = os.path.join(self.path_paged(), self.indexfile)
        if not os.path.isfile(index):
            return False
        return os.path.getmtime(index) == self.mtime

    @property
    def title(self):
        # TODO proper title
        return self.name

    def url_paged(self, page=1, feed=False):
        """ Site-relative URL of a page or of the feed

        Mirrors the layout path_paged() writes to: page 1 lives directly
        under the category root, the feed under <root>/feed/ and any later
        page under <root>/page/<n>/. Segments are joined with single
        slashes, so the unnamed front category yields '/' and '/feed/'
        instead of '//...'.
        """
        parts = ['']
        if self.name:
            parts.extend([self.taxonomy, self.name])
        if page == 1:
            if feed:
                parts.append(self.feeddir)
        else:
            parts.extend([self.pagedir, "%s" % page])
        return "%s/" % ("/".join(parts))

    def path_paged(self, page=1, feed=False):
        """ Directory in the build tree for a page or for the feed; the
        directory is created if it does not exist yet """
        parts = [shared.config.get('common', 'build')]
        if self.name:
            parts.extend([self.taxonomy, self.name])
        if page == 1:
            if feed:
                parts.append(self.feeddir)
        else:
            parts.extend([self.pagedir, "%s" % page])
        x = os.path.join(*parts)
        # exist_ok avoids failing when the directory shows up between the
        # check and the creation
        os.makedirs(x, exist_ok=True)
        return x

    def write_html(self, path, content):
        """ Write content to path and set the file times to self.mtime """
        with open(path, 'wt') as out:
            logging.debug('writing file %s', path)
            out.write(content)
        os.utime(path, (self.mtime, self.mtime))

    async def render(self):
        """ Render every index page, plus the feed alongside page 1 """
        if self.is_uptodate:
            return

        pagination = shared.config.getint('display', 'pagination')
        pages = ceil(len(self.data) / pagination)
        # newest first; sorted once instead of once per page
        ordered = sorted(self.data.keys(), reverse=True)

        for page in range(1, pages + 1):
            # list relevant post templates
            start = (page - 1) * pagination
            end = start + pagination
            posttmpls = [self.data[k].tmplvars for k in ordered[start:end]]

            # define data for template
            tmplvars = {
                'taxonomy': {
                    'title': self.title,
                    'name': self.name,
                    'page': page,
                    'total': pages,
                    'perpage': pagination,
                    'lastmod': arrow.get(self.mtime).format(shared.ARROWFORMAT['iso']),
                    'feed': self.url_paged(page=1, feed=True),
                    'url': self.url_paged(page),
                },
                'site': shared.site,
                'posts': posttmpls,
            }

            # render HTML
            dirname = self.path_paged(page)
            o = os.path.join(dirname, self.indexfile)
            logging.info("Rendering page %d/%d of category %s to %s", page, pages, self.name, o)
            tmplfile = "%s.html" % (__class__.__name__)
            r = shared.j2.get_template(tmplfile).render(tmplvars)
            self.write_html(o, r)

            # render feed
            if 1 == page:
                dirname = self.path_paged(page, feed=True)
                o = os.path.join(dirname, self.feedfile)
                logging.info("Rendering feed of category %s to %s", self.name, o)
                tmplfile = "%s_%s.html" % (__class__.__name__, self.feeddir)
                r = shared.j2.get_template(tmplfile).render(tmplvars)
                self.write_html(o, r)
2017-05-23 11:14:47 +01:00
class Singular(object):
    """ A single post, built either from a markdown file with front matter
    or from a JPG whose embedded metadata supplies the front matter """

    indexfile = 'index.html'

    def __init__(self, fpath):
        logging.debug("initiating singular object from %s", fpath)
        self.fpath = fpath
        self.mtime = os.path.getmtime(self.fpath)
        self.fname, self.fext = os.path.splitext(os.path.basename(self.fpath))
        # the name of the containing directory is the category
        self.category = os.path.basename(os.path.dirname(self.fpath))
        self._images = NoDupeContainer()

        if '.md' == self.fext:
            with open(self.fpath, mode='rt') as f:
                self.fm = frontmatter.parse(f.read())
            self.meta, self.content = self.fm
            self.photo = None
        elif '.jpg' == self.fext:
            self.photo = WebImage(self.fpath)
            self.meta = self.photo.fm_meta
            self.content = self.photo.fm_content
            self.photo.inline = False
            self.photo.cssclass = 'u-photo'

    @property
    def redirects(self):
        """ Unique list of the 'redirect' front matter entries plus the short slug """
        r = self.meta.get('redirect') or []
        if isinstance(r, str):
            r = [r]
        # work on a copy: appending to the list held by self.meta would
        # grow the front matter on every access
        r = list(r)
        r.append(self.shortslug)
        return list(set(r))

    @property
    def is_uptodate(self):
        """ True if the rendered file exists and carries the source mtime """
        if not os.path.isfile(self.htmlfile):
            return False
        return os.path.getmtime(self.htmlfile) == self.mtime

    @property
    def htmlfile(self):
        return os.path.join(
            shared.config.get('common', 'build'),
            self.fname,
            self.indexfile
        )

    @property
    def images(self):
        """ Container of the post's own photo and every inline image found """
        # guard against re-appending, which would log a duplicate key error
        # on every access after the first
        if self.photo and self.fname not in self._images:
            self._images.append(self.fname, self.photo)
        # add inline images
        for shortcode, alt, fname, title, css in self.inline_images:
            # this does the appending automatically; the base name is used
            # so the lookup key matches the one `body` uses
            self._find_image(os.path.basename(fname))
        return self._images

    @property
    def comments(self):
        """ Comments found under the comment dir for this slug and its redirects """
        comments = NoDupeContainer()
        cfiles = []
        for d in [*self.redirects, self.fname]:
            cfiles.extend(glob.glob(
                os.path.join(
                    shared.config.get('dirs', 'comment'),
                    d,
                    '*.md'
                )
            ))
        for cpath in cfiles:
            c = Comment(cpath)
            comments.append(c.mtime, c)
        return comments

    @property
    def replies(self):
        """ Sorted (mtime, tmplvars) pairs of comments of type 'webmention' """
        r = {}
        for mtime, c in self.comments:
            if 'webmention' == c.type:
                r[mtime] = c.tmplvars
        return sorted(r.items())

    @property
    def reactions(self):
        """ Non-webmention comments grouped by type; every group is a sorted
        list of (mtime, tmplvars) pairs """
        r = {}
        for mtime, c in self.comments:
            if 'webmention' == c.type:
                continue
            r.setdefault(c.type, {})[mtime] = c.tmplvars
        return {icon: sorted(found.items()) for icon, found in r.items()}

    @property
    def exif(self):
        if not self.photo:
            return {}
        return self.photo.exif

    @property
    def published(self):
        """ 'published' from the front matter, falling back to the file mtime """
        return arrow.get(self.meta.get('published', self.mtime))

    @property
    def updated(self):
        """ 'updated' from the front matter as arrow object, or False """
        u = self.meta.get('updated', False)
        if u:
            u = arrow.get(u)
        return u

    @property
    def pubtime(self):
        return int(self.published.timestamp)

    @property
    def is_reply(self):
        """ The 'in-reply-to' front matter value, or False """
        return self.meta.get('in-reply-to', False)

    @property
    def is_future(self):
        return self.pubtime > arrow.utcnow().timestamp

    @property
    def licence(self):
        """ Licence text and URL, configured per category with a default """
        code = shared.config.get(
            'licence',
            self.category,
            fallback=shared.config.get('licence', 'default',)
        )
        return {
            'text': 'CC %s 4.0' % code.upper(),
            'url': 'https://creativecommons.org/licenses/%s/4.0/' % code,
        }

    @property
    def corpus(self):
        """ Text handed to the search database """
        parts = [
            "%s" % self.meta.get('title', ''),
            "%s" % self.fname,
            "%s" % self.meta.get('summary', ''),
            "%s" % self.content,
        ]
        if self.photo:
            # tags go on their own lines; gluing them straight onto the
            # content would fuse its last word with the first tag
            parts.extend(self.meta.get('tags', []))
        return "\n".join(parts)

    @property
    def lang(self):
        # default is English, this will only be changed if the try
        # succeeds and actually detects a language
        lang = 'en'
        try:
            lang = langdetect.detect("\n".join([
                self.fname,
                self.meta.get('title', ''),
                self.content
            ]))
        except Exception:
            # detection is best effort, but don't swallow KeyboardInterrupt
            # and friends the way a bare except would
            logging.debug("language detection failed for %s", self.fname)
        return lang

    def _find_image(self, fname):
        """ Locate fname one level below the files dir and return its
        WebImage (created and stored on first use), or None if not found """
        pattern = os.path.join(
            shared.config.get('dirs', 'files'),
            '*',
            fname
        )
        logging.debug('trying to locate image %s in %s', fname, pattern)
        maybe = glob.glob(pattern)

        if not maybe:
            return None

        if fname not in self._images:
            im = WebImage(maybe.pop())
            self._images.append(fname, im)
        return self._images[fname]

    @property
    def inline_images(self):
        return shared.REGEX['mdimg'].findall(self.content)

    @property
    def url(self):
        return "%s/%s" % (shared.config.get('site', 'url'), self.fname)

    @property
    def body(self):
        """ The content with inline image shortcodes replaced by rendered images """
        body = "%s" % (self.content)
        # get inline images, downsize them and convert them to figures
        for shortcode, alt, fname, title, css in self.inline_images:
            fname = os.path.basename(fname)
            im = self._find_image(fname)
            if not im:
                continue

            im.alt = alt
            im.title = title
            im.cssclass = css
            body = body.replace(shortcode, str(im))

        # TODO if multiple meta images, inline all except the first
        # which will be added at the HTML stage or as enclosure to the feed
        return body

    @property
    def html(self):
        html = "%s" % (self.body)

        # add photo
        if self.photo:
            html = "%s\n%s" % (str(self.photo), html)

        return shared.Pandoc().convert(html)

    @property
    def title(self):
        """ Front matter title, else 'RE: <target>' for replies, else the
        formatted publish date """
        maybe = self.meta.get('title', False)
        if maybe:
            return maybe
        if self.is_reply:
            return "RE: %s" % self.is_reply
        return self.published.format(shared.ARROWFORMAT['display'])

    @property
    def summary(self):
        s = self.meta.get('summary', '')
        if not s:
            return s
        return shared.Pandoc().convert(s)

    @property
    def shortslug(self):
        return shared.baseN(self.pubtime)

    @property
    def tmplvars(self):
        # very simple caching because we might use this 4 times:
        # post HTML, category, front posts and atom feed
        if not hasattr(self, '_tmplvars'):
            self._tmplvars = {
                'title': self.title,
                'pubtime': self.published.format(shared.ARROWFORMAT['iso']),
                'pubdate': self.published.format(shared.ARROWFORMAT['display']),
                'category': self.category,
                'html': self.html,
                'lang': self.lang,
                'slug': self.fname,
                'shortslug': self.shortslug,
                'licence': self.licence,
                #'sourceurl': self.sourceurl,
                'is_reply': self.is_reply,
                # NOTE(review): publish year minus current year, so posts
                # from past years give a negative number - confirm the
                # templates expect that sign
                'age': int(self.published.format('YYYY')) - int(arrow.utcnow().format('YYYY')),
                'summary': self.summary,
                'replies': self.replies,
                'reactions': self.reactions,
            }
        return self._tmplvars

    async def render(self):
        """ Render the post to htmlfile and stamp it with the source mtime;
        an up-to-date file is only rewritten when the 'force' param is set """
        logging.info('rendering %s', self.fname)
        o = self.htmlfile

        force = shared.config.getboolean('params', 'force', fallback=False)
        if self.is_uptodate and not force:
            logging.debug('%s is up to date', o)
            return

        tmplfile = "%s.html" % (__class__.__name__)
        r = shared.j2.get_template(tmplfile).render({
            'post': self.tmplvars,
            'site': shared.site,
        })

        d = os.path.dirname(o)
        if not os.path.isdir(d):
            logging.debug('creating directory %s', d)
            os.makedirs(d)
        with open(o, 'wt') as out:
            logging.debug('writing file %s', o)
            out.write(r)
        os.utime(o, (self.mtime, self.mtime))

    def __repr__(self):
        return "%s/%s" % (self.category, self.fname)
2017-06-02 11:19:55 +01:00
2017-06-12 15:40:30 +01:00
class WebImage(object):
    """ An image file: reads and caches its metadata, creates downsized
    (and for photos, watermarked) copies and renders itself via a template """

    def __init__(self, fpath):
        logging.info("parsing image: %s", fpath)
        self.fpath = fpath
        self.mtime = os.path.getmtime(self.fpath)
        bname = os.path.basename(fpath)
        self.fname, self.fext = os.path.splitext(bname)
        self.title = ''
        self.alt = bname
        self.target = ''
        self.cssclass = ''

    @property
    def fm_content(self):
        """ Post content substitute: the 'Description' metadata field """
        return self.meta.get('Description', '')

    @property
    def fm_meta(self):
        """ Front matter substitute assembled from the image metadata """
        return {
            'published': self.meta.get(
                'ReleaseDate',
                self.meta.get('ModifyDate')
            ),
            'title': self.meta.get('Headline', self.fname),
            'tags': list(set(self.meta.get('Subject', []))),
        }

    @property
    def href(self):
        """ Link target: the explicit target if set, else the URL of the
        largest downsized version; False if the image is not downsizeable """
        if len(self.target):
            return self.target

        if not self.is_downsizeable:
            return False

        return self.sizes[-1][1]['url']

    @property
    def src(self):
        """ URL of the default-sized version, or of the plain copy """
        # if the image is too small to downsize, it will be copied over
        # so the link needs to point at the copy
        src = "/%s/%s" % (
            shared.config.get('common', 'files'),
            "%s%s" % (self.fname, self.fext)
        )

        if self.is_downsizeable:
            try:
                default = shared.config.getint('photo', 'default')
                for size, downsized in self.sizes:
                    if size == default:
                        return downsized['url']
            except Exception:
                # best effort: fall back to the plain copy, but unlike a
                # bare except leave a trace and let KeyboardInterrupt through
                logging.debug(
                    "no default size for %s, using %s",
                    self.fpath,
                    src,
                    exc_info=True
                )
        return src

    @property
    def meta(self):
        """ The image metadata as a dict, read through a JSON cache file """
        if not hasattr(self, '_exif'):
            # reading EXIF is expensive enough even with a static generator
            # to consider caching it, so I'll do that here
            cachedir = shared.config.get('var', 'cache')
            cpath = os.path.join(cachedir, "%s.exif.json" % self.fname)

            # the cache is only valid if it is not older than the image
            if os.path.exists(cpath) and os.path.getmtime(cpath) >= self.mtime:
                with open(cpath, 'rt') as f:
                    self._exif = json.load(f)
                return self._exif

            self._exif = shared.ExifTool(self.fpath).read()
            os.makedirs(cachedir, exist_ok=True)
            with open(cpath, 'wt') as f:
                json.dump(self._exif, f)
        return self._exif

    @property
    def is_photo(self):
        """ True if the Copyright or Artist field matches the configured regex """
        # missing regex from config
        if 'photo' not in shared.REGEX:
            logging.debug('%s photo regex missing from config', self.fpath)
            return False

        cpr = self.meta.get('Copyright', '')
        art = self.meta.get('Artist', '')

        # both Artist and Copyright missing from EXIF
        if not cpr and not art:
            logging.debug('%s Artist or Copyright missing from EXIF', self.fpath)
            return False

        # we have regex, Artist and Copyright, try matching them
        pattern = re.compile(shared.config.get('photo', 'regex'))
        if pattern.search(cpr) or pattern.search(art):
            return True

        logging.debug('%s patterns did not match', self.fpath)
        return False

    @property
    def exif(self):
        """ Selected metadata fields for display; empty unless is_photo """
        exif = {}

        if not self.is_photo:
            return exif

        # display key => metadata fields to try, the first non-empty one wins
        mapping = {
            'camera': ['Model'],
            'aperture': ['FNumber', 'Aperture'],
            'shutter_speed': ['ExposureTime'],
            'focallength': ['FocalLengthIn35mmFormat', 'FocalLength'],
            'iso': ['ISO'],
            'lens': ['LensID', 'LensSpec', 'Lens',],
            #'date': ['CreateDate','DateTimeOriginal'],
            'geo_latitude': ['GPSLatitude'],
            'geo_longitude': ['GPSLongitude'],
        }

        for ekey, candidates in mapping.items():
            for candidate in candidates:
                maybe = self.meta.get(candidate, None)
                if not maybe:
                    continue
                elif 'geo_' in ekey:
                    exif[ekey] = round(float(maybe), 5)
                else:
                    exif[ekey] = maybe
                break
        return exif

    @property
    def sizes(self):
        """ Ascending list of (size, details) tuples for every configured
        downsize that is not larger than the image's longer side """
        sizes = []
        _max = max(
            int(self.meta.get('ImageWidth')),
            int(self.meta.get('ImageHeight'))
        )

        for size in shared.config.options('downsize'):
            if _max < int(size):
                continue

            name = '%s_%s%s' % (
                self.fname,
                shared.config.get('downsize', size),
                self.fext
            )

            fpath = os.path.join(
                shared.config.get('common', 'build'),
                shared.config.get('common', 'files'),
                name
            )

            exists = os.path.isfile(fpath)
            # in case there is a downsized image compare against the main file's
            # mtime and invalidate the existing if it's older
            if exists and self.mtime > os.path.getmtime(fpath):
                exists = False

            sizes.append((
                int(size),
                {
                    'fpath': fpath,
                    # the invalidated flag, not a second isfile() check,
                    # otherwise stale downsized files are never regenerated
                    'exists': exists,
                    'url': "%s/%s/%s" % (
                        shared.config.get('site', 'url'),
                        shared.config.get('common', 'files'),
                        name
                    ),
                    'crop': shared.config.getboolean(
                        'crop',
                        size,
                        fallback=False
                    )
                }
            ))
        # order by the numeric size only; the details dicts are not orderable
        return sorted(sizes, key=lambda entry: entry[0])

    @property
    def is_downsizeable(self):
        """ Check if the image is large enough to downsize it """
        ftype = self.meta.get('FileType', None)
        if not ftype:
            return False
        if ftype.lower() not in ('jpeg', 'png'):
            return False

        _max = max(
            int(self.meta.get('ImageWidth')),
            int(self.meta.get('ImageHeight'))
        )
        _min = shared.config.getint('photo', 'default')
        return _max > _min

    def _maybe_watermark(self, img):
        """ Composite image by adding watermark file over it """
        if not self.is_photo:
            logging.debug("not watermarking: not a photo")
            return img

        wmarkfile = shared.config.get('photo', 'watermark')
        if not os.path.isfile(wmarkfile):
            logging.debug("not watermarking: watermark not found")
            return img

        logging.debug("%s is a photo, applying watermarking", self.fpath)
        with wand.image.Image(filename=wmarkfile) as wmark:
            if img.width > img.height:
                w = img.width * 0.2
                h = wmark.height * (w / wmark.width)
                x = img.width - w - (img.width * 0.01)
                y = img.height - h - (img.height * 0.01)
            else:
                # the watermark gets rotated below, so its width and height
                # swap roles when computing the offsets
                w = img.height * 0.16
                h = wmark.height * (w / wmark.width)
                x = img.width - h - (img.width * 0.01)
                y = img.height - w - (img.height * 0.01)

            w = round(w)
            h = round(h)
            x = round(x)
            y = round(y)

            wmark.resize(w, h)
            if img.width <= img.height:
                wmark.rotate(-90)
            img.composite(image=wmark, left=x, top=y)

        return img

    def _copy(self):
        """ Copy the original file into the build files dir unless the copy
        there is already at least as new """
        fname = "%s%s" % (self.fname, self.fext)
        logging.info("copying %s to build dir", fname)
        build_files = os.path.join(
            shared.config.get('common', 'build'),
            shared.config.get('common', 'files'),
        )
        fpath = os.path.join(build_files, fname)
        if os.path.isfile(fpath):
            mtime = os.path.getmtime(fpath)
            if self.mtime <= mtime:
                return
        # downsize() returns through here before it creates the directory,
        # so make sure the destination exists
        os.makedirs(build_files, exist_ok=True)
        shutil.copy(self.fpath, fpath)

    def _intermediate_dimension(self, size, width, height, crop=False):
        """ Calculate intermediate resize dimension and return a tuple of width, height """
        size = int(size)
        if (width > height and not crop) \
        or (width < height and crop):
            w = size
            h = int(float(size / width) * height)
        else:
            h = size
            w = int(float(size / height) * width)
        return (w, h)

    def _intermediate(self, img, size, target, crop=False):
        """ Write a resized copy of img to target; returns False without
        writing when img already fits into size x size """
        if img.width <= size and img.height <= size:
            return False

        with img.clone() as thumb:
            width, height = self._intermediate_dimension(
                size,
                img.width,
                img.height,
                crop
            )
            thumb.resize(width, height)

            if crop:
                thumb.liquid_rescale(size, size, 1, 1)

            if self.meta.get('FileType', 'jpeg').lower() == 'jpeg':
                thumb.compression_quality = 86
                thumb.unsharp_mask(
                    radius=0,
                    sigma=0.5,
                    amount=1,
                    threshold=0.03
                )
                thumb.format = 'pjpeg'

            # this is to make sure pjpeg happens
            with open(target, 'wb') as f:
                logging.info("writing %s", target)
                thumb.save(file=f)

    @property
    def needs_downsize(self):
        """ True if any of the configured sizes is missing or outdated """
        needed = False
        for (size, downsized) in self.sizes:
            if downsized.get('exists', False):
                logging.debug("size %d exists: %s", size, downsized.get('fpath'))
                continue
            logging.debug("size %d missing: %s", size, downsized.get('fpath'))
            needed = True
        return needed

    async def downsize(self):
        """ Create the downsized versions; images that are not downsizeable
        are copied over as they are """
        if not self.is_downsizeable:
            return self._copy()

        if not self.needs_downsize and not shared.config.getboolean('params', 'regenerate'):
            return

        build_files = os.path.join(
            shared.config.get('common', 'build'),
            shared.config.get('common', 'files'),
        )
        os.makedirs(build_files, exist_ok=True)

        logging.info("downsizing %s%s", self.fname, self.fext)
        with wand.image.Image(filename=self.fpath) as img:
            img.auto_orient()
            img = self._maybe_watermark(img)
            for (size, downsized) in self.sizes:
                self._intermediate(
                    img,
                    size,
                    downsized['fpath'],
                    downsized['crop']
                )

    @property
    def tmplvars(self):
        return {
            'src': self.src,
            'target': self.href,
            'css': self.cssclass,
            'title': self.title,
            'alt': self.alt,
            'exif': self.exif,
            'is_photo': self.is_photo,
            'author': self.meta.get('Artist', ''),
        }

    def __repr__(self):
        return "Image: %s, photo: %r, EXIF: %s" % (
            self.fname, self.is_photo, self.exif
        )

    def __str__(self):
        tmplfile = "%s.html" % (__class__.__name__)
        return shared.j2.get_template(tmplfile).render({'photo': self.tmplvars})
2017-06-12 15:40:30 +01:00
2017-10-27 15:56:05 +01:00
class Comment(object):
    """ A comment or reaction loaded from a markdown file with front matter """

    def __init__(self, fpath):
        logging.debug("initiating comment object from %s", fpath)
        self.fpath = fpath
        self.mtime = os.path.getmtime(self.fpath)
        with open(self.fpath, mode='rt') as f:
            self.fm = frontmatter.parse(f.read())
        self.meta, self.content = self.fm

    @property
    def dt(self):
        """ The 'date' front matter entry as arrow object """
        return arrow.get(self.meta.get('date'))

    @property
    def html(self):
        html = "%s" % (self.content)
        return shared.Pandoc().convert(html)

    @property
    def target(self):
        """ Last path segment of the 'target' URL; '' if there is no target """
        t = urlparse(self.meta.get('target') or '')
        return t.path.strip('/').split('/')[-1]

    @property
    def source(self):
        return self.meta.get('source')

    @property
    def author(self):
        """ Author name and URL; the URL falls back to the comment source
        and the name to the host name of that URL """
        # a comment without an author block must not crash the build
        author = self.meta.get('author') or {}
        url = author.get('url', self.source)
        hostname = urlparse(url).hostname if url else None
        return {
            'name': author.get('name', hostname),
            'url': url
        }

    @property
    def type(self):
        """ 'webmention', a single emoji taken from the content, or '' """
        # caching, because calling Pandoc is expensive
        if not hasattr(self, '_type'):
            self._type = 'webmention'
            t = self.meta.get('type', 'webmention')
            if 'webmention' != t:
                self._type = ''
            if len(self.content):
                # content that is exactly one known emoji becomes the type
                maybe = shared.Pandoc('plain').convert(self.content)
                if maybe in UNICODE_EMOJI:
                    self._type = maybe
        return self._type

    @property
    def tmplvars(self):
        # cached, same as in the other tmplvars properties
        if not hasattr(self, '_tmplvars'):
            self._tmplvars = {
                'author': self.author,
                'source': self.source,
                'pubtime': self.dt.format(shared.ARROWFORMAT['iso']),
                'pubdate': self.dt.format(shared.ARROWFORMAT['display']),
                'html': self.html,
                'type': self.type
            }
        return self._tmplvars

    def __repr__(self):
        return "Comment from %s for %s" % (
            self.source, self.target
        )

    def __str__(self):
        tmplfile = "%s.html" % (__class__.__name__)
        return shared.j2.get_template(tmplfile).render({'comment': self.tmplvars})
#class SendWebmention(object):
## TODO def __init__(self, source, target):
## check in gone.tsv?
## discover endpoint
## send webmention
## add to DB on return
#def run(self):
#return
#class ReceiveWebmention(object):
## TODO def __init__(self, source, target):
## pull remote
## validate if page links to X anywhere
## find h-entry or use root as SOURCE
## find author in SOURCE
## find content in SOURCE
## save under comments/[target slug]/mtime-[from-slugified-url].md
##
## add to DB on return
#def run(self):
#return
#def parse_received_queue():
# iterate over DB received
#def parse_send_queue():
# iterate over DB needs sending
#def webmentions(target_slug):
# find all webmentions in the relevant directory
# return mtime => Webmention hash
def setup():
    """ Parse the command line, store every parsed value as a string in the
    'params' section of the shared config and (re)configure logging """
    parser = argparse.ArgumentParser(description='Parameters for NASG')

    # on/off switches: (option name, help text)
    switches = (
        ('regenerate', 'force downsizing images'),
        ('force', 'force rendering HTML'),
    )
    for flag, helptext in switches:
        parser.add_argument(
            '--%s' % (flag),
            action='store_true',
            default=False,
            help=helptext
        )

    parser.add_argument(
        '--loglevel',
        default='warning',
        help='change loglevel'
    )

    if not shared.config.has_section('params'):
        shared.config.add_section('params')

    for option, value in vars(parser.parse_args()).items():
        shared.config.set('params', option, str(value))

    # remove the rest of the potential loggers, last one first
    for handler in reversed(list(logging.root.handlers)):
        logging.root.removeHandler(handler)

    loglevel = shared.LLEVEL[shared.config.get('params', 'loglevel')]
    logging.basicConfig(
        level=loglevel,
        format='%(asctime)s - %(levelname)s - %(message)s'
    )
def build():
    """ Build the site: parse all content, queue the render and downsize
    tasks, run them on the event loop, then copy the static files """
    setup()

    loop = asyncio.get_event_loop()
    tasks = []
    content = Content()
    sdb = db.SearchDB()
    magic = MagicPHP()

    collector_front = Category()
    collector_categories = NoDupeContainer()

    # setup() stores the params as strings; a plain get() would return
    # 'False', which is truthy, so every post was always re-rendered
    force = shared.config.getboolean('params', 'force')

    for f, post in content:
        logging.info("PARSING %s", f)

        # extend redirects
        for r in post.redirects:
            magic.redirects.append((r, post.fname))

        # add post to search, if needed
        if not sdb.is_uptodate(post.fname, post.mtime):
            sdb.append(
                post.fname,
                post.corpus,
                post.mtime,
                post.url,
                post.category,
                post.title
            )

        # add render task, if needed
        if force or not post.is_uptodate:
            tasks.append(loop.create_task(post.render()))

        # collect images to downsize
        for fname, im in post.images:
            tasks.append(loop.create_task(im.downsize()))

        # skip categories starting with _
        if post.category.startswith('_'):
            continue
        # get the category otherwise
        elif post.category not in collector_categories:
            c = Category(post.category)
            collector_categories.append(post.category, c)
        else:
            c = collector_categories[post.category]

        # add post to category
        c.append(post)

        # add post to front
        collector_front.append(post)

    # write search db
    sdb.finish()

    # render front
    tasks.append(loop.create_task(collector_front.render()))

    # render categories
    for name, c in collector_categories:
        tasks.append(loop.create_task(c.render()))

    # add magic.php rendering
    tasks.append(loop.create_task(magic.render()))

    # TODO: send webmentions to any url
    # TODO: comments
    # TODO: ping websub?

    # do all the things!
    done, _pending = loop.run_until_complete(asyncio.wait(tasks))
    # asyncio.wait() does not raise on failed tasks; report them instead
    # of letting the failures pass unnoticed
    for task in done:
        if task.cancelled():
            continue
        error = task.exception()
        if error is not None:
            logging.error("task failed: %r", error)
    loop.close()

    # copy static
    logging.info('copying static files')
    src = shared.config.get('dirs', 'static')
    for item in os.listdir(src):
        s = os.path.join(src, item)
        d = os.path.join(shared.config.get('common', 'build'), item)
        if os.path.exists(d):
            continue
        logging.debug("copying static file %s to %s", s, d)
        if os.path.isdir(s):
            # copy2 can not copy directories
            shutil.copytree(s, d)
        else:
            shutil.copy2(s, d)
2017-05-23 11:13:35 +01:00
# run the build only when executed as a script, not when imported
if __name__ == '__main__':
    build()