#!/usr/bin/env python3
import os
import re
import configparser
import argparse
import shutil
import logging
import json
import glob
import tempfile
import atexit
import hashlib
import math
import asyncio
import csv
import getpass
import quopri
import base64
import mimetypes
import urllib.parse

import magic
import arrow
import wand.image
import similar_text
import frontmatter
from slugify import slugify
import langdetect
import requests
from whoosh import index
from whoosh import qparser
import jinja2
from webmentiontools.send import WebmentionSend
import bleach
from emoji import UNICODE_EMOJI
from bs4 import BeautifulSoup
from readability.readability import Document

import shared


def splitpath(path):
    parts = []
    (path, tail) = os.path.split(path)
    while path and tail:
        parts.insert(0, tail)
        (path, tail) = os.path.split(path)
    return parts


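# Illustrative only: a doctest-style sketch of splitpath; note that the
# leading component of a relative path never makes it into the list, because
# the final os.path.split leaves an empty head and the loop exits:
# >>> splitpath('content/article/example.md')
# ['article', 'example.md']
# which is what Singular relies on below (splitpath(path)[-2] as category).

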
class BaseIter(object):
    def __init__(self):
        self.data = {}

    def append(self, key, value):
        if key in self.data:
            logging.warning("duplicate key: %s, using existing instead", key)
            existing = self.data.get(key)
            if hasattr(value, 'fname') and hasattr(existing, 'fname'):
                logging.warning(
                    "%s collides with existing %s",
                    value.fname,
                    existing.fname
                )
            return
        self.data[key] = value

    def __getitem__(self, key):
        return self.data.get(key, {})

    def __repr__(self):
        return json.dumps(list(self.data.values()))

    def __next__(self):
        # dict has no .next() in Python 3, so keep an explicit iterator;
        # __iter__ below is a generator, so this is rarely exercised
        if not hasattr(self, '_iterator'):
            self._iterator = iter(self.data.items())
        return next(self._iterator)

    def __iter__(self):
        for k, v in self.data.items():
            yield (k, v)
        return


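# Illustrative only: BaseIter is a small keyed container used throughout,
# usually keyed by publish timestamps. A minimal sketch:
# >>> b = BaseIter()
# >>> b.append(1496404795, 'first')
# >>> b.append(1496404795, 'second')   # duplicate key: warned about, ignored
# >>> dict(b)
# {1496404795: 'first'}

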
class BaseRenderable(object):
    def __init__(self):
        return

    def writerendered(self, content, mtime=None):
        mtime = mtime or self.mtime

        d = os.path.dirname(self.target)
        if not os.path.isdir(d):
            os.mkdir(d)

        with open(self.target, "w") as html:
            logging.debug('writing %s', self.target)
            html.write(content)

        os.utime(self.target, (mtime, mtime))


class Indexer(object):

    def __init__(self):
        self.target = os.path.abspath(os.path.join(
            shared.config.get('target', 'builddir'),
            shared.config.get('var', 'searchdb')
        ))

        if not os.path.isdir(self.target):
            os.mkdir(self.target)

        self.mtime = 0
        if index.exists_in(self.target):
            self.ix = index.open_dir(self.target)
            tocfiles = glob.glob(os.path.join(
                self.target,
                "_MAIN_*.toc"
            ))
            if len(tocfiles):
                self.mtime = int(os.path.getmtime(tocfiles[0]))
        else:
            self.ix = index.create_in(self.target, shared.schema)

        self.writer = self.ix.writer()
        self.qp = qparser.QueryParser("url", schema=shared.schema)

    async def append(self, singular):
        if singular.isfuture:
            return

        logging.debug("searching for existing index for %s", singular.fname)
        if self.mtime >= singular.mtime:
            logging.debug(
                "search index is newer than post mtime (%d vs %d), skipping post",
                self.mtime,
                singular.mtime
            )
            return

        exists = False
        q = self.qp.parse(singular.url)
        r = self.ix.searcher().search(q, limit=1)
        if r:
            r = r[0]
            # nothing to do, the entry is present and is up to date
            ixtime = r['mtime']
            if int(ixtime) == int(singular.mtime):
                logging.info("search index is up to date for %s", singular.fname)
                return
            else:
                logging.info(
                    "search index is out of date: %d (indexed) vs %d",
                    ixtime,
                    singular.mtime
                )
                exists = True

        reactions = []
        for t, v in singular.reactions.items():
            if isinstance(v, list) and len(v):
                reactions = [*reactions, *v]

        content = "\n\n".join([
            "\n\n".join([
                bleach.clean(c, tags=[], strip_comments=True, strip=True)
                for c in [singular.sumhtml, singular.html]
            ]),
            "\n".join(["%s" % c for c in singular.tags]),
            "\n\n".join(reactions)
        ])

        weight = 1
        if singular.isbookmark or singular.isfav:
            weight = 10
        if singular.ispage:
            weight = 100

        if not len(singular.title):
            title = singular.fname
        else:
            title = singular.title

        if singular.photo:
            img = shared.Pandoc().convert("%s" % singular.photo)
        else:
            img = ''

        args = {
            'url': singular.url,
            'category': singular.category,
            'date': singular.published.datetime,
            'title': title,
            'weight': weight,
            'img': img,
            'content': content,
            'fuzzy': content,
            'mtime': singular.mtime
        }

        if exists:
            logging.info("updating search index with %s", singular.fname)
            # update_document deletes any existing entry matching the
            # schema's unique field before adding the new version
            self.writer.update_document(**args)
        else:
            logging.info("appending search index with %s", singular.fname)
            self.writer.add_document(**args)

    def finish(self):
        self.writer.commit()


class Renderer(object):
    def __init__(self):
        self.sitevars = dict(shared.config.items('site'))
        self.sitevars['author'] = dict(shared.config.items('author'))
        self.sitevars['author']['socials'] = dict(shared.config.items('socials'))
        self.sitevars['author']['qr'] = dict(shared.config.items('qr'))

        self.jinjaldr = jinja2.FileSystemLoader(
            searchpath=shared.config.get('source', 'templatesdir')
        )
        self.j2 = jinja2.Environment(loader=self.jinjaldr)
        self.j2.filters['date'] = Renderer.jinja_filter_date
        self.j2.filters['search'] = Renderer.jinja_filter_search
        self.j2.filters['slugify'] = Renderer.jinja_filter_slugify

    @staticmethod
    def jinja_filter_date(d, form='%Y-%m-%d %H:%M:%S'):
        if d == 'now':
            d = arrow.now().datetime
        if form == 'c':
            return d.isoformat()
            #form = '%Y-%m-%dT%H:%M:%S%z'
        return d.strftime(form)

    @staticmethod
    def jinja_filter_slugify(s):
        return slugify(s, only_ascii=True, lower=True)

    @staticmethod
    def jinja_filter_search(s, r):
        if r in s:
            return True
        return False


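# Illustrative only: how the filters registered above might be used from a
# template (assuming templates in 'templatesdir' reference them like this):
#   {{ post.published | date('c') }}       -> ISO 8601 timestamp
#   {{ taxonomy.name | slugify }}          -> 'Some Tag' becomes 'some-tag'
#   {{ post.category | search('photo') }}  -> True if 'photo' is in the string

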
# based on http://stackoverflow.com/a/10075210
class ExifTool(shared.CMDLine):
    """ Handles calling external binary `exiftool` in an efficient way """
    sentinel = "{ready}\n"

    def __init__(self):
        super().__init__('exiftool')

    def run(self, *filenames):
        return json.loads(self.execute(
            '-sort',
            '-json',
            '-MIMEType',
            '-FileType',
            '-FileName',
            '-ModifyDate',
            '-CreateDate',
            '-DateTimeOriginal',
            '-ImageHeight',
            '-ImageWidth',
            '-Aperture',
            '-FOV',
            '-ISO',
            '-FocalLength',
            '-FNumber',
            '-FocalLengthIn35mmFormat',
            '-ExposureTime',
            '-Copyright',
            '-Artist',
            '-Model',
            '-GPSLongitude#',
            '-GPSLatitude#',
            '-LensID',
            *filenames))


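# Illustrative only: exiftool's -json mode yields one dict per file, so a
# hypothetical call could look like this (field values are made up):
# >>> with ExifTool() as e:
# ...     e.run('example.jpg')
# [{'SourceFile': 'example.jpg', 'FileName': 'example.jpg', 'FileType': 'JPEG',
#   'ImageWidth': 4032, 'ImageHeight': 3024, 'ISO': 200, ...}]

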
class Comment(BaseRenderable):
    def __init__(self, path):
        logging.debug("initiating comment object from %s", path)
        self.path = path
        self.fname, self.ext = os.path.splitext(os.path.basename(self.path))
        self.mtime = int(os.path.getmtime(self.path))
        self.meta = {}
        self.content = ''
        self.tmplfile = 'comment.html'
        self.__parse()

    def __repr__(self):
        return "%s" % (self.path)

    def __parse(self):
        with open(self.path, mode='rt') as f:
            self.meta, self.content = frontmatter.parse(f.read())

    @property
    def reacji(self):
        if hasattr(self, '_reacji'):
            return self._reacji

        self._reacji = ''
        maybe = bleach.clean(
            self.content,
            tags=[],
            strip_comments=True,
            strip=True,
        ).strip()

        if not len(maybe):
            self._reacji = ''
        elif maybe in UNICODE_EMOJI:
            self._reacji = maybe
        #t = self.meta.get('type', 'webmention')
        #typemap = {
            #'like-of': '👍',
            #'bookmark-of': '🔖',
            #'favorite': '★',
        #}
        #if t in typemap.keys():
            #self._reacji = typemap[t]
        #else:

        return self._reacji

    @property
    def html(self):
        if hasattr(self, '_html'):
            return self._html
        tmp = shared.Pandoc().convert(self.content)
        self._html = bleach.clean(tmp, strip=True)
        return self._html

    @property
    def tmplvars(self):
        if hasattr(self, '_tmplvars'):
            return self._tmplvars
        self._tmplvars = {
            'published': self.published.datetime,
            'author': self.meta.get('author', {}),
            #'content': self.content,
            #'html': self.html,
            'source': self.source,
            'target': self.targeturl,
            'type': self.meta.get('type', 'webmention'),
            'reacji': self.reacji,
            'fname': self.fname
        }
        return self._tmplvars

    @property
    def published(self):
        if hasattr(self, '_published'):
            return self._published
        self._published = arrow.get(self.meta.get('date', self.mtime))
        return self._published

    @property
    def pubtime(self):
        return int(self.published.timestamp)

    @property
    def source(self):
        if hasattr(self, '_source'):
            return self._source
        s = self.meta.get('source', '')
        domains = shared.config.get('site', 'domains').split(' ')
        self._source = s
        for d in domains:
            if d in s:
                self._source = ''
        return self._source

    @property
    def targeturl(self):
        if hasattr(self, '_targeturl'):
            return self._targeturl
        t = self.meta.get('target', shared.config.get('site', 'url'))
        self._targeturl = '{p.path}'.format(p=urllib.parse.urlparse(t)).strip('/')
        return self._targeturl

    @property
    def target(self):
        if hasattr(self, '_target'):
            return self._target
        targetdir = os.path.abspath(os.path.join(
            shared.config.get('target', 'builddir'),
            shared.config.get('site', 'commentspath'),
            self.fname
        ))
        self._target = os.path.join(targetdir, 'index.html')
        return self._target

    async def render(self, renderer):
        logging.info("rendering and saving comment %s", self.fname)
        if not shared.config.getboolean('params', 'force') and os.path.isfile(self.target):
            ttime = int(os.path.getmtime(self.target))
            logging.debug('ttime is %d mtime is %d', ttime, self.mtime)
            if ttime == self.mtime:
                logging.debug(
                    '%s exists and up-to-date (lastmod: %d)',
                    self.target,
                    ttime
                )
                return

        tmplvars = {
            'reply': self.tmplvars,
            'site': renderer.sitevars,
            'taxonomy': {},
        }
        r = renderer.j2.get_template(self.tmplfile).render(tmplvars)
        self.writerendered(r)


class Comments(object):
    def __init__(self):
        self.files = glob.glob(os.path.join(
            shared.config.get('source', 'commentsdir'),
            "*.md"
        ))
        self.bytarget = {}

    def __getitem__(self, key):
        return self.bytarget.get(key, BaseIter())

    def populate(self):
        for fpath in self.files:
            item = Comment(fpath)
            t = item.targeturl
            if not self.bytarget.get(t):
                self.bytarget[t] = BaseIter()
            self.bytarget[t].append(item.pubtime, item)


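# Illustrative only: each comment is a markdown file with frontmatter; a
# hypothetical file matching the fields read above (author shape assumed):
#
#   ---
#   author:
#     name: Example Commenter
#     url: https://commenter.example.net/
#   date: 2017-06-02T11:19:55+01:00
#   source: https://commenter.example.net/a-reply/
#   target: https://example.com/some-post/
#   type: webmention
#   ---
#   Nice post!

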
class Images(BaseIter):
    def __init__(self, extensions=['jpg', 'gif', 'png']):
        super(Images, self).__init__()
        logging.info(
            "initiating images with extensions: %s",
            extensions
        )
        self.files = []
        self.data = {}
        # if anyone knows how to do this in a more pythonic way, please tell me
        paths = [
            shared.config.get('source', 'filesdir'),
            shared.config.get('source', 'photosdir')
        ]
        for p in paths:
            for ext in extensions:
                self.files += glob.glob(os.path.join(p, "*.%s" % ext))

    def populate(self):
        with ExifTool() as e:
            _meta = e.run(*self.files)

        # parsing the returned meta into a dict of [filename]={meta}
        for e in _meta:
            if 'FileName' not in e:
                logging.error("missing 'FileName' in element %s", e)
                continue
            fname = os.path.basename(e['FileName'])
            del(e['FileName'])
            # duplicate files are going to be a problem, so don't send it
            # away with a simple error log entry
            if fname in self.data:
                raise ValueError('filename collision: %s' % fname)
            # convert dates
            for k, v in e.items():
                e[k] = self.exifdate(v)

            self.data[fname] = WebImage(fname, e)

    def exifdate(self, value):
        """ converts an EXIF date string to ISO 8601 format

        :param value: EXIF date (2016:05:01 00:08:24)
        :type value: str
        :return: ISO 8601 string with UTC timezone 2016-05-01T00:08:24+0000
        :rtype: str
        """
        if not isinstance(value, str):
            return value
        match = shared.EXIFREXEG.match(value)
        if not match:
            return value
        return "%s-%s-%sT%s+0000" % (
            match.group('year'),
            match.group('month'),
            match.group('day'),
            match.group('time')
        )


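# Illustrative only: what exifdate does, given that shared.EXIFREXEG captures
# 'year', 'month', 'day' and 'time' groups from exiftool's
# 'YYYY:MM:DD HH:MM:SS' date layout:
# >>> images.exifdate('2016:05:01 00:08:24')
# '2016-05-01T00:08:24+0000'
# >>> images.exifdate('not a date')   # non-matching values pass through
# 'not a date'

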
class WebImage(object):
    def __init__(self, fname, meta):
        logging.info(
            "parsing image: %s",
            fname
        )
        self.meta = meta
        self.fpath = os.path.abspath(meta.get('SourceFile', fname))
        self.fname, self.ext = os.path.splitext(fname)
        self.alttext = ''
        self.sizes = []
        self.fallbacksize = int(shared.config.get('common', 'fallbackimg', fallback='720'))
        self.cl = ''
        self.singleimage = False

        for size in shared.config.options('downsize'):
            sizeext = shared.config.get('downsize', size)
            fname = "%s_%s%s" % (self.fname, sizeext, self.ext)
            self.sizes.append((
                int(size),
                {
                    'fpath': os.path.join(
                        shared.config.get('target', 'filesdir'),
                        fname
                    ),
                    'url': "/%s/%s" % (
                    #'url': "%s/%s/%s" % (
                        #shared.config.get('site', 'url'),
                        shared.config.get('source', 'files'),
                        fname
                    ),
                    'crop': shared.config.getboolean('crop', size, fallback=False),
                }
            ))

        self.sizes = sorted(self.sizes, reverse=False)

        self.target = False
        if self.is_downsizeable:
            self.fallback = [e for e in self.sizes if e[0] == self.fallbacksize][0][1]['url']
            self.small = [e for e in self.sizes if not e[1]['crop']][0][1]['url']
            self.target = self.sizes[-1][1]['url']
        else:
            self.small = self.fallback = "/%s/%s" % (
            #self.small = self.fallback = "%s/%s/%s" % (
                #shared.config.get('site', 'url'),
                shared.config.get('source', 'files'),
                "%s%s" % (self.fname, self.ext)
            )

    def _setupvars(self):
        if self.is_downsizeable:
            if self.singleimage and not self.cl:
                self.cl = '.u-photo'
            elif self.singleimage:
                self.cl = '.u-photo %s' % self.cl
        else:
            if not self.cl:
                self.cl = '.aligncenter'

    @property
    def tmplvars(self):
        self._setupvars()
        if hasattr(self, '_tmplvars'):
            return self._tmplvars
        self._tmplvars = {
            'alttext': self.alttext,
            'fallback': self.fallback,
            'small': self.small,
            'title': "%s%s" % (self.fname, self.ext),
            'target': self.target,
            'cl': " ".join([s[1:] for s in self.cl.split()]),
            'orientation': self.orientation
        }
        return self._tmplvars

    def __str__(self):
        self._setupvars()
        if self.is_downsizeable:
            #if self.singleimage and not self.cl:
                #self.cl = '.u-photo'
            #elif self.singleimage:
                #self.cl = '.u-photo %s' % self.cl
            return '[![%s](%s "%s%s"){.adaptimg}](%s){.adaptive %s}' % (
                self.alttext,
                self.fallback,
                self.fname,
                self.ext,
                self.target,
                self.cl
            )
        else:
            #if not self.cl:
                #self.cl = '.aligncenter'
            return '![%s](%s "%s%s"){%s}' % (
                self.alttext,
                self.fallback,
                self.fname,
                self.ext,
                self.cl
            )

    @property
    def exif(self):
        if not self.is_photo:
            return {}
        if hasattr(self, '_exif'):
            return self._exif

        exif = {}
        mapping = {
            'camera': [
                'Model'
            ],
            'aperture': [
                'FNumber',
                'Aperture'
            ],
            'shutter_speed': [
                'ExposureTime'
            ],
            'focallength35mm': [
                'FocalLengthIn35mmFormat',
            ],
            'focallength': [
                'FocalLength',
            ],
            'iso': [
                'ISO'
            ],
            'lens': [
                'LensID',
            ],
            'date': [
                'CreateDate',
                'DateTimeOriginal',
            ],
            'geo_latitude': [
                'GPSLatitude'
            ],
            'geo_longitude': [
                'GPSLongitude'
            ],
        }

        for ekey, candidates in mapping.items():
            for candidate in candidates:
                maybe = self.meta.get(candidate, None)
                if maybe:
                    if 'geo_' in ekey:
                        exif[ekey] = round(float(maybe), 5)
                    else:
                        exif[ekey] = maybe
                    break

        self._exif = exif
        return self._exif

    @property
    def orientation(self):
        width = int(self.meta.get('ImageWidth', 0))
        height = int(self.meta.get('ImageHeight', 0))
        if width >= height:
            return 'horizontal'
        return 'vertical'

    @property
    def rssenclosure(self):
        """ Returns the largest available image for RSS to add as attachment """
        if hasattr(self, '_rssenclosure'):
            return self._rssenclosure
        target = self.sizes[-1][1]
        self._rssenclosure = {
            'mime': magic.Magic(mime=True).from_file(target['fpath']),
            'url': target['url'],
            'size': os.path.getsize(target['fpath']),
            'fname': self.fname
        }
        return self._rssenclosure

    @property
    def is_photo(self):
        if hasattr(self, '_is_photo'):
            return self._is_photo
        self._is_photo = False
        #if not pattern or not isinstance(pattern, str):
        #    return False
        pattern = re.compile(shared.config.get('photo', 'regex'))

        cpr = self.meta.get('Copyright', '')
        art = self.meta.get('Artist', '')
        if not cpr and not art:
            return False

        if cpr and art:
            if pattern.search(cpr) or pattern.search(art):
                self._is_photo = True

        return self._is_photo

    @property
    def is_downsizeable(self):
        """ Check if the image is large enough and jpeg or png in order to
        downsize it """
        if hasattr(self, '_is_downsizeable'):
            return self._is_downsizeable
        self._is_downsizeable = False

        fb = self.sizes[-1][0]
        ftype = self.meta.get('FileType', None)
        if not ftype:
            return self._is_downsizeable
        if ftype.lower() == 'jpeg' or ftype.lower() == 'png':
            width = int(self.meta.get('ImageWidth', 0))
            height = int(self.meta.get('ImageHeight', 0))
            if width > fb or height > fb:
                self._is_downsizeable = True

        return self._is_downsizeable

    def _copy(self):
        target = os.path.join(
            shared.config.get('target', 'filesdir'),
            "%s%s" % (self.fname, self.ext)
        )
        if not os.path.isfile(target):
            logging.debug("can't downsize %s, copying instead", self.fname)
            shutil.copy(self.fpath, target)

    def _watermark(self, img):
        """ Composite image by adding watermark file over it """
        wmarkfile = os.path.join(
            shared.config.get('common', 'basedir'),
            shared.config.get('common', 'watermark')
        )
        if not os.path.isfile(wmarkfile):
            return img

        with wand.image.Image(filename=wmarkfile) as wmark:
            if img.width > img.height:
                w = img.width * 0.16
                h = wmark.height * (w / wmark.width)
                x = img.width - w - (img.width * 0.01)
                y = img.height - h - (img.height * 0.01)
            else:
                w = img.height * 0.16
                h = wmark.height * (w / wmark.width)
                x = img.width - h - (img.width * 0.01)
                y = img.height - w - (img.height * 0.01)

            w = round(w)
            h = round(h)
            x = round(x)
            y = round(y)

            wmark.resize(w, h)
            if img.width <= img.height:
                wmark.rotate(-90)
            img.composite(image=wmark, left=x, top=y)

        return img

    def _intermediate_dimensions(self, size, width, height, crop=False):
        size = int(size)
        w = width
        h = height
        if (width > height and not crop) \
        or (width < height and crop):
            w = size
            h = int(float(size / width) * height)
        else:
            h = size
            w = int(float(size / height) * width)
        return (w, h)

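    # Illustrative only: a worked example of the scaling math above, assuming
    # a 4032x3024 landscape source and size=720:
    #   no crop: w = 720, h = int((720 / 4032) * 3024) = 540
    #   crop:    h = 720, w = int((720 / 3024) * 4032) = 960,
    # after which _intermediate's liquid_rescale squeezes it to 720x720.
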
    def _intermediate(self, img, size, meta, existing=[]):
        if img.width <= size and img.height <= size:
            return False

        crop = meta.get('crop', False)
        with img.clone() as thumb:
            width, height = self._intermediate_dimensions(
                size,
                img.width,
                img.height,
                crop
            )
            thumb.resize(width, height)

            if crop:
                thumb.liquid_rescale(size, size, 1, 1)

            if self.meta.get('FileType', 'jpeg').lower() == 'jpeg':
                thumb.compression_quality = 86
                thumb.unsharp_mask(
                    radius=0,
                    sigma=0.5,
                    amount=1,
                    threshold=0.03
                )
                thumb.format = 'pjpeg'

            # this is to make sure pjpeg happens
            with open(meta['fpath'], 'wb') as f:
                thumb.save(file=f)

        return True

    async def downsize(self, existing=[]):
        if not self.is_downsizeable:
            self._copy()
            return

        logging.info("checking downsizing for %s", self.fname)
        needed = shared.config.getboolean('params', 'regenerate', fallback=False)

        if not needed:
            for (size, meta) in self.sizes:
                if meta['fpath'] not in existing:
                    needed = True

        if not needed:
            logging.debug("downsizing not needed for %s", self.fname)
            return

        with wand.image.Image(filename=self.fpath) as img:
            img.auto_orient()

            if self.is_photo:
                logging.info("%s is a photo", self.fpath)
                img = self._watermark(img)

            for (size, meta) in self.sizes:
                self._intermediate(img, size, meta, existing)


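# Illustrative only: for a downsizeable image stored as example.jpg, with a
# hypothetical 'files' URL prefix and 720/1280 px downsize renditions,
# WebImage.__str__ emits adaptive-image markdown along these lines:
# [![](/files/example_720.jpg "example.jpg"){.adaptimg}](/files/example_1280.jpg){.adaptive}

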
class Taxonomy(BaseIter):
    def __init__(self, name=None, taxonomy=None, slug=None):
        super(Taxonomy, self).__init__()
        self.name = name
        if name and not slug:
            self.slug = slugify(name, only_ascii=True, lower=True)
        else:
            self.slug = slug
        self.taxonomy = taxonomy

    #@property
    #def pages(self):
        #if hasattr(self, '_pages'):
            #return self._pages
        #self._pages = math.ceil(len(self.data) / shared.config.getint('common', 'pagination'))
        #return self._pages

    def __repr__(self):
        return "taxonomy %s with %d items" % (self.taxonomy, len(self.data))

    @property
    def basep(self):
        p = shared.config.get('target', 'builddir')
        if self.taxonomy:
            p = os.path.join(p, self.taxonomy)
        return p

    @property
    def myp(self):
        p = self.basep
        if self.slug:
            return os.path.join(p, self.slug)
        return p

    @property
    def feedp(self):
        return os.path.join(self.myp, 'feed')

    @property
    def pagep(self):
        return os.path.join(self.myp, 'page')

    @property
    def baseurl(self):
        if self.taxonomy and self.slug:
            return "/%s/%s/" % (self.taxonomy, self.slug)
        else:
            return '/'

    @property
    def mtime(self):
        if hasattr(self, '_mtime'):
            return self._mtime
        self._mtime = int(list(sorted(self.data.keys(), reverse=True))[0])
        return self._mtime

    def __mkdirs(self):
        check = [self.basep, self.myp, self.feedp]
        for p in check:
            if not os.path.isdir(p):
                logging.debug("creating dir %s", p)
                os.mkdir(p)

    def tpath(self, page):
        if page == 1:
            p = "%s" % (self.myp)
        else:
            p = os.path.join(self.pagep, "%d" % page)

        if not os.path.isdir(p):
            logging.debug("creating dir %s", p)
            os.mkdir(p)

        return os.path.join(p, "index.html")

    @property
    def is_singlepage(self):
        spcats = shared.config.get('common', 'onepagecategories').split(',')
        if self.name in spcats and 'category' == self.taxonomy:
            return True
        return False

    def posttmpls(self, order='time', start=0, end=None):
        end = end or len(self.data)
        if 'all' == order:
            return [
                i.tmplvars
                for k, i in list(sorted(
                    self.data.items(),
                    key=lambda value: value[1].title.lower()
                ))
            ]

        return [
            self.data[k].tmplvars
            for k in list(sorted(
                self.data.keys(),
                reverse=True
            ))[start:end]
        ]

    async def render(self, renderer):
        #if not self.slug or self.slug is 'None':
            #return

        self.__mkdirs()
        page = 1
        testpath = self.tpath(page)
        if not shared.config.getboolean('params', 'force') and os.path.isfile(testpath):
            ttime = int(os.path.getmtime(testpath))
            mtime = self.mtime
            if ttime == mtime:
                logging.info(
                    'taxonomy index for "%s" exists and up-to-date (lastmod: %d)',
                    self.slug,
                    ttime
                )
                return
            else:
                logging.info(
                    'taxonomy update needed: %s timestamp is %d, last post timestamp is %d (%s)',
                    testpath,
                    ttime,
                    mtime,
                    self.data[mtime].fname
                )

        if self.is_singlepage:
            pagination = len(self.data)
        else:
            pagination = shared.config.getint('common', 'pagination')
        pages = math.ceil(len(self.data) / pagination)

        while page <= pages:
            self.render_page(renderer, page, pagination, pages)
            page = page + 1

        self.render_feeds(renderer)
        self.ping_websub()

    def render_feeds(self, renderer):
        pagination = shared.config.getint('common', 'pagination')
        start = 0
        end = int(start + pagination)
        posttmpls = self.posttmpls('time', start, end)

        tmplvars = {
            'taxonomy': {
                'url': self.baseurl,
                'name': self.name,
                'slug': self.slug,
                'taxonomy': self.taxonomy,
                'lastmod': arrow.get(self.mtime).datetime
            },
            'site': renderer.sitevars,
            'posts': posttmpls,
        }

        target = os.path.join(self.feedp, 'index.atom')
        logging.info("rendering Atom feed to %s", target)
        r = renderer.j2.get_template('atom.html').render(tmplvars)
        with open(target, "wt") as html:
            html.write(r)
        os.utime(target, (self.mtime, self.mtime))

    def render_page(self, renderer, page, pagination, pages):
        if self.is_singlepage:
            posttmpls = self.posttmpls('all')
        else:
            start = int((page - 1) * pagination)
            end = int(start + pagination)
            posttmpls = self.posttmpls('time', start, end)

        target = self.tpath(page)
        tdir = os.path.dirname(target)
        if not os.path.isdir(tdir):
            logging.debug("creating dir %s", tdir)
            os.mkdir(tdir)

        logging.info("rendering taxonomy page %d to %s", page, target)
        tmplvars = {
            'taxonomy': {
                'url': self.baseurl,
                'name': self.name,
                'slug': self.slug,
                'taxonomy': self.taxonomy,
                'paged': page,
                'total': pages,
                'perpage': pagination,
                'lastmod': arrow.get(self.mtime).datetime
            },
            'site': renderer.sitevars,
            'posts': posttmpls,
        }

        r = renderer.j2.get_template('archive.html').render(tmplvars)
        with open(target, "wt") as html:
            html.write(r)
        os.utime(target, (self.mtime, self.mtime))

    def ping_websub(self):
        if not self.taxonomy or self.taxonomy == 'category':
            t = shared.config.get('site', 'websuburl')
            data = {
                'hub.mode': 'publish',
                'hub.url': "%s%s" % (
                    shared.config.get('site', 'url'), self.baseurl
                )
            }
            logging.info("pinging %s with data %s", t, data)
            requests.post(t, data=data)

    #def renderpage(self, renderer, page):
        #pagination = int(shared.config.get('common', 'pagination'))
        #start = int((page-1) * pagination)
        #end = int(start + pagination)
        #posttmpls = [self.data[k].tmplvars for k in list(sorted(
            #self.data.keys(), reverse=True))[start:end]]
        #target = self.tpath(page)
        #logging.info("rendering taxonomy page %d to %s", page, target)
        #tmplvars = {
            #'taxonomy': {
                #'url': self.baseurl,
                #'name': self.name,
                #'slug': self.slug,
                #'taxonomy': self.taxonomy,
                #'paged': page,
                #'total': self.pages,
                #'perpage': pagination,
                #'lastmod': arrow.get(self.mtime).datetime
            #},
            #'site': renderer.sitevars,
            #'posts': posttmpls,
        #}
        #r = renderer.j2.get_template('archive.html').render(tmplvars)
        #with open(target, "wt") as html:
            #html.write(r)
        #os.utime(target, (self.mtime, self.mtime))

        #if 1 == page:
            ##target = os.path.join(self.feedp, 'index.rss')
            ##logging.info("rendering RSS feed to %s", target)
            ##r = renderer.j2.get_template('rss.html').render(tmplvars)
            ##with open(target, "wt") as html:
                ##html.write(r)
            ##os.utime(target, (self.mtime, self.mtime))

            #target = os.path.join(self.feedp, 'index.atom')
            #logging.info("rendering Atom feed to %s", target)
            #r = renderer.j2.get_template('atom.html').render(tmplvars)
            #with open(target, "wt") as html:
                #html.write(r)
            #os.utime(target, (self.mtime, self.mtime))

        ## ---
        ## this is a joke
        ## see http://indieweb.org/YAMLFeed
        ## don't do YAMLFeeds.
        #if 1 == page:
            #fm = frontmatter.loads('')
            #fm.metadata = {
                #'site': {
                    #'author': renderer.sitevars['author'],
                    #'url': renderer.sitevars['url'],
                    #'title': renderer.sitevars['title'],
                #},
                #'items': [],
            #}

            #for p in posttmpls:
                #fm.metadata['items'].append({
                    #'title': p['title'],
                    #'url': "%s/%s/" % (renderer.sitevars['url'], p['slug']),
                    #'content': p['content'],
                    #'summary': p['summary'],
                    #'published': p['published'],
                    #'updated': p['updated'],
                #})

            #target = os.path.join(self.feedp, 'index.yml')
            #logging.info("rendering YAML feed to %s", target)
            #with open(target, "wt") as html:
                #html.write(frontmatter.dumps(fm))
            #os.utime(target, (self.mtime, self.mtime))
        ## ---

        #if 1 == page:
            #if not self.taxonomy or self.taxonomy == 'category':
                #t = shared.config.get('site', 'websuburl')
                #data = {
                    #'hub.mode': 'publish',
                    #'hub.url': "%s%s" % (
                        #shared.config.get('site', 'url'), self.baseurl
                    #)
                #}
                #logging.info("pinging %s with data %s", t, data)
                #requests.post(t, data=data)


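# Illustrative only: the on-disk layout Taxonomy produces for a hypothetical
# tag "photo", relative to the build directory:
#   tag/photo/index.html         first archive page (tpath(1))
#   tag/photo/page/2/index.html  further pages (tpath(2), ...)
#   tag/photo/feed/index.atom    Atom feed written by render_feeds

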
class Content(BaseIter):
    def __init__(self, images, comments, extensions=['md']):
        super(Content, self).__init__()
        self.images = images
        self.comments = comments
        basepath = shared.config.get('source', 'contentdir')
        self.files = []
        for ext in extensions:
            self.files += glob.glob(os.path.join(basepath, "*", "*.%s" % ext))
        self.tags = {}
        self.categories = {}
        self.front = Taxonomy()
        self.shortslugmap = {}

    def populate(self):
        now = arrow.utcnow().timestamp
        for fpath in self.files:
            item = Singular(fpath, self.images, self.comments)
            self.append(item.pubtime, item)
            #self.shortslugmap[item.shortslug] = item.fname

            if item.isfuture:
                logging.warning("skipping future post %s", item.fname)
                continue

            if item.isonfront:
                self.front.append(item.pubtime, item)

            if item.iscategorised:
                if item.category not in self.categories:
                    self.categories[item.category] = Taxonomy(item.category, 'category')
                self.categories[item.category].append(item.pubtime, item)

            for tag in item.tags:
                tslug = slugify(tag, only_ascii=True, lower=True)
                if tslug not in self.tags:
                    self.tags[tslug] = Taxonomy(tag, 'tag', tslug)
                self.tags[tslug].append(item.pubtime, item)
                #self.symlinktag(tslug, item.path)

    #def symlinktag(self, tslug, fpath):
        #fdir, fname = os.path.split(fpath)
        #tagpath = os.path.join(shared.config.get('source', 'tagsdir'), tslug)
        #if not os.path.isdir(tagpath):
            #os.mkdir(tagpath)
        #sympath = os.path.relpath(fdir, tagpath)
        #dst = os.path.join(tagpath, fname)
        #src = os.path.join(sympath, fname)
        #if not os.path.islink(dst):
            #os.symlink(src, dst)

    def sitemap(self):
        target = os.path.join(
            shared.config.get('target', 'builddir'),
            'sitemap.txt'
        )
        urls = []
        for item in self.data.values():
            urls.append("%s/%s/" % (
                shared.config.get('site', 'url'),
                item.fname
            ))

        with open(target, "wt") as f:
            logging.info("writing sitemap to %s", target)
            f.write("\n".join(urls))

    def magicphp(self, renderer):
        redirects = []
        gones = []
        rfile = os.path.join(
            shared.config.get('common', 'basedir'),
            shared.config.get('common', 'redirects')
        )
        if os.path.isfile(rfile):
            with open(rfile, newline='') as csvfile:
                r = csv.reader(csvfile, delimiter=' ')
                for row in r:
                    redirects.append((row[0], row[1]))
        for item in self.data.values():
            redirects.append((item.shortslug, item.fname))

        rfile = os.path.join(
            shared.config.get('common', 'basedir'),
            shared.config.get('common', 'gone')
        )
        if os.path.isfile(rfile):
            with open(rfile, newline='') as csvfile:
                r = csv.reader(csvfile, delimiter=' ')
                for row in r:
                    gones.append(row[0])

        tmplvars = {
            'site': renderer.sitevars,
            'redirects': redirects,
            'gones': gones
        }

        r = renderer.j2.get_template("magic.php").render(tmplvars)
        target = os.path.abspath(os.path.join(
            shared.config.get('target', 'builddir'),
            'magic.php'
        ))

        with open(target, "w") as html:
            logging.debug('writing %s', target)
            html.write(r)


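# Illustrative only: the redirect and gone files read by magicphp above are
# space-delimited, one rule per line; hypothetical contents:
#
#   redirects file:  old-slug new-slug
#   gone file:       some-removed-post
#
# how the magic.php template acts on these (redirect vs. gone response) is
# left to the template itself.

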
class Singular(BaseRenderable):
    def __init__(self, path, images=None, comments=None):
        logging.debug("initiating singular object from %s", path)
        self.path = path
        self.images = images or Images()
        self.allcomments = comments or Comments()
        self.category = splitpath(path)[-2]
        self.mtime = int(os.path.getmtime(self.path))
        self.fname, self.ext = os.path.splitext(os.path.basename(self.path))
        self.meta = {}
        self.content = ''
        self.photo = self.images.data.get("%s.jpg" % self.fname, None)
        if self.photo:
            self.photo.singleimage = True
        self.__parse()

    def __repr__(self):
        return "%s (lastmod: %s)" % (self.fname, self.published)

    def __parse(self):
        with open(self.path, mode='rt') as f:
            self.meta, self.content = frontmatter.parse(f.read())
        #self.__filter_syndication()
        self.__filter_favs()
        self.__filter_images()
        #if self.isphoto:
            #self.content = "%s\n\n%s" % (
                #self.photo,
                #self.content,
            #)
        #if shared.config.getboolean('params', 'nooffline'):
            #return
        #trigger = self.offlinecopies

    #def __filter_syndication(self):
        #syndications = self.meta.get('syndicate', None)
        #if not syndications:
            #return
        #s = "\n\n".join(['<a href="%s" class="u-syndication"></a>' % s for s in syndications])
        #self.content = "%s\n\n%s" % (
            #s,
            #self.content
        #)

    def __filter_favs(self):
        url = self.meta.get('favorite-of',
            self.meta.get('like-of',
                self.meta.get('bookmark-of',
                    False
                )
            )
        )

        if not url:
            return

        img = self.meta.get('image', False)
        imgs = self.meta.get('images', [])
        if img:
            imgs.append(img)
        if not imgs or not len(imgs):
            return

        c = ''
        for i in imgs:
            c = '%s\n[![%s](/%s/%s)](%s){.favurl}' % (
                c,
                self.title,
                shared.config.get('source', 'files'),
                i,
                url
            )

        if self.isbookmark:
            c = "%s\n\n%s" % (c, self.content)

        self.content = c

    def __filter_images(self):
        linkto = False
        isrepost = None

        if len(self.reactions.keys()):
            isrepost = list(self.reactions.keys())[0]
            if isrepost and \
            len(self.reactions[isrepost]) == 1:
                linkto = self.reactions[isrepost][0]

        m = shared.MDIMGREGEX.findall(self.content)
        if not m:
            logging.debug("no images found")
            return

        for shortcode, alt, fname, title, cl in m:
            image = self.images.data.get(fname, None)
            if not image:
                logging.debug("%s not found in images", fname)
                continue

            if cl:
                image.cl = cl

            logging.debug(
                "replacing %s in content with %s",
                shortcode,
                "%s" % image
            )
            self.content = self.content.replace(
                shortcode,
                "%s" % image
            )

    @property
    def comments(self):
        if hasattr(self, '_comments'):
            return self._comments

        # the comments could point to both the "real" url and the shortslug
        # so I need to get both
        c = {}
        for by in [self.fname, self.shortslug]:
            c = {**c, **self.allcomments[by].data}
        #self._comments = [c[k].tmplvars for k in list(sorted(c.keys(), reverse=True))]
        self._comments = [c[k] for k in list(sorted(c.keys(), reverse=False))]
        return self._comments

    @property
    def replies(self):
        if hasattr(self, '_replies'):
            return self._replies
        self._replies = [c.tmplvars for c in self.comments if not len(c.reacji)]
        return self._replies

    @property
    def reacjis(self):
        if hasattr(self, '_reacjis'):
            return self._reacjis
        reacjis = {}
        for c in self.comments:
            rj = c.reacji
            if not len(rj):
                continue
            if not reacjis.get(rj, False):
                reacjis[rj] = []
            reacjis[rj].append(c.tmplvars)
        self._reacjis = reacjis
        return self._reacjis

    @property
    def reactions(self):
        if hasattr(self, '_reactions'):
            return self._reactions
        # getting rid of '-' to avoid css trouble and similar
        convert = {
            'bookmark-of': 'bookmark',
            'repost-of': 'repost',
            'in-reply-to': 'reply',
            'favorite-of': 'fav',
            'like-of': 'like',
        }
        reactions = {}

        for k, v in convert.items():
            x = self.meta.get(k, None)
            if not x:
                continue
            if isinstance(x, str):
                x = [x]
            reactions[v] = x

        self._reactions = reactions
        return self._reactions

    @property
    def author(self):
        return dict(shared.config.items('author'))

    @property
    def license(self):
        if hasattr(self, '_licence'):
            return self._licence
        if 'article' == self.category:
            l = {
                'url': 'https://creativecommons.org/licenses/by/4.0/',
                'text': 'CC BY 4.0',
                'description': 'Licensed under <a href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International</a>. You are free to share or republish, even if modified, if you link back here and indicate the modifications, even for commercial use.'
            }
        elif 'journal' == self.category:
            l = {
                'url': 'https://creativecommons.org/licenses/by-nc/4.0/',
                'text': 'CC BY-NC 4.0',
                'description': 'Licensed under <a href="https://creativecommons.org/licenses/by-nc/4.0/">Creative Commons Attribution-NonCommercial 4.0 International</a>. You are free to share or republish, even if modified, if you link back here and indicate the modifications, for non commercial use. For commercial use please contact the author.'
            }
        else:
            l = {
                'url': 'https://creativecommons.org/licenses/by-nc-nd/4.0/',
                'text': 'CC BY-NC-ND 4.0',
                'description': 'Licensed under <a href="https://creativecommons.org/licenses/by-nc-nd/4.0/">Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International</a>. You are free to share if you link back here for non commercial use, but you can\'t publish any altered versions of it. For commercial use please contact the author.'
            }
        self._licence = l
        return self._licence

    @property
    def syndicate(self):
        return self.meta.get('syndicate', [])

    @property
    def urls(self):
        if hasattr(self, '_urls'):
            return self._urls
        #urls = shared.URLREGEX.findall(self.content)
        #urls = [*urls, *self.syndicate]
        urls = list(self.syndicate)
        for reactionurls in self.reactions.values():
            urls = [*urls, *reactionurls]
        #r = []
        #logging.debug('searching for urls for %s', self.fname)
        #for link in urls:
            #purl = urllib.parse.urlparse(link)
            #if purl.netloc in shared.config.get('site', 'domains'):
                #logging.debug('excluding url %s - %s matches %s', link, purl.netloc, shared.config.get('site', 'domains'))
                #continue
            #if link in r:
                #continue
            #r.append(link)
        self._urls = urls
        logging.debug('urls for %s: %s', self.fname, urls)
        return self._urls

    @property
    def lang(self):
        if hasattr(self, '_lang'):
            return self._lang
        lang = 'en'
        try:
            lang = langdetect.detect("\n".join([
                self.title,
                self.content
            ]))
        except Exception:
            # fall back to 'en' when detection fails, e.g. too little text
            pass
        self._lang = lang
        return self._lang

    @property
    def tags(self):
        return list(self.meta.get('tags', []))

    @property
    def published(self):
        if hasattr(self, '_published'):
            return self._published
        self._published = arrow.get(
            self.meta.get('published', self.mtime)
        )
        return self._published

    @property
    def updated(self):
        if hasattr(self, '_updated'):
            return self._updated
        self._updated = arrow.get(
            self.meta.get('updated',
                self.meta.get('published', self.mtime)
            )
        )
        return self._updated

    @property
    def pubtime(self):
        return int(self.published.timestamp)

    @property
    def isphoto(self):
        if not self.photo:
            return False
        return self.photo.is_photo

    @property
    def isbookmark(self):
        return self.meta.get('bookmark-of', False)

    @property
    def isreply(self):
        return self.meta.get('in-reply-to', False)

    @property
    def isfuture(self):
        now = arrow.utcnow().timestamp
        if self.pubtime > now:
            return True
        return False

    # TODO
    #@property
    #def isrvsp(self):
    #    r'<data class="p-rsvp" value="([^"])">([^<]+)</data>'

    @property
    def isfav(self):
        r = False
        for maybe in ['like-of', 'favorite-of']:
            maybe = self.meta.get(maybe, False)
            if maybe:
                r = maybe
                break
        return r

    @property
    def ispage(self):
        if not self.meta:
            return True
        return False

    @property
    def isonfront(self):
        if self.ispage:
            return False
        if self.isbookmark:
            return False
        if self.isfav:
            return False
        return True

    @property
    def iscategorised(self):
        if self.ispage:
            return False
        return True

    @property
    def summary(self):
        return self.meta.get('summary', '')

    @property
    def title(self):
        if hasattr(self, '_title'):
            return self._title

        self._title = ''
        for maybe in ['title', 'bookmark-of', 'in-reply-to', 'repost-of']:
            maybe = self.meta.get(maybe, False)
            if maybe:
                if isinstance(maybe, list):
                    maybe = maybe.pop()
                self._title = maybe.replace('\n', ' ').replace('\r', '')
                break
        return self._title

    @property
    def url(self):
        return "%s/%s/" % (shared.config.get('site', 'url'), self.fname)

    @property
    def tmplfile(self):
        if self.ispage:
            return 'page.html'
        else:
            return 'singular.html'

    @property
    def html(self):
        if hasattr(self, '_html'):
            return self._html
        self._html = shared.Pandoc().convert(self.content)
        return self._html

    @property
    def sumhtml(self):
        if hasattr(self, '_sumhtml'):
            return self._sumhtml
        self._sumhtml = self.meta.get('summary', '')
        if len(self._sumhtml):
            self._sumhtml = shared.Pandoc().convert(self.summary)
        return self._sumhtml

    #@property
    #def offlinecopies(self):
        ## stupidly simple property caching
        #if hasattr(self, 'copies'):
            #return self.copies
        #copies = {}
        #for maybe in ['bookmark-of', 'in-reply-to', 'repost-of', 'favorite-of']:
            #maybe = self.meta.get(maybe, False)
            #if not maybe:
                #continue
            #if not isinstance(maybe, list):
                #maybe = [maybe]
            #for url in maybe:
                #arch = OfflineArchive(url)
                #arch.run()
                ##copies[url] = arch.read()
        ##self.copies = copies
        ##return copies

    @property
    def exif(self):
        if not self.isphoto:
            return {}
        return self.photo.exif

    #@property
    #def rssenclosure(self):
        #if not self.isphoto:
            #return {}
        #return self.photo.rssenclosure

    @property
    def tmplvars(self):
        if hasattr(self, '_tmplvars'):
            return self._tmplvars
        self._tmplvars = {
            'title': self.title,
            'published': self.published.datetime,
            'tags': self.tags,
            'author': self.author,
            'content': self.content,
            'html': self.html,
            'category': self.category,
            'reactions': self.reactions,
            'updated': self.updated.datetime,
            'summary': self.summary,
            'sumhtml': self.sumhtml,
            'exif': self.exif,
            'lang': self.lang,
            'syndicate': self.syndicate,
            'slug': self.fname,
            'shortslug': self.shortslug,
            'comments': self.comments,
            'replies': self.replies,
            'reacjis': self.reacjis,
            'photo': {},
            'rssenclosure': {},
            'license': self.license,
        }
        if self.isphoto:
            self._tmplvars.update({
                'photo': self.photo.tmplvars,
                'rssenclosure': self.photo.rssenclosure
            })
        return self._tmplvars

    @property
    def shortslug(self):
        if hasattr(self, '_shortslug'):
            return self._shortslug
        self._shortslug = shared.baseN(self.pubtime)
        return self._shortslug

    @property
    def target(self):
        targetdir = os.path.abspath(os.path.join(
            shared.config.get('target', 'builddir'),
            self.fname
        ))
        return os.path.join(targetdir, 'index.html')

    async def rendercomments(self, renderer):
        for comment in self.comments:
            await comment.render(renderer)

    async def render(self, renderer=None):
        renderer = renderer or Renderer()
        # this is only when I want salmentions and I want to include all of
        # the comments as well; otherwise it affects both webmentions sending
        # and search indexing
        #if len(self.comments):
            #lctime = self.comments[0].mtime
            #if lctime > self.mtime:
                #self.mtime = lctime
        #await self.rendercomments(renderer)

        mtime = self.mtime
        if len(self.comments):
            lctime = self.comments[0].mtime
            if lctime > self.mtime:
                mtime = lctime

        logging.info("rendering and saving %s", self.fname)
        if not shared.config.getboolean('params', 'force') and os.path.isfile(self.target):
            ttime = int(os.path.getmtime(self.target))
            logging.debug('ttime is %d mtime is %d', ttime, mtime)
            if ttime == mtime:
                logging.debug(
                    '%s exists and up-to-date (lastmod: %d)',
                    self.target,
                    ttime
                )
                return

        tmplvars = {
            'post': self.tmplvars,
            'site': renderer.sitevars,
            'taxonomy': {},
        }
        r = renderer.j2.get_template(self.tmplfile).render(tmplvars)
        self.writerendered(r, mtime)

    async def ping(self, pinger):
        if self.isfuture:
            return

        logging.debug('urls in %s: %s', self.fname, self.urls)
        for target in self.urls:
            record = {
                'mtime': self.mtime,
                'source': self.url,
                'target': target
            }
            h = json.dumps(record, sort_keys=True)
            h = hashlib.sha1(h.encode('utf-8')).hexdigest()
            if pinger.db.get(h, False):
                logging.debug(
                    "%s is already pinged from %s @ %d, skipping",
                    target, self.url, self.mtime
                )
                continue

            logging.info("sending webmention from %s to %s", self.url, target)
            ws = WebmentionSend(self.url, target)
            try:
                ws.send(allow_redirects=True, timeout=30)
            except Exception as e:
                logging.error('ping failed to %s: %s', target, e)

            pinger.db[h] = record


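# Illustrative only: Singular.ping deduplicates webmentions by the sha1 of the
# sorted-keys JSON record; a hypothetical record would be keyed like this:
# >>> record = {'mtime': 1496404795,
# ...           'source': 'https://example.com/some-post/',
# ...           'target': 'https://example.net/linked-page/'}
# >>> hashlib.sha1(json.dumps(record, sort_keys=True).encode('utf-8')).hexdigest()
# -> a 40-character hex digest, used as the key in Webmentioner's db

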
class Webmentioner(object):
    def __init__(self):
        self.dbpath = os.path.abspath(os.path.join(
            shared.config.get('target', 'builddir'),
            shared.config.get('var', 'webmentions')
        ))

        if os.path.isfile(self.dbpath):
            with open(self.dbpath, 'rt') as f:
                self.db = json.loads(f.read())
        else:
            self.db = {}

    def finish(self):
        with open(self.dbpath, 'wt') as f:
            f.write(json.dumps(self.db, sort_keys=True, indent=4))


class NASG(object):
    lockfile = os.path.join(tempfile.gettempdir(), 'nasg_%s.lock' % getpass.getuser())

    def __init__(self):
        # --- set params
        parser = argparse.ArgumentParser(description='Parameters for NASG')
        parser.add_argument(
            '--clear',
            action='store_true',
            default=False,
            help='clear build directory in advance'
        )
        parser.add_argument(
            '--regenerate',
            action='store_true',
            default=False,
            help='force downsizing images'
        )
        parser.add_argument(
            '--force',
            action='store_true',
            default=False,
            help='force rendering HTML'
        )
        parser.add_argument(
            '--loglevel',
            default='error',
            help='change loglevel'
        )
        parser.add_argument(
            '--nooffline',
            action='store_true',
            default=False,
            help='skip offline copy checks'
        )
        parser.add_argument(
            '--nodownsize',
            action='store_true',
            default=False,
            help='skip image downsizing'
        )
        parser.add_argument(
            '--norender',
            action='store_true',
            default=False,
            help='skip rendering'
        )
        parser.add_argument(
            '--refetch',
            action='store_true',
            default=False,
            help='force re-fetching offline archives'
        )

        params = vars(parser.parse_args())
        shared.config.add_section('params')
        for k, v in params.items():
            shared.config.set('params', k, str(v))

        # remove the rest of the potential loggers
        while len(logging.root.handlers) > 0:
            logging.root.removeHandler(logging.root.handlers[-1])

        # --- set loglevel
        logging.basicConfig(
            level=shared.LLEVEL[shared.config.get('params', 'loglevel')],
            format='%(asctime)s - %(levelname)s - %(message)s'
        )

    async def __adownsize(self):
        for fname, img in self.images:
            await img.downsize(self.existing)

    async def __acrender(self):
        for (pubtime, singular) in self.content:
            await singular.render(self.renderer)

    async def __atrender(self):
        for e in [self.content.categories, self.content.tags]:
            for name, t in e.items():
                await t.render(self.renderer)

    async def __afrender(self):
        await self.content.front.render(self.renderer)

    async def __aindex(self):
        for (pubtime, singular) in self.content:
            await self.searchdb.append(singular)

    async def __aping(self):
        for (pubtime, singular) in self.content:
            await singular.ping(self.pinger)

    def __atindexrender(self):
        m = {
            'category': self.content.categories,
            'tag': self.content.tags
        }

        for p, taxonomy in m.items():
            target = os.path.abspath(os.path.join(
                shared.config.get('target', 'builddir'),
                p,
                'index.html'
            ))

            tmplvars = {
                'site': self.renderer.sitevars,
                'taxonomy': {},
                'taxonomies': {}
            }

            for name, t in taxonomy.items():
                logging.debug('adding %s to taxonomyindex', name)
                tmplvars['taxonomies'][name] = [
                    {'title': t.data[k].title, 'url': t.data[k].url}
                    for k in list(sorted(t.data.keys(), reverse=True))
                ]

            tmplvars['taxonomies'] = sorted(tmplvars['taxonomies'].items())
            logging.debug('rendering taxonomy index to %s', target)
            r = self.renderer.j2.get_template('archiveindex.html').render(tmplvars)
            with open(target, "wt") as html:
                html.write(r)

    @property
    def images(self):
        if hasattr(self, '_images'):
            return self._images
        logging.info("discovering images")
        images = Images()
        images.populate()
        self._images = images
        return self._images

    @property
    def content(self):
        if hasattr(self, '_content'):
            return self._content
        logging.info("discovering content")
        content = Content(self.images, self.comments)
        content.populate()
        self._content = content
        return self._content

    @property
    def comments(self):
        if hasattr(self, '_comments'):
            return self._comments
        logging.info("discovering comments")
        comments = Comments()
        comments.populate()
        self._comments = comments
        return self._comments

    @property
    def existing(self):
        if hasattr(self, '_existing'):
            return self._existing
        existing = glob.glob(os.path.join(
            shared.config.get('target', 'filesdir'),
            "*"
        ))
        self._existing = existing
        return self._existing

    @property
    def renderer(self):
        if hasattr(self, '_renderer'):
            return self._renderer
        self._renderer = Renderer()
        return self._renderer

    @property
    def searchdb(self):
        if hasattr(self, '_searchdb'):
            return self._searchdb
        self._searchdb = Indexer()
        return self._searchdb

    @property
    def pinger(self):
        if hasattr(self, '_pinger'):
            return self._pinger
        self._pinger = Webmentioner()
        return self._pinger

    def run(self):
        if os.path.isfile(self.lockfile):
            raise ValueError(
                "Lockfile is present at %s; another instance is running." % (
                    self.lockfile
                )
            )
        else:
            atexit.register(os.remove, self.lockfile)
            with open(self.lockfile, "wt") as f:
                f.write(arrow.utcnow().format())

        if shared.config.getboolean('params', 'clear'):
            input('about to clear build directory, press enter to continue')
            shutil.rmtree(os.path.abspath(
                shared.config.get('target', 'builddir')
            ))

        loop = asyncio.get_event_loop()

        for d in shared.config.options('target'):
            if 'dir' in d and not os.path.isdir(shared.config.get('target', d)):
                os.mkdir(shared.config.get('target', d))

        if not shared.config.getboolean('params', 'nodownsize'):
            logging.info("downsizing images")
            loop.run_until_complete(self.__adownsize())

        if not shared.config.getboolean('params', 'norender'):
            logging.info("rendering content")
            loop.run_until_complete(self.__acrender())

            logging.info("rendering categories and tags")
            loop.run_until_complete(self.__atrender())

            logging.info("rendering the front page elements")
            loop.run_until_complete(self.__afrender())

            logging.info("rendering taxonomy indexes")
            self.__atindexrender()

            logging.info("rendering sitemap")
            self.content.sitemap()

        logging.info("render magic.php")
        self.content.magicphp(self.renderer)

        logging.info("copy the static bits")
        src = shared.config.get('source', 'staticdir')
        for item in os.listdir(src):
            s = os.path.join(src, item)
            d = os.path.join(shared.config.get('target', 'builddir'), item)
            logging.debug("copying %s to %s", s, d)
            shutil.copy2(s, d)

        logging.info("populating searchdb")
        loop.run_until_complete(self.__aindex())
        self.searchdb.finish()

        logging.info("webmentioning urls")
        loop.run_until_complete(self.__aping())
        self.pinger.finish()

        loop.close()


if __name__ == '__main__':
    worker = NASG()
    worker.run()