nasg/nasg.py

1342 lines
40 KiB
Python
Raw Normal View History

2017-05-23 11:14:47 +01:00
#!/usr/bin/env python3
import os
import re
import configparser
2017-05-23 11:13:35 +01:00
import argparse
2017-05-23 11:14:47 +01:00
import shutil
2017-05-23 11:13:35 +01:00
import logging
2017-05-23 11:14:47 +01:00
import json
import glob
import tempfile
import atexit
2017-05-23 11:13:35 +01:00
import re
2017-05-23 11:14:47 +01:00
import hashlib
import math
import asyncio
import csv
2017-05-23 11:14:47 +01:00
import magic
2017-05-23 11:13:35 +01:00
import arrow
2017-05-23 11:14:47 +01:00
import wand.image
import similar_text
import frontmatter
2017-05-23 11:13:35 +01:00
from slugify import slugify
2017-05-23 11:14:47 +01:00
import langdetect
import requests
from breadability.readable import Article
from whoosh import index
import jinja2
import urllib.parse
2017-05-23 11:14:47 +01:00
import shared
def splitpath(path):
    """ Split a filesystem path into the list of its components

    The leading component of a relative path is kept as well, so negative
    indexing (e.g. ``splitpath(p)[-2]`` for the parent directory name) works
    for short relative paths too.

    :param path: path to split
    :type path: str
    :return: path components in order, without separators; an empty list
        for an empty path or for a path ending with a separator
    :rtype: list
    """
    parts = []
    head, tail = os.path.split(path)
    while tail:
        parts.append(tail)
        if not head:
            # relative path exhausted: its first component was just stored
            break
        head, tail = os.path.split(head)
    # components were collected leaf-first; reversing once is cheaper than
    # inserting at the front on every iteration
    parts.reverse()
    return parts
class Indexer(object):
    """ Builds a Whoosh search index in a temporary directory and copies the
    finished index into the build directory """

    def __init__(self):
        # the first positional argument of mkdtemp() is the *suffix*; the
        # trailing underscore of the name shows that a prefix was meant
        self.tmp = tempfile.mkdtemp(
            prefix='whooshdb_',
            dir=tempfile.gettempdir()
        )
        # remove the temporary index directory when the interpreter exits
        atexit.register(
            shutil.rmtree,
            os.path.abspath(self.tmp)
        )
        self.ix = index.create_in(self.tmp, shared.schema)
        self.target = os.path.abspath(os.path.join(
            shared.config.get('target', 'builddir'),
            shared.config.get('var', 'searchdb')
        ))
        self.writer = self.ix.writer()

    async def append(self, singular):
        """ Add one post to the index

        :param singular: the post to index; its file name, summary, content
            and the string form of its offline copies become the searchable
            text
        """
        logging.info("appending search index with %s", singular.fname)

        texts = [
            singular.fname,
            singular.summary,
            singular.content,
        ]
        texts.extend(
            "%s" % offlinecopy
            for offlinecopy in singular.offlinecopies.values()
        )

        # pages get the highest weight, then bookmarks, then everything else
        weight = 1
        if singular.isbookmark:
            weight = 10
        if singular.ispage:
            weight = 100

        self.writer.add_document(
            title=singular.title,
            url=singular.url,
            content=" ".join(map(str, texts)),
            date=singular.published.datetime,
            tags=",".join(map(str, singular.tags)),
            weight=weight,
            img="%s" % singular.photo
        )

    def finish(self):
        """ Commit the index and replace the target directory with it """
        self.writer.commit()
        if os.path.isdir(self.target):
            shutil.rmtree(self.target)
        shutil.copytree(self.tmp, self.target)
class OfflineCopy(object):
    """ Local snapshot of a remote URL, stored as a frontmatter file named
    after the SHA1 hash of the URL """

    def __init__(self, url):
        """
        :param url: the URL to keep a copy of
        :type url: str
        """
        self.url = url
        self.fname = hashlib.sha1(url.encode('utf-8')).hexdigest()
        self.targetdir = os.path.abspath(
            shared.config.get('source', 'offlinecopiesdir')
        )
        self.target = os.path.join(self.targetdir, self.fname)
        self.fm = frontmatter.loads('')
        self.fm.metadata = {
            'url': self.url,
            'date': arrow.utcnow().format("YYYY-MM-DDTHH:mm:ssZ"),
        }

    def __repr__(self):
        return self.fm.content

    def write(self):
        """ Save the current frontmatter document to the target file """
        logging.info(
            "savig offline copy of\n\t%s to:\n\t%s",
            self.url,
            self.target
        )
        # remote pages can contain any character, so the encoding must not
        # depend on the locale the generator happens to run under
        with open(self.target, 'wt', encoding='utf-8') as f:
            f.write(frontmatter.dumps(self.fm))

    def run(self):
        """ Load the stored copy if it exists, otherwise fetch and store it

        A failed, non-OK or empty response still writes a copy that has only
        the initial metadata; because an existing file short-circuits this
        method, such a URL is not requested again on later runs.
        """
        if os.path.isfile(self.target):
            with open(self.target, encoding='utf-8') as f:
                self.fm = frontmatter.loads(f.read())
            return

        logging.info("prepairing offline copy of %s", self.url)
        headers = requests.utils.default_headers()
        headers.update({
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
        })

        try:
            r = requests.get(
                self.url,
                allow_redirects=True,
                timeout=60,
                headers=headers
            )
        except Exception as e:
            # deliberately broad: any failure to fetch is logged and leaves
            # a metadata-only copy behind instead of aborting the build
            logging.error("%s failed:\n%s", self.url, e)
            self.write()
            return

        if r.status_code != requests.codes.ok:
            logging.warning("%s returned %s", self.url, r.status_code)
            self.write()
            return

        if not r.text:
            logging.warning("%s was empty", self.url)
            self.write()
            return

        doc = Article(r.text, url=self.url)
        self.fm.metadata['title'] = doc._original_document.title
        self.fm.metadata['realurl'] = r.url
        # NOTE(review): this module defines no `Pandoc` (only a commented-out
        # one), so the bare name raised NameError; presumably the class lives
        # in `shared` next to `shared.CMDLine` - confirm
        self.fm.content = shared.Pandoc(False).convert(doc.readable)
        self.write()
class Renderer(object):
    """ Jinja2 environment with the custom filters, plus the site-wide
    template variables read from the configuration """

    def __init__(self):
        self.sitevars = dict(shared.config.items('site'))
        self.sitevars['author'] = dict(shared.config.items('author'))
        self.sitevars['author']['socials'] = dict(shared.config.items('socials'))
        self.jinjaldr = jinja2.FileSystemLoader(
            searchpath=shared.config.get('source', 'templatesdir')
        )
        self.j2 = jinja2.Environment(loader=self.jinjaldr)
        self.j2.filters['date'] = Renderer.jinja_filter_date
        self.j2.filters['search'] = Renderer.jinja_filter_search
        self.j2.filters['slugify'] = Renderer.jinja_filter_slugify

    @staticmethod
    def jinja_filter_date(d, form='%Y-%m-%d %H:%M:%S'):
        """ Format a date for the templates

        :param d: object with a strftime() method, or the string 'now' for
            the current time
        :param form: strftime format string; 'c' is a shorthand for
            '%Y-%m-%dT%H:%M:%S%z'
        :type form: str
        :return: the formatted date
        :rtype: str
        """
        # resolve the shorthand first, so it applies to 'now' as well
        if form == 'c':
            form = '%Y-%m-%dT%H:%M:%S%z'
        if d == 'now':
            d = arrow.now()
        return d.strftime(form)

    @staticmethod
    def jinja_filter_slugify(s):
        """ Turn a string into a lowercase, ASCII-only slug """
        return slugify(s, only_ascii=True, lower=True)

    @staticmethod
    def jinja_filter_search(s, r):
        """ Tell if `r` is contained in `s`

        :return: True if `r in s`, False otherwise
        :rtype: bool
        """
        return r in s
class BaseIter(object):
    """ Minimal keyed collection: a dict in `data` that refuses to overwrite
    existing keys and iterates as (key, value) pairs """

    def __init__(self):
        self.data = {}
        # iterator state of __next__; created lazily on the first next()
        self._cursor = None

    def append(self, key, value):
        """ Store value under key; a duplicate key is logged and ignored """
        if key in self.data:
            logging.error("duplicate key: %s", key)
            return
        self.data[key] = value

    def __getitem__(self, key):
        """ Return the value stored for key, or an empty dict if missing """
        return self.data.get(key, {})

    def __repr__(self):
        # default=str: the stored values are usually not JSON serializable
        # objects, and a repr must not raise because of that
        return json.dumps(list(self.data.values()), default=str)

    def __next__(self):
        """ Return the next (key, value) pair

        The previous implementation called the non-existent `dict.next()`
        inside a bare `except`, so it raised StopIteration on every call.

        :raises StopIteration: when all pairs were returned; the following
            call starts over from the first pair
        """
        cursor = getattr(self, '_cursor', None)
        if cursor is None:
            cursor = self._cursor = iter(self.data.items())
        try:
            return next(cursor)
        except StopIteration:
            self._cursor = None
            raise

    def __iter__(self):
        """ Iterate over the stored (key, value) pairs """
        yield from self.data.items()
#class CMDLine(object):
#def __init__(self, executable):
#self.executable = self._which(executable)
#if self.executable is None:
#raise OSError('No %s found in PATH!' % executable)
#return
#@staticmethod
#def _which(name):
#for d in os.environ['PATH'].split(':'):
#which = glob.glob(os.path.join(d, name), recursive=True)
#if which:
#return which.pop()
#return None
#def __enter__(self):
#self.process = subprocess.Popen(
#[self.executable, "-stay_open", "True", "-@", "-"],
#universal_newlines=True,
#stdin=subprocess.PIPE, stdout=subprocess.PIPE)
#return self
#def __exit__(self, exc_type, exc_value, traceback):
#self.process.stdin.write("-stay_open\nFalse\n")
#self.process.stdin.flush()
#def execute(self, *args):
#args = args + ("-execute\n",)
#self.process.stdin.write(str.join("\n", args))
#self.process.stdin.flush()
#output = ""
#fd = self.process.stdout.fileno()
#while not output.endswith(self.sentinel):
#output += os.read(fd, 4096).decode('utf-8', errors='ignore')
#return output[:-len(self.sentinel)]
#class Pandoc(CMDLine):
#""" Handles calling external binary `pandoc` in an efficient way """
#def __init__(self, md2html=True):
#super().__init__('pandoc')
#if md2html:
#self.i = "markdown+" + "+".join([
#'backtick_code_blocks',
#'auto_identifiers',
#'fenced_code_attributes',
#'definition_lists',
#'grid_tables',
#'pipe_tables',
#'strikeout',
#'superscript',
#'subscript',
#'markdown_in_html_blocks',
#'shortcut_reference_links',
#'autolink_bare_uris',
#'raw_html',
#'link_attributes',
#'header_attributes',
#'footnotes',
#])
#self.o = 'html5'
#else:
#self.o = "markdown-" + "-".join([
#'raw_html',
#'native_divs',
#'native_spans',
#])
#self.i = 'html'
#def convert(self, text):
#cmd = (
#self.executable,
#'-o-',
#'--from=%s' % self.i,
#'--to=%s' % self.o
#)
#logging.debug('converting content with Pandoc')
#p = subprocess.Popen(
#cmd,
#stdin=subprocess.PIPE,
#stdout=subprocess.PIPE,
#stderr=subprocess.PIPE,
#)
#stdout, stderr = p.communicate(input=text.encode())
#if stderr:
#logging.error("Error during pandoc covert:\n\t%s\n\t%s", cmd, stderr)
#return stdout.decode('utf-8').strip()
2017-05-23 11:14:47 +01:00
# based on http://stackoverflow.com/a/10075210
class ExifTool(shared.CMDLine):
    """ Handles calling external binary `exiftool` in an efficient way """
    sentinel = "{ready}\n"

    def __init__(self):
        super().__init__('exiftool')

    def get_metadata(self, *filenames):
        """ Run exiftool on the given files and return its decoded JSON

        :param filenames: paths of the files to read the metadata of
        :return: the parsed JSON output of exiftool
        """
        # output options first, then the tags to extract
        options = (
            '-sort',
            '-json',
            '-MIMEType',
            '-FileType',
            '-FileName',
            '-ModifyDate',
            '-CreateDate',
            '-DateTimeOriginal',
            '-ImageHeight',
            '-ImageWidth',
            '-Aperture',
            '-FOV',
            '-ISO',
            '-FocalLength',
            '-FNumber',
            '-FocalLengthIn35mmFormat',
            '-ExposureTime',
            '-Copyright',
            '-Artist',
            '-Model',
            '-GPSLongitude#',
            '-GPSLatitude#',
            '-LensID',
        )
        raw = self.execute(*options, *filenames)
        return json.loads(raw)
class Images(BaseIter):
    """ Collection of the images found in the source files and photos
    directories, keyed by file name """

    def __init__(self, extensions=None):
        """
        :param extensions: file extensions (without dot) to look for;
            defaults to jpg, gif and png
        :type extensions: list
        """
        super(Images, self).__init__()
        if extensions is None:
            # built per call instead of sharing one mutable default list
            extensions = ['jpg', 'gif', 'png']
        logging.info(
            "initiating images with extensions: %s",
            extensions
        )
        self.files = []
        self.data = {}
        paths = [
            shared.config.get('source', 'filesdir'),
            shared.config.get('source', 'photosdir')
        ]
        for p in paths:
            for ext in extensions:
                self.files += glob.glob(os.path.join(p, "*.%s" % ext))

    def populate(self):
        """ Read the metadata of all discovered files with exiftool and
        store a WebImage for each of them

        :raises ValueError: if two files share the same file name
        """
        if not self.files:
            # NOTE(review): without files exiftool would be called with no
            # file arguments; presumably its output would not parse as
            # JSON, so skip the call altogether
            logging.warning("no image files found, nothing to populate")
            return

        with ExifTool() as exiftool:
            records = exiftool.get_metadata(*self.files)

        # parsing the returned meta into a dict of [filename]={meta}
        for record in records:
            if 'FileName' not in record:
                logging.error("missing 'FileName' in element %s", record)
                continue
            fname = os.path.basename(record.pop('FileName'))
            # duplicate files are going to be a problem, so don't send it
            # away with a simple error log entry
            if fname in self.data:
                # the file name was passed as a second exception argument
                # before, so the message was never formatted
                raise ValueError('filename collision: %s' % fname)
            # convert dates
            meta = {k: self.exifdate(v) for k, v in record.items()}
            self.data[fname] = WebImage(fname, meta)

    def exifdate(self, value):
        """ converts an EXIF date string to ISO 8601 format

        :param value: EXIF date (2016:05:01 00:08:24)
        :type value: str
        :return: ISO 8601 string with a fixed +0000 offset appended
            (2016-05-01T00:08:24+0000); anything that is not a string or
            does not match the EXIF date pattern is returned unchanged
        :rtype: str
        """
        if not isinstance(value, str):
            return value
        match = shared.EXIFREXEG.match(value)
        if not match:
            return value
        return "%s-%s-%sT%s+0000" % (
            match.group('year'),
            match.group('month'),
            match.group('day'),
            match.group('time')
        )
class WebImage(object):
    """ A single image with its metadata and the downsized variants that are
    configured in the [downsize] section """

    def __init__(self, fname, meta):
        """
        :param fname: file name of the image, with extension
        :type fname: str
        :param meta: metadata of the file as returned by exiftool
        :type meta: dict
        """
        logging.info(
            "parsing image: %s",
            fname
        )
        self.meta = meta
        self.fpath = os.path.abspath(meta.get('SourceFile', fname))
        self.fname, self.ext = os.path.splitext(fname)
        self.alttext = ''
        self.sizes = []
        self.fallbacksize = int(shared.config.get('common','fallbackimg', fallback='720'))
        self.cl = None

        # every option of [downsize] is a size; its value is the file name
        # suffix of that size
        for size in shared.config.options('downsize'):
            sizeext = shared.config.get('downsize', size)
            sized = "%s_%s%s" % (self.fname, sizeext, self.ext)
            self.sizes.append((
                int(size),
                {
                    'fpath': os.path.join(
                        shared.config.get('target', 'filesdir'),
                        sized
                    ),
                    'url': "%s/%s/%s" % (
                        shared.config.get('site', 'url'),
                        shared.config.get('source', 'files'),
                        sized
                    ),
                    'crop': shared.config.getboolean('crop', size, fallback=False),
                }
            ))
        # ascending by size; the key keeps the dicts out of the comparison
        self.sizes.sort(key=lambda entry: entry[0])

        self.target = False
        if self.is_downsizeable:
            largest = self.sizes[-1][1]['url']
            # use the largest variant when the fallback size is not one of
            # the configured sizes, instead of failing with IndexError
            self.fallback = next(
                (m['url'] for s, m in self.sizes if s == self.fallbacksize),
                largest
            )
            self.target = largest
        else:
            self.fallback = "%s/%s/%s" % (
                shared.config.get('site', 'url'),
                shared.config.get('source', 'files'),
                "%s%s" % (self.fname, self.ext)
            )

    def __str__(self):
        """ HTML for the image: a linked figure if it has downsized
        variants and no CSS class was set, a plain img tag otherwise """
        if self.is_downsizeable and not self.cl:
            return '\n<figure class="photo"><a target="_blank" class="adaptive" href="%s"><img src="%s" class="adaptimg" alt="%s" /></a><figcaption class=\"caption\">%s%s</figcaption></figure>\n' % (
                self.target,
                self.fallback,
                self.alttext,
                self.fname,
                self.ext
            )
        elif self.cl:
            # dot separated class names become space separated ones; done on
            # a local so that rendering does not modify the object
            classes = self.cl.replace('.', ' ')
            return '<img src="%s" class="%s" alt="%s" title="%s%s" />' % (
                self.fallback,
                classes,
                self.alttext,
                self.fname,
                self.ext
            )
        else:
            return '<img src="%s" class="aligncenter" alt="%s" title="%s%s" />' % (
                self.fallback,
                self.alttext,
                self.fname,
                self.ext
            )

    @property
    def rssenclosure(self):
        """ Returns the largest available image for RSS to add as attachment

        :return: dict with the 'mime', 'url' and 'bytes' of the file
        :rtype: dict
        """
        if self.is_downsizeable:
            largest = self.sizes[-1][1]
            fpath = largest['fpath']
            url = largest['url']
        else:
            # no variants are generated for such images, only the original
            # is copied by _copy(), so that copy is the enclosure
            fpath = os.path.join(
                shared.config.get('target', 'filesdir'),
                "%s%s" % (self.fname, self.ext)
            )
            url = self.fallback
        return {
            'mime': magic.Magic(mime=True).from_file(fpath),
            'url': url,
            'bytes': os.path.getsize(fpath)
        }

    @property
    def is_photo(self):
        """ Match image meta against config artist regex to see if the file is
        a photo or just a regular image """
        pattern = shared.config.get('photo', 'regex', fallback=None)
        if not pattern or not isinstance(pattern, str):
            return False
        pattern = re.compile(pattern)

        # str(): JSON metadata values are not guaranteed to be strings
        cpr = str(self.meta.get('Copyright', ''))
        art = str(self.meta.get('Artist', ''))
        if not cpr and not art:
            return False

        return bool(pattern.search(cpr) or pattern.search(art))

    @property
    def is_downsizeable(self):
        """ Check if the image is large enough and jpeg or png in order to
        downsize it """
        if not self.sizes:
            # nothing configured to downsize to
            return False
        largest = self.sizes[-1][0]
        ftype = self.meta.get('FileType', None)
        if not ftype:
            return False
        if ftype.lower() not in ('jpeg', 'png'):
            return False
        width = int(self.meta.get('ImageWidth', 0))
        height = int(self.meta.get('ImageHeight', 0))
        return width > largest or height > largest

    def _copy(self):
        """ Copy the original file into the target files directory, unless
        it is already there """
        target = os.path.join(
            shared.config.get('target', 'filesdir'),
            "%s%s" % (self.fname, self.ext)
        )
        if not os.path.isfile(target):
            logging.debug("can't downsize %s, copying instead", self.fname)
            shutil.copy(self.fpath, target)

    def _watermark(self, img):
        """ Composite image by adding watermark file over it

        :param img: the opened image to put the watermark on
        :return: the same image; untouched if the watermark file is missing
        """
        wmarkfile = os.path.join(
            shared.config.get('common', 'basedir'),
            shared.config.get('common', 'watermark')
        )
        if not os.path.isfile(wmarkfile):
            return img

        with wand.image.Image(filename=wmarkfile) as wmark:
            # the watermark is scaled to 16% of the longer side and placed
            # with a 1% margin from the right and bottom edges
            if img.width > img.height:
                w = img.width * 0.16
                h = wmark.height * (w / wmark.width)
                x = img.width - w - (img.width * 0.01)
                y = img.height - h - (img.height * 0.01)
            else:
                # the watermark is rotated below, so its width and height
                # swap roles when the position is calculated
                w = img.height * 0.16
                h = wmark.height * (w / wmark.width)
                x = img.width - h - (img.width * 0.01)
                y = img.height - w - (img.height * 0.01)

            w = round(w)
            h = round(h)
            x = round(x)
            y = round(y)

            wmark.resize(w, h)
            if img.width <= img.height:
                wmark.rotate(-90)
            img.composite(image=wmark, left=x, top=y)
        return img

    def _intermediate_dimensions(self, size, width, height, crop = False):
        """ Calculate the dimensions of a downsized variant, keeping the
        aspect ratio

        Without crop the longer side becomes `size`, with crop the shorter
        one does.

        :return: (width, height) of the variant
        :rtype: tuple
        """
        size = int(size)
        if (width > height and not crop) \
        or (width < height and crop):
            w = size
            h = int(size / width * height)
        else:
            h = size
            w = int(size / height * width)
        return (w, h)

    def _intermediate(self, img, size, meta, existing = None):
        """ Create and save one downsized variant of the image

        :param img: the opened original image
        :param size: the size of the variant
        :type size: int
        :param meta: the 'fpath', 'url' and 'crop' settings of the variant
        :type meta: dict
        :param existing: unused, kept for the callers that pass it
        :return: False if the image already fits into the size and nothing
            was written, True otherwise
        :rtype: bool
        """
        if img.width <= size and img.height <= size:
            return False

        crop = meta.get('crop', False)
        with img.clone() as thumb:
            width, height = self._intermediate_dimensions(
                size,
                img.width,
                img.height,
                crop
            )
            thumb.resize(width, height)

            if crop:
                thumb.liquid_rescale(size, size, 1, 1)

            # NOTE(review): the indentation of SOURCE was lost; the format
            # change is assumed to belong to the jpeg-only branch - confirm
            if self.meta.get('FileType', 'jpeg').lower() == 'jpeg':
                thumb.compression_quality = 86
                thumb.unsharp_mask(
                    radius=0,
                    sigma=0.5,
                    amount=1,
                    threshold=0.03
                )
                thumb.format = 'pjpeg'

            # this is to make sure pjpeg happens
            with open(meta['fpath'], 'wb') as f:
                thumb.save(file=f)

            return True

    async def downsize(self, existing = None):
        """ Generate the downsized variants, or copy the original if the
        image can't be downsized

        :param existing: paths of the files already present in the target
            files directory; variants are regenerated only if one of them
            is missing or the 'regenerate' parameter is set
        """
        if not self.is_downsizeable:
            self._copy()
            return
        logging.info("checking downsizing for %s", self.fname)

        # a set makes the per-variant membership test cheap
        present = set(existing) if existing else set()
        needed = shared.config.getboolean('params', 'regenerate', fallback=False)
        if not needed:
            needed = any(
                meta['fpath'] not in present for (size, meta) in self.sizes
            )

        if not needed:
            logging.debug("downsizing not needed for %s", self.fname)
            return

        with wand.image.Image(filename=self.fpath) as img:
            img.auto_orient()

            if self.is_photo:
                logging.info("%s is a photo", self.fpath)
                img = self._watermark(img)

            for (size, meta) in self.sizes:
                self._intermediate(img, size, meta, existing)
class Taxonomy(BaseIter):
    """ A paginated archive of posts keyed by their publish time: a
    category, a tag, or - without name and taxonomy - the front page """

    def __init__(self, name = None, taxonomy = None, slug = None):
        """
        :param name: display name of the taxonomy entry
        :param taxonomy: kind of the taxonomy, used as directory and URL
            prefix (the callers in this file pass 'category' or 'tag')
        :param slug: slug of the entry; generated from the name if missing
        """
        super(Taxonomy, self).__init__()
        self.name = name
        if name and not slug:
            self.slug = slugify(name, only_ascii=True, lower=True)
        else:
            self.slug = slug
        self.taxonomy = taxonomy

    @property
    def pages(self):
        """ Number of pages needed for all the items """
        return math.ceil(len(self.data) / shared.config.getint('common', 'pagination'))

    def __repr__(self):
        return "taxonomy %s with %d items" % (self.taxonomy, len(self.data))

    @property
    def basep(self):
        """ Directory of the taxonomy kind inside the build directory """
        p = shared.config.get('target', 'builddir')
        if self.taxonomy:
            p = os.path.join(p, self.taxonomy)
        return p

    @property
    def myp(self):
        """ Directory of this taxonomy entry """
        p = self.basep
        if self.slug:
            return os.path.join(p,self.slug)
        return p

    @property
    def feedp(self):
        """ Directory of the feed of this entry """
        return os.path.join(self.myp, 'feed')

    @property
    def pagep(self):
        """ Directory holding the numbered pages of this entry """
        return os.path.join(self.myp, 'page')

    @property
    def baseurl(self):
        """ Site-relative URL of this entry; '/' for the front page """
        if self.taxonomy and self.slug:
            return "/%s/%s/" % (self.taxonomy, self.slug)
        else:
            return '/'

    @property
    def mtime(self):
        """ The largest key (publish time) stored, as int; 0 when empty """
        # max() instead of sorting everything for one element; the default
        # keeps an empty taxonomy from raising
        return int(max(self.data, default=0))

    def __mkdirs(self):
        """ Create the directories the rendered files are written into """
        needed = [self.basep, self.myp, self.feedp]
        if self.pages > 1:
            needed.append(self.pagep)
            for i in range(2, self.pages+1):
                needed.append(os.path.abspath(os.path.join(
                    self.pagep,
                    '%d' % i
                )))

        for p in needed:
            if not os.path.isdir(p):
                logging.debug("creating dir %s", p)
                # makedirs: does not depend on the parents being created
                # earlier in the list
                os.makedirs(p, exist_ok=True)

    def tpath(self, page):
        """ Path of the HTML file of the given page number """
        if page == 1:
            return "%s/index.html" % (self.myp)
        else:
            return "%s/%d/index.html" % (self.pagep, page)

    async def render(self, renderer):
        """ Render all pages, unless the first page exists with a
        modification time equal to the newest item and 'force' is not set

        :param renderer: the Renderer providing templates and site vars
        """
        self.__mkdirs()
        testpath = self.tpath(1)
        if not shared.config.getboolean('params', 'force') and os.path.isfile(testpath):
            ttime = int(os.path.getmtime(testpath))
            if ttime == self.mtime:
                logging.info('taxonomy index for "%s" exists and up-to-date (lastmod: %d)', self.slug, ttime)
                return

        for page in range(1, self.pages + 1):
            self.renderpage(renderer, page)

    def renderpage(self, renderer, page):
        """ Render one page of the archive; the first page also renders the
        RSS feed

        The written files get the time of the newest item as access and
        modification time, which is what render() compares against.

        :param renderer: the Renderer providing templates and site vars
        :param page: 1-based page number
        :type page: int
        """
        pagination = shared.config.getint('common', 'pagination')
        start = int((page-1) * pagination)
        end = int(start + pagination)
        mtime = self.mtime

        # newest first
        keys = sorted(self.data.keys(), reverse=True)[start:end]
        posttmpls = [self.data[k].tmplvars for k in keys]

        target = self.tpath(page)
        logging.info("rendering taxonomy page %d to %s", page, target)
        tmplvars = {
            'taxonomy': {
                'url': self.baseurl,
                'name': self.name,
                'taxonomy': self.taxonomy,
                'paged': page,
                'total': self.pages,
                'perpage': pagination
            },
            'site': renderer.sitevars,
            'posts': posttmpls,
        }

        r = renderer.j2.get_template('archive.html').render(tmplvars)
        # explicit encoding: the output must not depend on the locale
        with open(target, "wt", encoding="utf-8") as html:
            html.write(r)
        os.utime(target, (mtime, mtime))

        if 1 == page:
            target = os.path.join(self.feedp, 'index.xml')
            logging.info("rendering RSS feed to %s", target)
            r = renderer.j2.get_template('rss.html').render(tmplvars)
            with open(target, "wt", encoding="utf-8") as html:
                html.write(r)
            os.utime(target, (mtime, mtime))
class Content(BaseIter):
    """ All posts of the site keyed by publish time, together with the
    front page, category and tag taxonomies built from them """

    def __init__(self, images, extensions=None):
        """
        :param images: the populated Images collection
        :param extensions: file extensions (without dot) of the content
            files; defaults to md
        :type extensions: list
        """
        super(Content, self).__init__()
        self.images = images
        basepath = shared.config.get('source', 'contentdir')
        self.files = []
        # content files are expected one directory below the content dir
        for ext in (['md'] if extensions is None else extensions):
            self.files += glob.glob(os.path.join(basepath, "*", "*.%s" % ext))
        self.tags = {}
        self.categories = {}
        self.front = Taxonomy()

    def populate(self):
        """ Parse every content file and sort the posts into taxonomies """
        now = arrow.utcnow().timestamp
        for fpath in self.files:
            item = Singular(fpath, self.images)
            # NOTE(review): a future post is stored before the check below,
            # so it is left out of the taxonomies but stays part of the
            # content itself - confirm this is intended
            self.append(item.pubtime, item)

            if item.pubtime > now:
                logging.warning("skipping future post %s", item.fname)
                continue

            if item.isonfront:
                self.front.append(item.pubtime, item)

            if item.iscategorised:
                if item.category not in self.categories:
                    self.categories[item.category] = Taxonomy(item.category, 'category')
                self.categories[item.category].append(item.pubtime, item)

            for tag in item.tags:
                tslug = slugify(tag, only_ascii=True, lower=True)
                if tslug not in self.tags:
                    self.tags[tslug] = Taxonomy(tag, 'tag', tslug)
                self.tags[tslug].append(item.pubtime, item)
                self.symlinktag(tslug, item.path)

    def symlinktag(self, tslug, fpath):
        """ Create a relative symlink to the content file in the directory
        of the tag inside the source tags directory

        :param tslug: slug of the tag
        :param fpath: path of the content file
        """
        fdir, fname = os.path.split(fpath)
        tagpath = os.path.join(shared.config.get('source', 'tagsdir'), tslug)
        os.makedirs(tagpath, exist_ok=True)
        sympath = os.path.relpath(fdir, tagpath)
        dst = os.path.join(tagpath, fname)
        src = os.path.join(sympath, fname)
        # lexists: true for broken links and regular files as well, both of
        # which would make os.symlink() fail
        if not os.path.lexists(dst):
            os.symlink(src, dst)

    def sitemap(self):
        """ Write the URL of every post into sitemap.txt, one per line """
        target = os.path.join(
            shared.config.get('target', 'builddir'),
            'sitemap.txt'
        )
        siteurl = shared.config.get('site', 'url')
        urls = [
            "%s/%s/" % (siteurl, item.fname) for item in self.data.values()
        ]
        with open(target, "wt", encoding="utf-8") as f:
            logging.info("writing sitemap to %s", target)
            f.write("\n".join(urls))

    @staticmethod
    def _csvrows(option, columns):
        """ Yield the rows having at least `columns` fields from the space
        delimited file named by the given option of the [common] section;
        yields nothing if the file does not exist """
        rfile = os.path.join(
            shared.config.get('common', 'basedir'),
            shared.config.get('common', option)
        )
        if not os.path.isfile(rfile):
            return
        with open(rfile, newline='') as csvfile:
            for row in csv.reader(csvfile, delimiter=' '):
                # blank or short lines raised IndexError before
                if len(row) < columns:
                    logging.warning("skipping malformed row in %s: %s", rfile, row)
                    continue
                yield row

    def magicphp(self, renderer):
        """ Render magic.php from the redirects file, the short slug of
        every post and the gone file

        :param renderer: the Renderer providing the templates
        """
        redirects = [(row[0], row[1]) for row in self._csvrows('redirects', 2)]
        for item in self.data.values():
            redirects.append((item.shortslug, item.fname))

        gones = [row[0] for row in self._csvrows('gone', 1)]

        tmplvars = {
            'redirects': redirects,
            'gones': gones
        }

        r = renderer.j2.get_template("magic.php").render(tmplvars)
        target = os.path.abspath(os.path.join(
            shared.config.get('target', 'builddir'),
            'magic.php'
        ))

        # the context manager closes the file, no explicit close() needed
        with open(target, "w", encoding="utf-8") as html:
            logging.debug('writing %s', target)
            html.write(r)
2017-05-23 11:14:47 +01:00
class Singular(object):
    """ A single post or page parsed from a frontmatter + markdown file """

    def __init__(self, path, images):
        """
        :param path: path of the content file; the name of its parent
            directory becomes the category
        :type path: str
        :param images: the populated Images collection
        """
        logging.debug("initiating singular object from %s", path)
        self.path = path
        self.images = images
        self.category = splitpath(path)[-2]
        self.mtime = int(os.path.getmtime(self.path))
        self.fname, self.ext = os.path.splitext(os.path.basename(self.path))
        self.meta = {}
        self.content = ''
        # a jpg named after the post is treated as the photo of the post
        self.photo = self.images.data.get("%s.jpg" % self.fname, None)
        self.__parse()

    def __repr__(self):
        return "%s (lastmod: %s)" % (self.fname, self.published)

    def __parse(self):
        """ Read the file, replace the image shortcodes and append the
        photo, if there is one, to the content """
        with open(self.path, mode='rt') as f:
            self.meta, self.content = frontmatter.parse(f.read())
        self.__filter_images()
        if self.isphoto:
            self.content = "%s\n%s" % (
                self.content,
                self.photo
            )

    def __filter_images(self):
        """ Replace the markdown image shortcodes that refer to known images
        with the HTML of those images """
        m = shared.MDIMGREGEX.findall(self.content)
        if not m:
            logging.debug("no images found")
            return

        for shortcode, alt, fname, title, cl in m:
            image = self.images.data.get(fname, None)
            if not image:
                logging.debug("%s not found in images", fname)
                continue

            if cl:
                image.cl = cl

            html = "%s" % image
            logging.debug(
                "replacing %s in content with %s",
                shortcode,
                html
            )
            self.content = self.content.replace(shortcode, html)

    @property
    def reactions(self):
        """ The reaction URLs of the post

        :return: dict of reaction type ('bookmark', 'repost', 'reply') to
            list of URLs; only the types present in the meta are included
        :rtype: dict
        """
        # getting rid of '-' to avoid css trouble and similar
        convert = {
            'bookmark-of': 'bookmark',
            'repost-of': 'repost',
            'in-reply-to': 'reply',
        }
        reactions = {}

        for k, v in convert.items():
            x = self.meta.get(k, None)
            if not x:
                continue
            if isinstance(x, str):
                x = [x]
            reactions[v] = x

        return reactions

    @property
    def urls(self):
        """ The external URLs found in the content and in the reactions,
        without duplicates, in order of appearance

        :rtype: list
        """
        urls = shared.URLREGEX.findall(self.content)

        for reactionurls in self.reactions.values():
            urls = [*urls, *reactionurls]

        # NOTE(review): this is a substring test against the raw config
        # value, kept as it was - confirm the format of 'domains'
        owndomains = shared.config.get('site', 'domains')

        r = []
        for link in urls:
            domain = '{uri.netloc}'.format(uri=urllib.parse.urlparse(link))
            if domain in owndomains:
                continue
            # r is a list: the former r.get() raised AttributeError
            if link in r:
                continue
            r.append(link)

        return r

    @property
    def lang(self):
        """ Detected language of the title and content; 'en' when the
        detection fails """
        try:
            return langdetect.detect("\n".join([
                self.title,
                self.content
            ]))
        except Exception as e:
            # best effort: detection problems must not stop the build
            logging.debug("language detection failed for %s: %s", self.fname, e)
            return 'en'

    @property
    def tags(self):
        """ List of the tags of the post """
        return list(self.meta.get('tags', []))

    @property
    def published(self):
        """ Publish date from the meta, or the file modification time """
        return arrow.get(
            self.meta.get('published', self.mtime)
        )

    @property
    def updated(self):
        """ Update date from the meta, falling back to the publish date and
        then to the file modification time """
        return arrow.get(
            self.meta.get('updated',
                self.meta.get('published', self.mtime)
            )
        )

    @property
    def pubtime(self):
        """ Publish time as an integer timestamp """
        return int(self.published.timestamp)

    @property
    def isphoto(self):
        """ True-ish if the post has a photo that qualifies as a photo """
        if not self.photo:
            return False
        return self.photo.is_photo

    @property
    def isbookmark(self):
        """ The 'bookmark-of' value of the meta, False if there is none """
        return self.meta.get('bookmark-of', False)

    @property
    def ispage(self):
        """ A file without any meta is a page """
        return not self.meta

    @property
    def isonfront(self):
        """ Pages and bookmarks are kept off the front page """
        return not (self.ispage or self.isbookmark)

    @property
    def iscategorised(self):
        """ Everything but pages belongs to a category """
        return not self.ispage

    @property
    def summary(self):
        return self.meta.get('summary', '')

    @property
    def title(self):
        """ The title from the meta, or the first reaction value present;
        an empty string if there is none """
        for key in ['title', 'bookmark-of', 'in-reply-to', 'repost-of']:
            value = self.meta.get(key, False)
            if value:
                return value
        return ''

    @property
    def url(self):
        return "%s/%s/" % (shared.config.get('site', 'url'), self.fname)

    @property
    def tmplfile(self):
        """ Name of the template to render the post with """
        if self.ispage:
            return 'page.html'
        else:
            return 'singular.html'

    @property
    def html(self):
        """ The content converted with pandoc """
        # NOTE(review): this module defines no `Pandoc` (only a commented-out
        # one), so the bare name raised NameError; presumably the class lives
        # in `shared` next to `shared.CMDLine` - confirm
        return shared.Pandoc().convert(self.content)

    @property
    def offlinecopies(self):
        """ Offline copies of the reaction URLs, keyed by URL; they are
        created on the first access and cached on the object """
        # stupidly simple property caching
        if hasattr(self, 'copies'):
            return self.copies

        copies = {}
        for key in ['bookmark-of', 'in-reply-to', 'repost-of']:
            value = self.meta.get(key, False)
            if not value:
                continue
            if not isinstance(value, list):
                value = [value]
            for url in value:
                copies[url] = OfflineCopy(url)
                copies[url].run()

        self.copies = copies
        return copies

    @property
    def exif(self):
        """ Selected metadata of the photo under template friendly names

        :return: None if the post is not a photo, otherwise a dict that has
            an entry for each name with a non-empty candidate tag
        """
        if not self.isphoto:
            return None

        exif = {}
        # the first candidate tag with a value wins
        mapping = {
            'camera': [
                'Model'
            ],
            'aperture': [
                'FNumber',
                'Aperture'
            ],
            'shutter_speed': [
                'ExposureTime'
            ],
            'focallength35mm': [
                'FocalLengthIn35mmFormat',
            ],
            'focallength': [
                'FocalLength',
            ],
            'iso': [
                'ISO'
            ],
            'lens': [
                'LensID',
            ],
            'date': [
                'CreateDate',
                'DateTimeOriginal',
            ],
            'geo_latitude': [
                'GPSLatitude'
            ],
            'geo_longitude': [
                'GPSLongitude'
            ],
        }

        for ekey, candidates in mapping.items():
            for candidate in candidates:
                maybe = self.photo.meta.get(candidate, None)
                if maybe:
                    if 'geo_' in ekey:
                        exif[ekey] = round(float(maybe), 5)
                    else:
                        exif[ekey] = maybe
                    break

        return exif

    @property
    def rssenclosure(self):
        """ RSS enclosure data of the photo, an empty dict without photo """
        if not self.isphoto:
            return {}
        return self.photo.rssenclosure

    @property
    def tmplvars(self):
        """ The variables handed to the templates for this post """
        return {
            'title': self.title,
            'published': self.published.datetime,
            'tags': self.tags,
            'author': dict(shared.config.items('author')),
            'content': self.content,
            'html': self.html,
            'category': self.category,
            'reactions': self.reactions,
            'updated': self.updated.datetime,
            'summary': self.meta.get('summary', ''),
            'exif': self.exif,
            'lang': self.lang,
            'syndicate': '',
            'slug': self.fname,
            'shortslug': self.shortslug,
            'rssenclosure': self.rssenclosure,
            'copies': self.offlinecopies,
        }

    @property
    def shortslug(self):
        """ Short slug made of the publish time """
        return self.baseN(self.pubtime)

    @staticmethod
    def baseN(num, b=36, numerals="0123456789abcdefghijklmnopqrstuvwxyz"):
        """ Used to create short, lowercase slug for a number (an epoch) passed

        :param num: the non-negative number to convert
        :param b: the base to convert to
        :type b: int
        :param numerals: the digits of the target base
        :type numerals: str
        :return: num written in base b
        :rtype: str
        :raises ValueError: for a negative number, which made the former
            recursive version recurse without end
        """
        num = int(num)
        if num < 0:
            raise ValueError("can't create a slug from negative number %d" % num)
        if num == 0:
            return numerals[0]

        digits = []
        while num:
            num, remainder = divmod(num, b)
            digits.append(numerals[remainder])
        # the least significant digit was collected first
        return "".join(reversed(digits))

    async def render(self, renderer):
        """ Render the post into <builddir>/<fname>/index.html, unless that
        file exists with the modification time of the source and 'force'
        is not set

        :param renderer: the Renderer providing templates and site vars
        """
        logging.info("rendering and saving %s", self.fname)
        targetdir = os.path.abspath(os.path.join(
            shared.config.get('target', 'builddir'),
            self.fname
        ))
        target = os.path.join(targetdir, 'index.html')

        if not shared.config.getboolean('params', 'force') and os.path.isfile(target):
            ttime = int(os.path.getmtime(target))
            logging.debug('ttime is %d mtime is %d', ttime, self.mtime)
            if ttime == self.mtime:
                logging.debug('%s exists and up-to-date (lastmod: %d)', target, ttime)
                return

        os.makedirs(targetdir, exist_ok=True)

        tmplvars = {
            'post': self.tmplvars,
            'site': renderer.sitevars,
            'taxonomy': {},
        }
        r = renderer.j2.get_template(self.tmplfile).render(tmplvars)
        # the context manager closes the file, no explicit close() needed
        with open(target, "w", encoding="utf-8") as html:
            logging.debug('writing %s', target)
            html.write(r)
        # the source mtime on the output is what the check above relies on
        os.utime(target, (self.mtime, self.mtime))
class NASG(object):
    """ The command line front end: parses the parameters, sets up logging
    and runs the build steps in order """

    def __init__(self):
        # --- set loglevel names; also the accepted values of --loglevel
        llevel = {
            'critical': 50,
            'error': 40,
            'warning': 30,
            'info': 20,
            'debug': 10
        }

        # --- set params
        parser = argparse.ArgumentParser(description='Parameters for NASG')
        parser.add_argument(
            '--clear',
            action='store_true',
            default=False,
            help='clear build directory in advance'
        )
        parser.add_argument(
            '--regenerate',
            action='store_true',
            default=False,
            help='force downsizing images'
        )
        parser.add_argument(
            '--force',
            action='store_true',
            default=False,
            help='force rendering HTML'
        )
        parser.add_argument(
            '--loglevel',
            default='info',
            # an unknown level is now a usage error, not a KeyError
            choices=sorted(llevel),
            help='change loglevel'
        )
        parser.add_argument(
            '--nodownsize',
            action='store_true',
            default=False,
            help='skip image downsizing'
        )
        parser.add_argument(
            '--norender',
            action='store_true',
            default=False,
            help='skip rendering'
        )

        # the parameters are stored as strings in a 'params' config section,
        # which is where the rest of the code reads them from
        params = vars(parser.parse_args())
        shared.config.add_section('params')
        for k, v in params.items():
            shared.config.set('params', k, str(v))

        # remove the rest of the potential loggers
        while len(logging.root.handlers) > 0:
            logging.root.removeHandler(logging.root.handlers[-1])

        logging.basicConfig(
            level=llevel[shared.config.get('params', 'loglevel')],
            format='%(asctime)s - %(levelname)s - %(message)s'
        )

    async def __adownsize(self, images, existing):
        for fname, img in images:
            await img.downsize(existing)

    async def __acrender(self, content, renderer):
        for (pubtime, singular) in content:
            await singular.render(renderer)

    async def __atrender(self, taxonomies, renderer):
        for e in taxonomies:
            for name, t in e.items():
                await t.render(renderer)

    async def __afrender(self, front, renderer):
        await front.render(renderer)

    async def __aindex(self, content, searchdb):
        for (pubtime, singular) in content:
            await searchdb.append(singular)

    def run(self):
        """ Run the whole build: images, content, rendering, static files
        and the search index """
        if shared.config.getboolean('params', 'clear'):
            input('about to clear build directory, press enter to continue')
            builddir = os.path.abspath(
                shared.config.get('target', 'builddir')
            )
            # nothing to clear on the very first build
            if os.path.isdir(builddir):
                shutil.rmtree(builddir)

        loop = asyncio.get_event_loop()
        try:
            # every option of [target] with 'dir' in its name is a directory
            for d in shared.config.options('target'):
                if 'dir' in d:
                    os.makedirs(shared.config.get('target', d), exist_ok=True)

            logging.info("discovering images")
            images = Images()
            images.populate()
            existing = glob.glob(os.path.join(shared.config.get('target', 'filesdir'), "*"))
            if not shared.config.getboolean('params', 'nodownsize'):
                logging.info("downsizing images")
                loop.run_until_complete(self.__adownsize(images, existing))

            logging.info("discovering content")
            content = Content(images)
            content.populate()

            renderer = Renderer()
            # NOTE(review): the indentation of SOURCE was lost; the steps up
            # to the sitemap are assumed to be the ones --norender skips,
            # with magic.php and everything after it always run - confirm
            if not shared.config.getboolean('params', 'norender'):
                logging.info("rendering content")
                loop.run_until_complete(self.__acrender(content, renderer))

                logging.info("rendering categories and tags")
                loop.run_until_complete(self.__atrender([content.categories, content.tags], renderer))

                logging.info("rendering the front page elements")
                loop.run_until_complete(self.__afrender(content.front, renderer))

                logging.info("rendering sitemap")
                content.sitemap()

            logging.info("render magic.php")
            content.magicphp(renderer)

            logging.info("copy the static bits")
            src = shared.config.get('source', 'staticdir')
            for item in os.listdir(src):
                s = os.path.join(src, item)
                d = os.path.join(shared.config.get('target', 'builddir'), item)
                logging.debug("copying %s to %s", s, d)
                shutil.copy2(s, d)

            logging.info("pouplating searchdb")
            searchdb = Indexer()
            loop.run_until_complete(self.__aindex(content, searchdb))
            searchdb.finish()
        finally:
            # close the loop on errors as well
            loop.close()
2017-05-23 11:13:35 +01:00
if __name__ == '__main__':
    # script entry point: set up the generator from the command line
    # parameters, then run the build
    NASG().run()