#!/usr/bin/env python3

import os
import re
import configparser
import argparse
import shutil
import logging
import json
import glob
import tempfile
import atexit
import hashlib
import math
import asyncio
import csv
import getpass
import quopri
import base64
import mimetypes

import magic
import arrow
import wand.image
import similar_text
import frontmatter
from slugify import slugify
import langdetect
import requests
from whoosh import index
from whoosh import qparser
import jinja2
import urllib.parse
from webmentiontools.send import WebmentionSend
from bleach import clean
from emoji import UNICODE_EMOJI
from bs4 import BeautifulSoup
from readability.readability import Document

import shared

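# helper: split a path into its components (the leading root is dropped), e.g.
# splitpath('/home/user/content/category/post.md')
# -> ['home', 'user', 'content', 'category', 'post.md']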
def splitpath(path):
    parts = []
    (path, tail) = os.path.split(path)
    while path and tail:
        parts.insert(0, tail)
        (path, tail) = os.path.split(path)
    return parts

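# BaseIter: minimal dict-backed container used by most collections below;
# items are stored under a key (usually the publish timestamp), duplicate keys
# keep the existing entry, and iteration yields (key, value) pairs.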
class BaseIter(object):
    def __init__(self):
        self.data = {}

    def append(self, key, value):
        if key in self.data:
            logging.warning("duplicate key: %s, using existing instead", key)
            existing = self.data.get(key)
            if hasattr(value, 'fname') and hasattr(existing, 'fname'):
                logging.warning(
                    "%s collides with existing %s",
                    value.fname,
                    existing.fname
                )
            return
        self.data[key] = value

    def __getitem__(self, key):
        return self.data.get(key, {})

    def __repr__(self):
        return json.dumps(list(self.data.values()))

    def __next__(self):
        try:
            # dict has no .next() in Python 3; iteration is driven by the
            # generator in __iter__, so this only ever signals exhaustion
            r = self.data.next()
        except (AttributeError, StopIteration):
            raise StopIteration()
        return r

    def __iter__(self):
        for k, v in self.data.items():
            yield (k, v)
        return

class BaseRenderable(object):
    def __init__(self):
        return

    def writerendered(self, content, mtime=None):
        mtime = mtime or self.mtime
        d = os.path.dirname(self.target)
        if not os.path.isdir(d):
            os.mkdir(d)

        with open(self.target, "w") as html:
            logging.debug('writing %s', self.target)
            html.write(content)
            html.close()
        os.utime(self.target, (mtime, mtime))

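# Indexer: wraps the Whoosh search index stored under the build directory;
# posts are only (re)indexed when their mtime is newer than the index itself.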
class Indexer(object):
    def __init__(self):
        self.target = os.path.abspath(os.path.join(
            shared.config.get('target', 'builddir'),
            shared.config.get('var', 'searchdb')
        ))

        if not os.path.isdir(self.target):
            os.mkdir(self.target)

        self.mtime = 0

        if index.exists_in(self.target):
            self.ix = index.open_dir(self.target)
            tocfiles = glob.glob(os.path.join(
                self.target,
                "_MAIN_*.toc"
            ))
            if len(tocfiles):
                self.mtime = int(os.path.getmtime(tocfiles[0]))
        else:
            self.ix = index.create_in(self.target, shared.schema)

        self.writer = self.ix.writer()
        self.qp = qparser.QueryParser("url", schema=shared.schema)

    async def append(self, singular):
        logging.debug("searching for existing index for %s", singular.fname)
        if self.mtime >= singular.mtime:
            logging.debug("search index is newer than post mtime (%d vs %d), skipping post", self.mtime, singular.mtime)
            return

        exists = False
        q = self.qp.parse(singular.url)
        r = self.ix.searcher().search(q, limit=1)
        if r:
            r = r[0]
            # nothing to do, the entry is present and is up to date
            ixtime = r['mtime']
            if int(ixtime) == int(singular.mtime):
                logging.info("search index is up to date for %s", singular.fname)
                return
            else:
                logging.info("search index is out of date: %d (indexed) vs %d", ixtime, singular.mtime)
                exists = True

        content_real = [
            singular.fname,
            singular.summary,
            singular.content,
            singular.reactions.values()
        ]

        content_remote = []
        #for url, offlinecopy in singular.offlinecopies.items():
            #content_remote.append("%s" % offlinecopy)

        weight = 1
        if singular.isbookmark or singular.isfav:
            weight = 10
        if singular.ispage:
            weight = 100

        content = " ".join(list(map(str, [*content_real, *content_remote])))
        if exists:
            logging.info("updating search index with %s", singular.fname)
            # update_document replaces the already indexed entry for this url
            self.writer.update_document(
                title=singular.title,
                url=singular.url,
                content=content,
                fuzzy=content,
                date=singular.published.datetime,
                tags=",".join(list(map(str, singular.tags))),
                weight=weight,
                img="%s" % singular.photo,
                mtime=singular.mtime,
            )
        else:
            logging.info("appending search index with %s", singular.fname)
            self.writer.add_document(
                title=singular.title,
                url=singular.url,
                content=" ".join(list(map(str, [*content_real, *content_remote]))),
                fuzzy=content,
                date=singular.published.datetime,
                tags=",".join(list(map(str, singular.tags))),
                weight=weight,
                img="%s" % singular.photo,
                mtime=singular.mtime
            )

    def finish(self):
        self.writer.commit()

class OfflineArchive(object):
    # keep in mind that these are frontmattered HTML files with full HTML and embedded images
    # they can get VERY large
    def __init__(self, url, content=None, decode_email=False):
        self.url = url
        self.parsed = urllib.parse.urlparse(url)
        self.fbase = shared.slugfname(url)
        self.fname = "%s.md" % self.fbase
        self.target = os.path.join(
            shared.config.get('source', 'offlinecopiesdir'),
            self.fname
        )
        self.targetd = os.path.join(
            shared.config.get('source', 'offlinecopiesdir'),
            self.fbase
        )
        if not os.path.isdir(self.targetd):
            os.mkdir(self.targetd)

        self.fm = frontmatter.loads('')
        self.fm.metadata = {
            'url': self.url,
            'date': arrow.utcnow().format("YYYY-MM-DDTHH:mm:ssZ"),
        }
        self.headers = requests.utils.default_headers()
        self.headers.update({
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
        })

        self.skip_fetch = False
        if content:
            self.skip_fetch = True
            if decode_email:
                content = quopri.decodestring(content)
                content = str(content, 'utf-8', errors='replace')
            self.fm.content = content
        #self.tmp = tempfile.mkdtemp(
            #'offlinearchive_',
            #dir=tempfile.gettempdir()
        #)
        #atexit.register(
            #shutil.rmtree,
            #os.path.abspath(self.tmp)
        #)
        #self.images = []

        self.exists = os.path.isfile(self.target)

    #def read(self):
        #if not self.exists:
            #return ''

        #with open(self.target, 'rt') as f:
            #self.fm = frontmatter.loads(f.read())

        #readable = ''
        #try:
            #readable = Document(self.fm.content)
            #readable = shared.Pandoc(False).convert(readable.summary())
            #readable = shared.Pandoc().convert(readable)
        #except Exception as e:
            #logging.error('Failed to readable %s', self.target)

        #return readable

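    # _getimage/_get_images: download every external <img> referenced in the
    # saved copy into the per-archive directory and rewrite the references in
    # the stored HTML to those local files.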
    def _getimage(self, src):
        imgname, imgext = os.path.splitext(os.path.basename(src))
        imgtarget = os.path.join(
            self.targetd,
            "%s%s" % (slugify(imgname, only_ascii=True, lower=True), imgext)
        )
        try:
            logging.debug('downloading image %s', src)
            r = requests.get(
                src,
                allow_redirects=True,
                timeout=60,
                stream=True
            )
            with open(imgtarget, 'wb') as f:
                for chunk in r.iter_content():
                    if chunk:
                        f.write(chunk)

            self.fm.content = self.fm.content.replace(
                src,
                '%s/%s' % (self.fbase, imgname)
            )
        except Exception as e:
            logging.error('pulling image %s failed: %s', src, e)
            return

    def _get_images(self):
        logging.debug("trying to save images")
        soup = BeautifulSoup(self.fm.content, 'lxml')

        embedded = re.compile(r'^data:.*')
        for img in soup.find_all('img'):
            src = img.get('src')
            if not src:
                continue
            if embedded.match(src):
                continue

            im = urllib.parse.urlparse(src)
            if not im.scheme:
                im = im._replace(scheme=self.parsed.scheme)
            if not im.netloc:
                im = im._replace(netloc=self.parsed.netloc)

            self._getimage(im.geturl())

    #def _getimage(self, src):
        #tmp = os.path.join(self.tmp, "%s" % slugify(os.path.basename(src))[:200])
        #try:
            #r = requests.get(
                #src,
                #allow_redirects=True,
                #timeout=60,
                #stream=True
            #)
            #with open(tmp, 'wb') as f:
                #for chunk in r.iter_content():
                    #if chunk:
                        #f.write(chunk)

            #logging.debug('trying to embed %s', src)
            #with open(tmp, 'rb') as imgdata:
                #data = str(base64.b64encode(imgdata.read()), 'ascii')
                #mimetype, encoding = mimetypes.guess_type(tmp)
                #self.fm.content = self.fm.content.replace(
                    #src,
                    #"data:%s;base64,%s" % (mimetype, data)
                #)
        #except Exception as e:
            #logging.error('pulling image %s failed: %s', src, e)
            #return

    #def _embed_images(self):
        #logging.debug("trying to embed images")
        #soup = BeautifulSoup(self.fm.content, 'lxml')

        #embedded = re.compile(r'^data:.*')
        #for img in soup.find_all('img'):
            #src = img.get('src')
            #if not src:
                #continue
            #if embedded.match(src):
                #continue

            #im = urllib.parse.urlparse(src)
            #if not im.scheme:
                #im = im._replace(scheme=self.parsed.scheme)
            #if not im.netloc:
                #im = im._replace(netloc=self.parsed.netloc)

            #self._getimage(im.geturl())

    def save(self):
        logging.info(
            "saving offline copy of\n\t%s to:\n\t%s",
            self.url,
            self.target
        )

        with open(self.target, 'wt') as f:
            f.write(frontmatter.dumps(self.fm))

    @property
    def archiveorgurl(self):
        logging.debug("trying archive.org for %s", self.url)
        a = self.fetch(
            "http://archive.org/wayback/available?url=%s" % self.url,
        )
        if not a:
            logging.debug("no entry for %s on archive.org", self.url)
            return None

        try:
            a = json.loads(a.text)
            aurl = a.get(
                'archived_snapshots', {}
            ).get(
                'closest', {}
            ).get(
                'url', None
            )
            logging.debug("found %s in archive.org for %s", aurl, self.url)
            self.updateurl(aurl)
            return self.fetch(aurl)
        except Exception as e:
            logging.error("archive.org parsing failed: %s", e)
            return None

    def fetch(self, url):
        try:
            r = requests.get(
                url,
                allow_redirects=True,
                timeout=60,
                headers=self.headers
            )
            if r.status_code == requests.codes.ok:
                return r
        except Exception as e:
            return None

    #def read():
        #if os.path.isfile(self.target):
            #with open(self.target) as f:
                #self.fm = frontmatter.loads(f.read())
                #return

    def run(self):
        if self.exists:
            logging.info("offline archive for %s already exists", self.url)
            return

        logging.info("preparing offline copy of %s", self.url)

        if not self.skip_fetch:
            r = self.fetch(self.url)

            # in case it's not, try to look for an archive.org url:
            if not r:
                logging.warning("couldn't get live version of %s, trying archive.org", self.url)
                r = self.fetch(self.archiveorgurl)

            # no live and no archive.org entry :((
            # however, by miracle, I may already have a copy, so skip if it's there already
            if not r:
                logging.error("no live or archive version of %s found :((", self.url)
                if not self.exists:
                    self.save()
                return

            self.fm.content = r.text

        self._get_images()
        self.save()

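# Renderer: holds the jinja2 environment, the site/author template variables,
# and the custom 'date', 'search' and 'slugify' template filters.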
class Renderer(object):
    def __init__(self):
        self.sitevars = dict(shared.config.items('site'))
        self.sitevars['author'] = dict(shared.config.items('author'))
        self.sitevars['author']['socials'] = dict(shared.config.items('socials'))

        self.jinjaldr = jinja2.FileSystemLoader(
            searchpath=shared.config.get('source', 'templatesdir')
        )
        self.j2 = jinja2.Environment(loader=self.jinjaldr)
        self.j2.filters['date'] = Renderer.jinja_filter_date
        self.j2.filters['search'] = Renderer.jinja_filter_search
        self.j2.filters['slugify'] = Renderer.jinja_filter_slugify

    @staticmethod
    def jinja_filter_date(d, form='%Y-%m-%d %H:%M:%S'):
        if d == 'now':
            d = arrow.now().datetime
        if form == 'c':
            return d.isoformat()
            #form = '%Y-%m-%dT%H:%M:%S%z'
        return d.strftime(form)

    @staticmethod
    def jinja_filter_slugify(s):
        return slugify(s, only_ascii=True, lower=True)

    @staticmethod
    def jinja_filter_search(s, r):
        if r in s:
            return True
        return False

# based on http://stackoverflow.com/a/10075210
class ExifTool(shared.CMDLine):
    """ Handles calling external binary `exiftool` in an efficient way """
    sentinel = "{ready}\n"

    def __init__(self):
        super().__init__('exiftool')

    def run(self, *filenames):
        return json.loads(self.execute(
            '-sort',
            '-json',
            '-MIMEType',
            '-FileType',
            '-FileName',
            '-ModifyDate',
            '-CreateDate',
            '-DateTimeOriginal',
            '-ImageHeight',
            '-ImageWidth',
            '-Aperture',
            '-FOV',
            '-ISO',
            '-FocalLength',
            '-FNumber',
            '-FocalLengthIn35mmFormat',
            '-ExposureTime',
            '-Copyright',
            '-Artist',
            '-Model',
            '-GPSLongitude#',
            '-GPSLatitude#',
            '-LensID',
            *filenames))

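# Comment: a single received webmention/comment, parsed from a frontmattered
# markdown file and rendered to its own index.html under the comments path.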
class Comment(BaseRenderable):
    def __init__(self, path):
        logging.debug("initiating comment object from %s", path)
        self.path = path
        self.fname, self.ext = os.path.splitext(os.path.basename(self.path))
        self.mtime = int(os.path.getmtime(self.path))
        self.meta = {}
        self.content = ''
        self.tmplfile = 'comment.html'
        self.__parse()

    def __repr__(self):
        return "%s" % (self.path)

    def __parse(self):
        with open(self.path, mode='rt') as f:
            self.meta, self.content = frontmatter.parse(f.read())

    @property
    def reacji(self):
        if hasattr(self, '_reacji'):
            return self._reacji

        t = self.meta.get('type', 'webmention')
        typemap = {
            'like-of': '👍',
            'bookmark-of': '🔖',
            'favorite': '★',
        }

        self._reacji = ''

        if t in typemap.keys():
            self._reacji = typemap[t]
        else:
            maybe = clean(self.content).strip()
            if maybe in UNICODE_EMOJI:
                self._reacji = maybe

        return self._reacji

    @property
    def html(self):
        if hasattr(self, '_html'):
            return self._html

        self._html = shared.Pandoc().convert(self.content)
        return self._html

    @property
    def tmplvars(self):
        if hasattr(self, '_tmplvars'):
            return self._tmplvars

        self._tmplvars = {
            'published': self.published.datetime,
            'author': dict(self.meta.get('author', {})),
            'content': self.content,
            'html': self.html,
            'source': self.source,
            'target': self.targeturl,
            'type': self.meta.get('type', 'webmention'),
            'reacji': self.reacji,
            'fname': self.fname
        }
        return self._tmplvars

    @property
    def published(self):
        if hasattr(self, '_published'):
            return self._published
        self._published = arrow.get(self.meta.get('date', self.mtime))
        return self._published

    @property
    def pubtime(self):
        return int(self.published.timestamp)

    @property
    def source(self):
        if hasattr(self, '_source'):
            return self._source
        s = self.meta.get('source', '')
        domains = shared.config.get('site', 'domains').split(' ')
        self._source = s
        for d in domains:
            if d in s:
                self._source = ''
        return self._source

    @property
    def targeturl(self):
        if hasattr(self, '_targeturl'):
            return self._targeturl
        t = self.meta.get('target', shared.config.get('site', 'url'))
        self._targeturl = '{p.path}'.format(p=urllib.parse.urlparse(t)).strip('/')
        return self._targeturl

    @property
    def target(self):
        if hasattr(self, '_target'):
            return self._target

        targetdir = os.path.abspath(os.path.join(
            shared.config.get('target', 'builddir'),
            shared.config.get('site', 'commentspath'),
            self.fname
        ))

        self._target = os.path.join(targetdir, 'index.html')
        return self._target

    async def render(self, renderer):
        logging.info("rendering and saving comment %s", self.fname)

        if not shared.config.getboolean('params', 'force') and os.path.isfile(self.target):
            ttime = int(os.path.getmtime(self.target))
            logging.debug('ttime is %d mtime is %d', ttime, self.mtime)
            if ttime == self.mtime:
                logging.debug(
                    '%s exists and up-to-date (lastmod: %d)',
                    self.target,
                    ttime
                )
                return

        tmplvars = {
            'reply': self.tmplvars,
            'site': renderer.sitevars,
            'taxonomy': {},
        }
        r = renderer.j2.get_template(self.tmplfile).render(tmplvars)
        self.writerendered(r)

class Comments(object):
    def __init__(self):
        self.files = glob.glob(os.path.join(
            shared.config.get('source', 'commentsdir'),
            "*.md"
        ))
        self.bytarget = {}

    def __getitem__(self, key):
        return self.bytarget.get(key, BaseIter())

    def populate(self):
        for fpath in self.files:
            item = Comment(fpath)
            t = item.target
            if not self.bytarget.get(t):
                self.bytarget[t] = BaseIter()
            self.bytarget[t].append(item.pubtime, item)

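# Images: collects every image found in the configured files/photos source
# directories and builds WebImage objects from their exiftool metadata.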
class Images(BaseIter):
    def __init__(self, extensions=['jpg', 'gif', 'png']):
        super(Images, self).__init__()
        logging.info(
            "initiating images with extensions: %s",
            extensions
        )
        self.files = []
        self.data = {}
        # if anyone knows how to do this in a more pythonic way, please tell me
        paths = [
            shared.config.get('source', 'filesdir'),
            shared.config.get('source', 'photosdir')
        ]
        for p in paths:
            for ext in extensions:
                self.files += glob.glob(os.path.join(p, "*.%s" % ext))

    def populate(self):
        with ExifTool() as e:
            _meta = e.run(*self.files)

        # parsing the returned meta into a dict of [filename]={meta}
        for e in _meta:
            if 'FileName' not in e:
                logging.error("missing 'FileName' in element %s", e)
                continue
            fname = os.path.basename(e['FileName'])
            del(e['FileName'])
            # duplicate files are going to be a problem, so don't send it
            # away with a simple error log entry
            if fname in self.data:
                raise ValueError('filename collision: %s', fname)
            # convert dates
            for k, v in e.items():
                e[k] = self.exifdate(v)

            self.data[fname] = WebImage(fname, e)

    def exifdate(self, value):
        """ converts an EXIF date string to ISO 8601 format

        :param value: EXIF date (2016:05:01 00:08:24)
        :type arg1: str
        :return: ISO 8601 string with UTC timezone 2016-05-01T00:08:24+0000
        :rtype: str
        """
        if not isinstance(value, str):
            return value
        match = shared.EXIFREXEG.match(value)
        if not match:
            return value
        return "%s-%s-%sT%s+0000" % (
            match.group('year'),
            match.group('month'),
            match.group('day'),
            match.group('time')
        )

class WebImage(object):
    def __init__(self, fname, meta):
        logging.info(
            "parsing image: %s",
            fname
        )
        self.meta = meta
        self.fpath = os.path.abspath(meta.get('SourceFile', fname))
        self.fname, self.ext = os.path.splitext(fname)
        self.alttext = ''
        self.sizes = []
        self.fallbacksize = int(shared.config.get('common', 'fallbackimg', fallback='720'))
        self.cl = ''
        self.singleimage = False

        for size in shared.config.options('downsize'):
            sizeext = shared.config.get('downsize', size)
            fname = "%s_%s%s" % (self.fname, sizeext, self.ext)
            self.sizes.append((
                int(size),
                {
                    'fpath': os.path.join(
                        shared.config.get('target', 'filesdir'),
                        fname
                    ),
                    'url': "%s/%s/%s" % (
                        shared.config.get('site', 'url'),
                        shared.config.get('source', 'files'),
                        fname
                    ),
                    'crop': shared.config.getboolean('crop', size, fallback=False),
                }
            ))

        self.sizes = sorted(self.sizes, reverse=False)

        self.target = False
        if self.is_downsizeable:
            self.fallback = [e for e in self.sizes if e[0] == self.fallbacksize][0][1]['url']
            self.target = self.sizes[-1][1]['url']
        else:
            self.fallback = "%s/%s/%s" % (
                shared.config.get('site', 'url'),
                shared.config.get('source', 'files'),
                "%s%s" % (self.fname, self.ext)
            )

    def __str__(self):
        if self.is_downsizeable:
            if self.singleimage and not self.cl:
                self.cl = '.u-photo'
            elif self.singleimage:
                self.cl = '.u-photo %s' % self.cl

            return '[![%s](%s "%s%s"){.adaptimg}](%s){.adaptive %s}' % (
                self.alttext,
                self.fallback,
                self.fname,
                self.ext,
                self.target,
                self.cl
            )
        else:
            if not self.cl:
                self.cl = '.aligncenter'
            return '![%s](%s "%s%s"){%s}' % (
                self.alttext,
                self.fallback,
                self.fname,
                self.ext,
                self.cl
            )

    @property
    def exif(self):
        if not self.is_photo:
            return {}

        if hasattr(self, '_exif'):
            return self._exif

        exif = {}
        mapping = {
            'camera': [
                'Model'
            ],
            'aperture': [
                'FNumber',
                'Aperture'
            ],
            'shutter_speed': [
                'ExposureTime'
            ],
            'focallength35mm': [
                'FocalLengthIn35mmFormat',
            ],
            'focallength': [
                'FocalLength',
            ],
            'iso': [
                'ISO'
            ],
            'lens': [
                'LensID',
            ],
            'date': [
                'CreateDate',
                'DateTimeOriginal',
            ],
            'geo_latitude': [
                'GPSLatitude'
            ],
            'geo_longitude': [
                'GPSLongitude'
            ],
        }

        for ekey, candidates in mapping.items():
            for candidate in candidates:
                maybe = self.meta.get(candidate, None)
                if maybe:
                    if 'geo_' in ekey:
                        exif[ekey] = round(float(maybe), 5)
                    else:
                        exif[ekey] = maybe
                    break

        self._exif = exif
        return self._exif

    @property
    def rssenclosure(self):
        """ Returns the largest available image for RSS to add as attachment """
        if hasattr(self, '_rssenclosure'):
            return self._rssenclosure

        target = self.sizes[-1][1]
        self._rssenclosure = {
            'mime': magic.Magic(mime=True).from_file(target['fpath']),
            'url': target['url'],
            'size': os.path.getsize(target['fpath']),
            'fname': self.fname
        }
        return self._rssenclosure

    @property
    def is_photo(self):
        if hasattr(self, '_is_photo'):
            return self._is_photo

        self._is_photo = False
        #if not pattern or not isinstance(pattern, str):
        #    return False
        pattern = re.compile(shared.config.get('photo', 'regex'))

        cpr = self.meta.get('Copyright', '')
        art = self.meta.get('Artist', '')
        if not cpr and not art:
            return False

        if cpr and art:
            if pattern.search(cpr) or pattern.search(art):
                self._is_photo = True

        return self._is_photo

    @property
    def is_downsizeable(self):
        """ Check if the image is large enough and jpeg or png in order to
        downsize it """
        if hasattr(self, '_is_downsizeable'):
            return self._is_downsizeable

        self._is_downsizeable = False
        fb = self.sizes[-1][0]
        ftype = self.meta.get('FileType', None)
        if not ftype:
            return self._is_downsizeable
        if ftype.lower() == 'jpeg' or ftype.lower() == 'png':
            width = int(self.meta.get('ImageWidth', 0))
            height = int(self.meta.get('ImageHeight', 0))
            if width > fb or height > fb:
                self._is_downsizeable = True

        return self._is_downsizeable

    def _copy(self):
        target = os.path.join(
            shared.config.get('target', 'filesdir'),
            "%s%s" % (self.fname, self.ext)
        )
        if not os.path.isfile(target):
            logging.debug("can't downsize %s, copying instead" % self.fname)
            shutil.copy(self.fpath, target)

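    # _watermark: the watermark is scaled to roughly 16% of the longer image
    # edge, offset about 1% from the bottom-right corner, and rotated for
    # portrait orientation images.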
    def _watermark(self, img):
        """ Composite image by adding watermark file over it """
        wmarkfile = os.path.join(
            shared.config.get('common', 'basedir'),
            shared.config.get('common', 'watermark')
        )
        if not os.path.isfile(wmarkfile):
            return img

        with wand.image.Image(filename=wmarkfile) as wmark:
            if img.width > img.height:
                w = img.width * 0.16
                h = wmark.height * (w / wmark.width)
                x = img.width - w - (img.width * 0.01)
                y = img.height - h - (img.height * 0.01)
            else:
                w = img.height * 0.16
                h = wmark.height * (w / wmark.width)
                x = img.width - h - (img.width * 0.01)
                y = img.height - w - (img.height * 0.01)

            w = round(w)
            h = round(h)
            x = round(x)
            y = round(y)

            wmark.resize(w, h)
            if img.width <= img.height:
                wmark.rotate(-90)
            img.composite(image=wmark, left=x, top=y)
        return img

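    # _intermediate_dimensions: without cropping the longer edge is scaled to
    # `size`; with cropping the shorter edge is scaled to `size` so the excess
    # can be trimmed by liquid_rescale in _intermediate().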
    def _intermediate_dimensions(self, size, width, height, crop=False):
        size = int(size)
        w = width
        h = height
        if (width > height and not crop) \
           or (width < height and crop):
            w = size
            h = int(float(size / width) * height)
        else:
            h = size
            w = int(float(size / height) * width)
        return (w, h)

    def _intermediate(self, img, size, meta, existing=[]):
        if img.width <= size and img.height <= size:
            return False

        crop = meta.get('crop', False)
        with img.clone() as thumb:
            width, height = self._intermediate_dimensions(
                size,
                img.width,
                img.height,
                crop
            )
            thumb.resize(width, height)

            if crop:
                thumb.liquid_rescale(size, size, 1, 1)

            if self.meta.get('FileType', 'jpeg').lower() == 'jpeg':
                thumb.compression_quality = 86
                thumb.unsharp_mask(
                    radius=0,
                    sigma=0.5,
                    amount=1,
                    threshold=0.03
                )
                thumb.format = 'pjpeg'

            # this is to make sure pjpeg happens
            with open(meta['fpath'], 'wb') as f:
                thumb.save(file=f)

        return True

    async def downsize(self, existing=[]):
        if not self.is_downsizeable:
            self._copy()
            return

        logging.info("checking downsizing for %s", self.fname)
        needed = shared.config.getboolean('params', 'regenerate', fallback=False)

        if not needed:
            for (size, meta) in self.sizes:
                if meta['fpath'] not in existing:
                    needed = True

        if not needed:
            logging.debug("downsizing not needed for %s", self.fname)
            return

        with wand.image.Image(filename=self.fpath) as img:
            img.auto_orient()

            if self.is_photo:
                logging.info("%s is a photo", self.fpath)
                img = self._watermark(img)

            for (size, meta) in self.sizes:
                self._intermediate(img, size, meta, existing)

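# Taxonomy: a paginated collection of posts (front page, a category, or a tag);
# it knows its paths and URLs under the build directory and renders archive
# pages plus feeds for its content.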
class Taxonomy(BaseIter):
    def __init__(self, name=None, taxonomy=None, slug=None):
        super(Taxonomy, self).__init__()
        self.name = name
        if name and not slug:
            self.slug = slugify(name, only_ascii=True, lower=True)
        else:
            self.slug = slug
        self.taxonomy = taxonomy

    @property
    def pages(self):
        if hasattr(self, '_pages'):
            return self._pages
        self._pages = math.ceil(len(self.data) / shared.config.getint('common', 'pagination'))
        return self._pages

    def __repr__(self):
        return "taxonomy %s with %d items" % (self.taxonomy, len(self.data))

    @property
    def basep(self):
        p = shared.config.get('target', 'builddir')
        if self.taxonomy:
            p = os.path.join(p, self.taxonomy)
        return p

    @property
    def myp(self):
        p = self.basep
        if self.slug:
            return os.path.join(p, self.slug)
        return p

    @property
    def feedp(self):
        return os.path.join(self.myp, 'feed')

    @property
    def pagep(self):
        return os.path.join(self.myp, 'page')

    @property
    def baseurl(self):
        if self.taxonomy and self.slug:
            return "/%s/%s/" % (self.taxonomy, self.slug)
        else:
            return '/'

    @property
    def mtime(self):
        if hasattr(self, '_mtime'):
            return self._mtime
        self._mtime = int(list(sorted(self.data.keys(), reverse=True))[0])
        return self._mtime

    def __mkdirs(self):
        check = [self.basep, self.myp, self.feedp]

        if self.pages > 1:
            check.append(self.pagep)
            for i in range(2, self.pages + 1):
                subpagep = os.path.abspath(os.path.join(
                    self.pagep,
                    '%d' % i
                ))
                check.append(subpagep)

        for p in check:
            if not os.path.isdir(p):
                logging.debug("creating dir %s", p)
                os.mkdir(p)

    def tpath(self, page):
        if page == 1:
            return "%s/index.html" % (self.myp)
        else:
            return "%s/%d/index.html" % (self.pagep, page)

    async def render(self, renderer):
        #if not self.slug or self.slug is 'None':
            #return

        self.__mkdirs()
        page = 1
        testpath = self.tpath(page)
        if not shared.config.getboolean('params', 'force') and os.path.isfile(testpath):
            ttime = int(os.path.getmtime(testpath))
            mtime = self.mtime
            if ttime == mtime:
                logging.info('taxonomy index for "%s" exists and up-to-date (lastmod: %d)', self.slug, ttime)
                return
            else:
                logging.info('taxonomy update needed: %s timestamp is %d, last post timestamp is %d (%s)',
                    testpath,
                    ttime,
                    mtime,
                    self.data[mtime].fname
                )

        while page <= self.pages:
            self.renderpage(renderer, page)
            page = page + 1

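    # renderpage: posts are sorted by publish time, newest first, and sliced
    # into pages of `pagination` items; page 1 additionally writes the feeds
    # and notifies the WebSub hub.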
    def renderpage(self, renderer, page):
        pagination = int(shared.config.get('common', 'pagination'))
        start = int((page - 1) * pagination)
        end = int(start + pagination)

        posttmpls = [self.data[k].tmplvars for k in list(sorted(
            self.data.keys(), reverse=True))[start:end]]

        target = self.tpath(page)
        logging.info("rendering taxonomy page %d to %s", page, target)
        tmplvars = {
            'taxonomy': {
                'url': self.baseurl,
                'name': self.name,
                'taxonomy': self.taxonomy,
                'paged': page,
                'total': self.pages,
                'perpage': pagination,
                'lastmod': arrow.get(self.mtime).datetime
            },
            'site': renderer.sitevars,
            'posts': posttmpls,
        }

        r = renderer.j2.get_template('archive.html').render(tmplvars)
        with open(target, "wt") as html:
            html.write(r)
        os.utime(target, (self.mtime, self.mtime))

        if 1 == page:
            #target = os.path.join(self.feedp, 'index.rss')
            #logging.info("rendering RSS feed to %s", target)
            #r = renderer.j2.get_template('rss.html').render(tmplvars)
            #with open(target, "wt") as html:
                #html.write(r)
            #os.utime(target, (self.mtime, self.mtime))

            target = os.path.join(self.feedp, 'index.atom')
            logging.info("rendering Atom feed to %s", target)
            r = renderer.j2.get_template('atom.html').render(tmplvars)
            with open(target, "wt") as html:
                html.write(r)
            os.utime(target, (self.mtime, self.mtime))

        # ---
        # this is a joke
        # see http://indieweb.org/YAMLFeed
        # don't do YAMLFeeds.
        if 1 == page:
            fm = frontmatter.loads('')
            fm.metadata = {
                'site': {
                    'author': renderer.sitevars['author'],
                    'url': renderer.sitevars['url'],
                    'title': renderer.sitevars['title'],
                },
                'items': [],
            }

            for p in posttmpls:
                fm.metadata['items'].append({
                    'title': p['title'],
                    'url': "%s/%s/" % (renderer.sitevars['url'], p['slug']),
                    'content': p['content'],
                    'summary': p['summary'],
                    'published': p['published'],
                    'updated': p['updated'],
                })

            target = os.path.join(self.feedp, 'index.yml')
            logging.info("rendering YAML feed to %s", target)
            with open(target, "wt") as html:
                html.write(frontmatter.dumps(fm))
            os.utime(target, (self.mtime, self.mtime))
        # ---

        if 1 == page:
            if not self.taxonomy or self.taxonomy == 'category':
                t = shared.config.get('site', 'websuburl')
                data = {
                    'hub.mode': 'publish',
                    'hub.url': "%s%s" % (
                        shared.config.get('site', 'url'), self.baseurl
                    )
                }
                logging.info("pinging %s with data %s", t, data)
                requests.post(t, data=data)

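# Content: the full collection of posts; populate() builds the front page,
# category and tag taxonomies from the markdown sources, and it can also emit
# a sitemap and the magic.php redirect/gone handler.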
class Content(BaseIter):
    def __init__(self, images, comments, extensions=['md']):
        super(Content, self).__init__()
        self.images = images
        self.comments = comments
        basepath = shared.config.get('source', 'contentdir')
        self.files = []
        for ext in extensions:
            self.files += glob.glob(os.path.join(basepath, "*", "*.%s" % ext))
        self.tags = {}
        self.categories = {}
        self.front = Taxonomy()
        self.shortslugmap = {}

    def populate(self):
        now = arrow.utcnow().timestamp
        for fpath in self.files:
            item = Singular(fpath, self.images, self.comments)
            self.append(item.pubtime, item)
            #self.shortslugmap[item.shortslug] = item.fname

            if item.pubtime > now:
                logging.warning("skipping future post %s", item.fname)
                continue

            if item.isonfront:
                self.front.append(item.pubtime, item)

            if item.iscategorised:
                if item.category not in self.categories:
                    self.categories[item.category] = Taxonomy(item.category, 'category')
                self.categories[item.category].append(item.pubtime, item)

            for tag in item.tags:
                tslug = slugify(tag, only_ascii=True, lower=True)
                if tslug not in self.tags:
                    self.tags[tslug] = Taxonomy(tag, 'tag', tslug)
                self.tags[tslug].append(item.pubtime, item)
                self.symlinktag(tslug, item.path)

    def symlinktag(self, tslug, fpath):
        fdir, fname = os.path.split(fpath)
        tagpath = os.path.join(shared.config.get('source', 'tagsdir'), tslug)
        if not os.path.isdir(tagpath):
            os.mkdir(tagpath)
        sympath = os.path.relpath(fdir, tagpath)
        dst = os.path.join(tagpath, fname)
        src = os.path.join(sympath, fname)
        if not os.path.islink(dst):
            os.symlink(src, dst)

    def sitemap(self):
        target = os.path.join(
            shared.config.get('target', 'builddir'),
            'sitemap.txt'
        )
        urls = []
        for item in self.data.values():
            urls.append("%s/%s/" % (
                shared.config.get('site', 'url'),
                item.fname
            ))

        with open(target, "wt") as f:
            logging.info("writing sitemap to %s" % (target))
            f.write("\n".join(urls))

    def magicphp(self, renderer):
        redirects = []
        gones = []
        rfile = os.path.join(
            shared.config.get('common', 'basedir'),
            shared.config.get('common', 'redirects')
        )
        if os.path.isfile(rfile):
            with open(rfile, newline='') as csvfile:
                r = csv.reader(csvfile, delimiter=' ')
                for row in r:
                    redirects.append((row[0], row[1]))
        for item in self.data.values():
            redirects.append((item.shortslug, item.fname))

        rfile = os.path.join(
            shared.config.get('common', 'basedir'),
            shared.config.get('common', 'gone')
        )
        if os.path.isfile(rfile):
            with open(rfile, newline='') as csvfile:
                r = csv.reader(csvfile, delimiter=' ')
                for row in r:
                    gones.append(row[0])

        tmplvars = {
            'site': renderer.sitevars,
            'redirects': redirects,
            'gones': gones
        }

        r = renderer.j2.get_template("magic.php").render(tmplvars)
        target = os.path.abspath(os.path.join(
            shared.config.get('target', 'builddir'),
            'magic.php'
        ))

        with open(target, "w") as html:
            logging.debug('writing %s', target)
            html.write(r)
            html.close()

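# Singular: one post, parsed from a frontmattered markdown file; it knows its
# images, comments, reactions and the derived properties used by the templates.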
class Singular(BaseRenderable):
    def __init__(self, path, images, comments):
        logging.debug("initiating singular object from %s", path)
        self.path = path
        self.images = images
        self.allcomments = comments
        self.category = splitpath(path)[-2]
        self.mtime = int(os.path.getmtime(self.path))
        self.fname, self.ext = os.path.splitext(os.path.basename(self.path))
        self.meta = {}
        self.content = ''
        self.photo = self.images.data.get("%s.jpg" % self.fname, None)
        if self.photo:
            self.photo.singleimage = True
        self.__parse()

    def __repr__(self):
        return "%s (lastmod: %s)" % (self.fname, self.published)

    def __parse(self):
        with open(self.path, mode='rt') as f:
            self.meta, self.content = frontmatter.parse(f.read())
        self.__filter_favs()
        self.__filter_images()
        if self.isphoto:
            self.content = "%s\n%s" % (
                self.content,
                self.photo
            )
        trigger = self.offlinecopies

    def __filter_favs(self):
        url = self.meta.get('favorite-of',
            self.meta.get('like-of',
                self.meta.get('bookmark-of',
                    False
                )
            )
        )

        if not url:
            return

        img = self.meta.get('image', False)
        imgs = self.meta.get('images', [])
        if img:
            imgs.append(img)

        if not imgs or not len(imgs):
            return

        c = ''
        for i in imgs:
            c = '%s\n[![%s](/%s/%s)](%s){.favurl}' % (
                c,
                self.title,
                shared.config.get('source', 'files'),
                i,
                url
            )

        if self.isbookmark:
            c = "%s\n\n%s" % (c, self.content)

        self.content = c

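    # __filter_images: markdown image references matched by shared.MDIMGREGEX
    # are replaced with the corresponding WebImage markup, carrying over any
    # class annotation from the shortcode.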
    def __filter_images(self):
        linkto = False
        isrepost = None

        if len(self.reactions.keys()):
            isrepost = list(self.reactions.keys())[0]
            if isrepost and \
               len(self.reactions[isrepost]) == 1:
                linkto = self.reactions[isrepost][0]

        m = shared.MDIMGREGEX.findall(self.content)
        if not m:
            logging.debug("no images found")
            return

        for shortcode, alt, fname, title, cl in m:
            image = self.images.data.get(fname, None)
            if not image:
                logging.debug("%s not found in images", fname)
                continue

            if cl:
                image.cl = cl

            logging.debug(
                "replacing %s in content with %s",
                shortcode,
                "%s" % image
            )
            self.content = self.content.replace(
                shortcode,
                "%s" % image
            )

    @property
    def comments(self):
        if hasattr(self, '_comments'):
            return self._comments

        # the comments could point to both the "real" url and the shortslug
        # so I need to get both
        c = {}
        for by in [self.fname, self.shortslug]:
            c = {**c, **self.allcomments[by].data}
        #self._comments = [c[k].tmplvars for k in list(sorted(c.keys(), reverse=True))]
        self._comments = [c[k] for k in list(sorted(c.keys(), reverse=True))]
        return self._comments

    @property
    def replies(self):
        if hasattr(self, '_replies'):
            return self._replies
        self._replies = [c.tmplvars for c in self.comments if not len(c.reacji)]
        return self._replies

    @property
    def reacjis(self):
        if hasattr(self, '_reacjis'):
            return self._reacjis
        reacjis = {}
        for c in self.comments:
            rj = c.reacji

            if not len(rj):
                continue

            if not reacjis.get(rj, False):
                reacjis[rj] = []

            reacjis[rj].append(c.tmplvars)

        self._reacjis = reacjis
        return self._reacjis

    @property
    def reactions(self):
        if hasattr(self, '_reactions'):
            return self._reactions
        # getting rid of '-' to avoid css trouble and similar
        convert = {
            'bookmark-of': 'bookmark',
            'repost-of': 'repost',
            'in-reply-to': 'reply',
            'favorite-of': 'fav',
            'like-of': 'like',
        }
        reactions = {}

        for k, v in convert.items():
            x = self.meta.get(k, None)
            if not x:
                continue
            if isinstance(x, str):
                x = [x]
            reactions[v] = x

        self._reactions = reactions
        return self._reactions

    @property
    def urls(self):
        if hasattr(self, '_urls'):
            return self._urls

        urls = shared.URLREGEX.findall(self.content)

        for reactionurls in self.reactions.values():
            urls = [*urls, *reactionurls]

        r = []
        for link in urls:
            domain = '{uri.netloc}'.format(uri=urllib.parse.urlparse(link))
            if domain in shared.config.get('site', 'domains'):
                continue
            if link in r:
                continue
            r.append(link)

        self._urls = r
        return self._urls

@property
|
|
|
|
def lang(self):
|
2017-06-03 12:07:03 +01:00
|
|
|
if hasattr(self, '_lang'):
|
|
|
|
return self._lang
|
|
|
|
|
2017-05-23 11:14:47 +01:00
|
|
|
lang = 'en'
|
|
|
|
try:
|
|
|
|
lang = langdetect.detect("\n".join([
|
|
|
|
self.title,
|
|
|
|
self.content
|
|
|
|
]))
|
|
|
|
except:
|
|
|
|
pass
|
2017-06-03 12:07:03 +01:00
|
|
|
self._lang = lang
|
|
|
|
return self._lang

    @property
    def tags(self):
        return list(self.meta.get('tags', []))

    @property
    def published(self):
        if hasattr(self, '_published'):
            return self._published
        self._published = arrow.get(
            self.meta.get('published', self.mtime)
        )
        return self._published

    @property
    def updated(self):
        if hasattr(self, '_updated'):
            return self._updated
        self._updated = arrow.get(
            self.meta.get('updated',
                self.meta.get('published', self.mtime)
            )
        )
        return self._updated

    @property
    def pubtime(self):
        return int(self.published.timestamp)
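
    # The is* helpers classify the post from its frontmatter: photo,
    # bookmark, reply, fav/like, or a bare page (no frontmatter at all);
    # isonfront and iscategorised below use these to decide placement.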
    @property
    def isphoto(self):
        if not self.photo:
            return False
        return self.photo.is_photo

    @property
    def isbookmark(self):
        return self.meta.get('bookmark-of', False)

    @property
    def isreply(self):
        return self.meta.get('in-reply-to', False)

    # TODO
    #@property
    #def isrvsp(self):
    #    r'<data class="p-rsvp" value="([^"])">([^<]+)</data>'

    @property
    def isfav(self):
        r = False
        for maybe in ['like-of', 'favorite-of']:
            maybe = self.meta.get(maybe, False)
            if maybe:
                r = maybe
                break
        return r

    @property
    def ispage(self):
        if not self.meta:
            return True
        return False

    @property
    def isonfront(self):
        if self.ispage:
            return False
        if self.isbookmark:
            return False
        if self.isfav:
            return False
        return True

    @property
    def iscategorised(self):
        if self.ispage:
            return False
        return True

    @property
    def summary(self):
        return self.meta.get('summary', '')
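
    # The title falls back, in order, to the explicit title, then the
    # bookmarked, replied-to or reposted URL, with newlines stripped so
    # it stays single-line in templates and feeds.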
    @property
    def title(self):
        if hasattr(self, '_title'):
            return self._title

        self._title = ''
        for maybe in ['title', 'bookmark-of', 'in-reply-to', 'repost-of']:
            maybe = self.meta.get(maybe, False)
            if maybe:
                if isinstance(maybe, list):
                    maybe = maybe.pop()
                self._title = maybe.replace('\n', ' ').replace('\r', '')
                break
        return self._title

    @property
    def url(self):
        return "%s/%s/" % (shared.config.get('site', 'url'), self.fname)

    @property
    def tmplfile(self):
        if self.ispage:
            return 'page.html'
        else:
            return 'singular.html'

    @property
    def html(self):
        if hasattr(self, '_html'):
            return self._html
        self._html = shared.Pandoc().convert(self.content)
        return self._html

    @property
    def sumhtml(self):
        if hasattr(self, '_sumhtml'):
            return self._sumhtml
        self._sumhtml = self.meta.get('summary', '')
        if len(self._sumhtml):
            self._sumhtml = shared.Pandoc().convert(self.summary)
        return self._sumhtml
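
    # Offline copies of bookmarked, replied-to, reposted or favourited
    # URLs via OfflineArchive; currently the archives are fetched but the
    # property never returns the copies (the returns are commented out).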
    @property
    def offlinecopies(self):
        # stupidly simple property caching
        if hasattr(self, 'copies'):
            return self.copies

        copies = {}
        for maybe in ['bookmark-of', 'in-reply-to', 'repost-of', 'favorite-of']:
            maybe = self.meta.get(maybe, False)
            if not maybe:
                continue
            if not isinstance(maybe, list):
                maybe = [maybe]
            for url in maybe:
                arch = OfflineArchive(url)
                arch.run()
                #copies[url] = arch.read()

        #self.copies = copies
        #return copies

    @property
    def exif(self):
        if not self.isphoto:
            return {}
        return self.photo.exif

    @property
    def rssenclosure(self):
        if not self.isphoto:
            return {}
        return self.photo.rssenclosure

    @property
    def tmplvars(self):
        if hasattr(self, '_tmplvars'):
            return self._tmplvars

        self._tmplvars = {
            'title': self.title,
            'published': self.published.datetime,
            'tags': self.tags,
            'author': dict(shared.config.items('author')),
            'content': self.content,
            'html': self.html,
            'category': self.category,
            'reactions': self.reactions,
            'updated': self.updated.datetime,
            'summary': self.summary,
            'sumhtml': self.sumhtml,
            'exif': self.exif,
            'lang': self.lang,
            'syndicate': '',
            'slug': self.fname,
            'shortslug': self.shortslug,
            'rssenclosure': self.rssenclosure,
            #'offlinecopies': self.offlinecopies,
            #'copies': [],
            'comments': self.comments,
            'replies': self.replies,
            'reacjis': self.reacjis,
        }
        return self._tmplvars
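
    # The shortslug is the publish timestamp encoded with shared.baseN,
    # which gives a short redirect URL for the post; purely as an
    # illustration, an epoch of 1496407200 might encode to something
    # like '2NtOW', depending on the alphabet shared.baseN uses.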
    @property
    def shortslug(self):
        if hasattr(self, '_shortslug'):
            return self._shortslug
        self._shortslug = shared.baseN(self.pubtime)
        return self._shortslug

    @property
    def target(self):
        targetdir = os.path.abspath(os.path.join(
            shared.config.get('target', 'builddir'),
            self.fname
        ))
        return os.path.join(targetdir, 'index.html')
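
    # Rendering: comments are rendered first, then the post itself is
    # skipped if the target file is already up to date (unless --force),
    # where "up to date" means the target mtime matches the newer of the
    # post mtime and the latest comment mtime.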
    async def rendercomments(self, renderer):
        for comment in self.comments:
            await comment.render(renderer)

    async def render(self, renderer):
        # this is only when I want salmentions and I want to include all
        # of the comments as well, otherwise it affects both webmentions
        # sending and search indexing
        #if len(self.comments):
        #    lctime = self.comments[0].mtime
        #    if lctime > self.mtime:
        #        self.mtime = lctime
        await self.rendercomments(renderer)

        mtime = self.mtime
        if len(self.comments):
            lctime = self.comments[0].mtime
            if lctime > self.mtime:
                mtime = lctime

        logging.info("rendering and saving %s", self.fname)
        if not shared.config.getboolean('params', 'force') and os.path.isfile(self.target):
            ttime = int(os.path.getmtime(self.target))
            logging.debug('ttime is %d mtime is %d', ttime, mtime)
            if ttime == mtime:
                logging.debug(
                    '%s exists and up-to-date (lastmod: %d)',
                    self.target,
                    ttime
                )
                return

        tmplvars = {
            'post': self.tmplvars,
            'site': renderer.sitevars,
            'taxonomy': {},
        }
        r = renderer.j2.get_template(self.tmplfile).render(tmplvars)
        self.writerendered(r, mtime)
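
    # Webmention sending is de-duplicated: each (mtime, source, target)
    # record is hashed with SHA1 and looked up in the pinger's database,
    # so a given target is only pinged again when the post changes.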
    async def ping(self, pinger):
        for target in self.urls:
            record = {
                'mtime': self.mtime,
                'source': self.url,
                'target': target
            }
            h = json.dumps(record, sort_keys=True)
            h = hashlib.sha1(h.encode('utf-8')).hexdigest()
            if pinger.db.get(h, False):
                logging.debug(
                    "%s is already pinged from %s @ %d, skipping",
                    target, self.url, self.mtime
                )
                continue

            logging.info("sending webmention from %s to %s", self.url, target)
            ws = WebmentionSend(self.url, target)
            try:
                ws.send(allow_redirects=True, timeout=30)
            except Exception as e:
                logging.error('ping failed to %s: %s', target, e)

            pinger.db[h] = record
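

# Webmentioner persists the "already pinged" database as pretty-printed
# JSON inside the build directory; ping() above reads and writes it via
# the db dict and finish() flushes it to disk at the end of the run.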
class Webmentioner(object):
    def __init__(self):
        self.dbpath = os.path.abspath(os.path.join(
            shared.config.get('target', 'builddir'),
            shared.config.get('var', 'webmentions')
        ))

        if os.path.isfile(self.dbpath):
            with open(self.dbpath, 'rt') as f:
                self.db = json.loads(f.read())
        else:
            self.db = {}

    def finish(self):
        with open(self.dbpath, 'wt') as f:
            f.write(json.dumps(self.db, sort_keys=True, indent=4))
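

# NASG is the build orchestrator: parse the CLI flags into the shared
# config, then discover images, comments and content, render, index for
# search and send webmentions. A per-user lockfile in the temp directory
# prevents two builds from running at the same time.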
class NASG(object):
    lockfile = os.path.join(tempfile.gettempdir(), 'nasg_%s.lock' % getpass.getuser())

    def __init__(self):
        # --- set params
        parser = argparse.ArgumentParser(description='Parameters for NASG')
        parser.add_argument(
            '--clear',
            action='store_true',
            default=False,
            help='clear build directory in advance'
        )
        parser.add_argument(
            '--regenerate',
            action='store_true',
            default=False,
            help='force downsizing images'
        )
        parser.add_argument(
            '--force',
            action='store_true',
            default=False,
            help='force rendering HTML'
        )
        parser.add_argument(
            '--loglevel',
            default='error',
            help='change loglevel'
        )
        parser.add_argument(
            '--nodownsize',
            action='store_true',
            default=False,
            help='skip image downsizing'
        )
        parser.add_argument(
            '--norender',
            action='store_true',
            default=False,
            help='skip rendering'
        )
        parser.add_argument(
            '--refetch',
            action='store_true',
            default=False,
            help='force re-fetching offline archives'
        )

        params = vars(parser.parse_args())
        shared.config.add_section('params')
        for k, v in params.items():
            shared.config.set('params', k, str(v))

        # remove the rest of the potential loggers
        while len(logging.root.handlers) > 0:
            logging.root.removeHandler(logging.root.handlers[-1])

        # --- set loglevel
        logging.basicConfig(
            level=shared.LLEVEL[shared.config.get('params', 'loglevel')],
            format='%(asctime)s - %(levelname)s - %(message)s'
        )
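
    # Thin async wrappers so the individual steps (downsizing, rendering,
    # indexing, pinging) can each be driven by loop.run_until_complete()
    # from run() below.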
    async def __adownsize(self, images, existing):
        for fname, img in images:
            await img.downsize(existing)

    async def __acrender(self, content, renderer):
        for (pubtime, singular) in content:
            await singular.render(renderer)

    async def __atrender(self, taxonomies, renderer):
        for e in taxonomies:
            for name, t in e.items():
                await t.render(renderer)

    async def __afrender(self, front, renderer):
        await front.render(renderer)

    async def __aindex(self, content, searchdb):
        for (pubtime, singular) in content:
            await searchdb.append(singular)

    async def __aping(self, content, pinger):
        for (pubtime, singular) in content:
            await singular.ping(pinger)
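
    # run() is the whole pipeline in order: lock, optional clean build
    # dir, ensure target dirs, images, comments, content, rendering,
    # sitemap and magic.php, static files, search index, webmentions.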
    def run(self):
        if os.path.isfile(self.lockfile):
            raise ValueError(
                "Lockfile is present at %s; another instance is running." % (
                    self.lockfile
                )
            )
        else:
            atexit.register(os.remove, self.lockfile)
            with open(self.lockfile, "wt") as f:
                f.write(arrow.utcnow().format())

        if shared.config.getboolean('params', 'clear'):
            input('about to clear build directory, press enter to continue')
            shutil.rmtree(os.path.abspath(
                shared.config.get('target', 'builddir')
            ))

        loop = asyncio.get_event_loop()

        for d in shared.config.options('target'):
            if 'dir' in d and not os.path.isdir(shared.config.get('target', d)):
                os.mkdir(shared.config.get('target', d))

        logging.info("discovering images")
        images = Images()
        images.populate()
        existing = glob.glob(os.path.join(
            shared.config.get('target', 'filesdir'),
            "*"
        ))
        if not shared.config.getboolean('params', 'nodownsize'):
            logging.info("downsizing images")
            loop.run_until_complete(self.__adownsize(images, existing))

        logging.info("discovering comments")
        comments = Comments()
        comments.populate()

        logging.info("discovering content")
        content = Content(images, comments)
        content.populate()

        renderer = Renderer()
        if not shared.config.getboolean('params', 'norender'):
            logging.info("rendering content")
            loop.run_until_complete(self.__acrender(
                content, renderer
            ))

            logging.info("rendering categories and tags")
            loop.run_until_complete(self.__atrender(
                [content.categories, content.tags], renderer
            ))

            logging.info("rendering the front page elements")
            loop.run_until_complete(self.__afrender(
                content.front, renderer
            ))

        logging.info("rendering sitemap")
        content.sitemap()

        logging.info("render magic.php")
        content.magicphp(renderer)

        logging.info("copy the static bits")
        src = shared.config.get('source', 'staticdir')
        for item in os.listdir(src):
            s = os.path.join(src, item)
            d = os.path.join(shared.config.get('target', 'builddir'), item)
            logging.debug("copying %s to %s", s, d)
            shutil.copy2(s, d)

        logging.info("populating searchdb")
        searchdb = Indexer()
        loop.run_until_complete(self.__aindex(content, searchdb))
        searchdb.finish()

        logging.info("webmentioning urls")
        pinger = Webmentioner()
        loop.run_until_complete(self.__aping(content, pinger))
        pinger.finish()

        loop.close()


if __name__ == '__main__':
    worker = NASG()
    worker.run()