2017-05-23 11:14:47 +01:00
|
|
|
#!/usr/bin/env python3
|
2017-12-17 17:37:32 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# vim: set fileencoding=utf-8 :
|
|
|
|
|
|
|
|
__author__ = "Peter Molnar"
|
2018-03-29 17:07:53 +01:00
|
|
|
__copyright__ = "Copyright 2017-2018, Peter Molnar"
|
2017-12-17 17:37:32 +00:00
|
|
|
__license__ = "GPLv3"
|
2018-04-30 20:44:04 +01:00
|
|
|
__version__ = "2.2.0"
|
2017-12-17 17:37:32 +00:00
|
|
|
__maintainer__ = "Peter Molnar"
|
2018-04-30 20:44:04 +01:00
|
|
|
__email__ = "mail@petermolnar.net"
|
2017-12-17 17:37:32 +00:00
|
|
|
__status__ = "Production"
|
|
|
|
|
|
|
|
"""
|
|
|
|
silo archiver module of NASG
|
2018-03-29 17:07:53 +01:00
|
|
|
Copyright (C) 2017-2018 Peter Molnar
|
2017-12-17 17:37:32 +00:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 3 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software Foundation,
|
|
|
|
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
"""
|
2017-05-23 11:14:47 +01:00
|
|
|
|
|
|
|
import os
|
|
|
|
import re
|
2017-05-23 11:13:35 +01:00
|
|
|
import logging
|
2017-05-23 11:14:47 +01:00
|
|
|
import json
|
|
|
|
import glob
|
2017-10-27 10:29:33 +01:00
|
|
|
import argparse
|
|
|
|
import shutil
|
|
|
|
from urllib.parse import urlparse
|
2017-05-23 11:14:47 +01:00
|
|
|
import asyncio
|
2017-10-27 10:29:33 +01:00
|
|
|
from math import ceil
|
2017-05-26 10:14:24 +01:00
|
|
|
import csv
|
2018-03-21 15:42:36 +00:00
|
|
|
import html
|
2017-05-23 11:14:47 +01:00
|
|
|
import frontmatter
|
2017-11-30 17:01:14 +00:00
|
|
|
import requests
|
2017-10-27 10:29:33 +01:00
|
|
|
import arrow
|
2017-05-23 11:14:47 +01:00
|
|
|
import langdetect
|
2017-10-27 10:29:33 +01:00
|
|
|
import wand.image
|
2017-10-27 15:56:05 +01:00
|
|
|
from emoji import UNICODE_EMOJI
|
2017-11-30 17:01:14 +00:00
|
|
|
from feedgen.feed import FeedGenerator
|
2017-06-12 15:40:30 +01:00
|
|
|
import shared
|
2017-05-31 13:53:47 +01:00
|
|
|
|
2017-05-23 11:14:47 +01:00
|
|
|
|
2017-10-27 10:29:33 +01:00
|
|
|
class MagicPHP(object):
    ''' router PHP generator

    Renders the `index.php` router for the built site from a Jinja2
    template, feeding it the "gone" (HTTP 410) and redirect (HTTP 301)
    URL lists read from space-delimited CSV files.
    '''
    # output filename inside the build directory
    name = 'index.php'

    def __init__(self):
        # init 'gone 410' array: one URL per row, first column only
        self.gones = []
        f_gone = shared.config.get('var', 'gone')
        if os.path.isfile(f_gone):
            with open(f_gone) as csvfile:
                reader = csv.reader(csvfile, delimiter=' ')
                for row in reader:
                    self.gones.append(row[0])
        # init manual redirects array: (source, target) pairs
        self.redirects = []
        f_redirect = shared.config.get('var', 'redirects')
        if os.path.isfile(f_redirect):
            with open(f_redirect) as csvfile:
                reader = csv.reader(csvfile, delimiter=' ')
                for row in reader:
                    self.redirects.append((row[0], row[1]))

    @property
    def phpfile(self):
        # full path of the generated PHP router in the build directory
        return os.path.join(
            shared.config.get('common', 'build'),
            self.name
        )

    async def render(self):
        # render the MagicPHP.html template with the collected URL lists
        # and write the result out as index.php
        logging.info('saving %s', self.name)
        o = self.phpfile
        tmplfile = "%s.html" % (self.__class__.__name__)
        r = shared.j2.get_template(tmplfile).render({
            'site': shared.site,
            'redirects': self.redirects,
            'gones': self.gones
        })
        with open(o, 'wt') as out:
            logging.debug('writing file %s', o)
            out.write(r)
|
|
|
|
|
|
|
|
|
|
|
|
class NoDupeContainer(object):
    ''' Base class to hold keys => data dicts with errors on dupes

    Behaves like a minimal mapping: `append`/`__setitem__` refuse to
    overwrite an existing key (the duplicate is logged and dropped),
    `__getitem__` falls back to `self.default` for missing keys, and
    iteration yields `(key, value)` pairs.
    '''

    def __init__(self):
        # backing store; insertion order is preserved (Python 3.7+ dicts)
        self.data = {}
        # value returned by __getitem__ for missing keys
        self.default = None

    def append(self, key, value):
        ''' Store `value` under `key`; log and ignore on duplicate. '''
        # all clear
        if key not in self.data:
            self.data.update({key: value})
            return

        # problem: refuse to overwrite, keep the first value
        logging.error(
            "duplicate key error when populating %s: %s",
            self.__class__.__name__,
            key
        )
        logging.error(
            "current: %s",
            self.data.get(key)
        )
        logging.error(
            "problem: %s",
            value
        )

        return

    # TODO: return ordered version of data

    def __getitem__(self, key):
        return self.data.get(key, self.default)

    def __setitem__(self, key, value):
        return self.append(key, value)

    def __contains__(self, key):
        # membership test on the dict directly; no need for .keys()
        return key in self.data

    def __len__(self):
        return len(self.data)

    def __next__(self):
        # NOTE: the original called self.data.next(), which does not exist
        # on dicts (it was a Python 2 iterator method); the resulting
        # AttributeError was silently swallowed by a bare `except
        # BaseException` and turned into StopIteration. Iteration actually
        # goes through the __iter__ generator below, so the externally
        # observable behaviour here is simply "stop" — made explicit now.
        raise StopIteration()

    def __iter__(self):
        # yield (key, value) pairs, like dict.items()
        for k, v in self.data.items():
            yield (k, v)
        return
|
|
|
|
|
2017-10-27 10:29:33 +01:00
|
|
|
|
|
|
|
class FContainer(NoDupeContainer):
    """ A container that holds a list of files, based on NoDupeContainer, so
    it errors on duplicate slugs; it is populated with a recursive glob.

    Keys are basenames, values are full paths; duplicate basenames across
    directories are logged and dropped by the parent class.
    """

    def __init__(self, dirs, extensions=('*',)):
        """ Collect files matching `*.<ext>` for every extension in every
        directory in `dirs`.

        :param dirs: iterable of glob directory patterns (may contain `**`)
        :param extensions: iterable of filename extensions, default all
            (tuple default instead of the original mutable list default)
        """
        super().__init__()
        # a set deduplicates paths matched by more than one pattern
        files = set()
        for ext in extensions:
            for p in dirs:
                files.update(glob.iglob(
                    os.path.join(p, '*.%s' % (ext)),
                    recursive=True
                ))
        for fpath in files:
            fname = os.path.basename(fpath)
            self.append(fname, fpath)
|
|
|
|
|
2017-11-10 15:56:45 +00:00
|
|
|
|
2017-10-27 10:29:33 +01:00
|
|
|
class Content(FContainer):
    """ This is a container that holds markdown files that are parsed when the
    container is populated on the fly; based on FContainer which is a Container
    """

    def __init__(self):
        # recurse the whole content tree for markdown posts and jpg photos
        dirs = [os.path.join(shared.config.get('dirs', 'content'), "**")]
        extensions = ['md', 'jpg']
        super().__init__(dirs, extensions)
        # replace each raw path value with a parsed Singular object;
        # NOTE: mutating values while iterating .items() is safe here
        # because no keys are added or removed
        for fname, fpath in self.data.items():
            self.data.update({fname: Singular(fpath)})
|
|
|
|
|
2017-11-10 15:56:45 +00:00
|
|
|
|
2017-10-27 10:29:33 +01:00
|
|
|
class Category(NoDupeContainer):
    """ A Category which holds pubtime (int) => Singular data """

    # filename of a rendered HTML page
    indexfile = 'index.html'
    # filename of the rendered Atom feed
    feedfile = 'index.xml'
    # subdirectory for the feed
    feeddir = 'feed'
    # subdirectory prefix for pages > 1
    pagedir = 'page'
    # URL path prefix for named categories
    taxonomy = 'category'

    def __init__(self, name='', is_front=False):
        # empty name means the front page (url '/')
        self.name = name
        # topic name => NoDupeContainer of posts with exactly that one tag
        self.topics = NoDupeContainer()
        # whether the welcome block should be injected on page 1
        self.is_front = is_front
        super().__init__()

    def append(self, post):
        ''' Add a Singular, keyed by its pubtime; single-tag posts are
        additionally grouped into self.topics. '''
        if len(post.tags) == 1:
            topic = post.tags[0]
            if topic not in self.topics:
                t = NoDupeContainer()
                self.topics.append(topic, t)
            t = self.topics[topic]
            t.append(post.pubtime, post)
        return super().append(post.pubtime, post)

    @property
    def mtime(self):
        # newest post's pubtime; assumes the category is non-empty
        return int(sorted(self.data.keys(), reverse=True)[0])

    @property
    def is_uptodate(self):
        # the rendered index is current when its file mtime equals the
        # newest post's pubtime (write_html sets it via os.utime)
        index = os.path.join(self.path_paged(), self.indexfile)
        if not os.path.isfile(index):
            return False
        mtime = os.path.getmtime(index)
        if mtime == self.mtime:
            return True
        return False

    @property
    def title(self):
        # e.g. "photo - example.com"
        return ' - '.join([
            self.name,
            shared.config.get('common', 'domain')
        ])

    @property
    def is_altrender(self):
        # True when a category-specific template (Category_<name>.html)
        # exists; render() then uses the one-page renderer
        return os.path.exists(
            os.path.join(
                shared.config.get('dirs', 'tmpl'),
                "%s_%s.html" % (
                    self.__class__.__name__,
                    self.name
                )
            )
        )

    @property
    def url(self):
        # '/category/<name>/' for named categories, '/' for the front page
        if self.name:
            url = "/%s/%s/" % (
                self.taxonomy,
                self.name,
            )
        else:
            url = '/'
        return url

    def path_paged(self, page=1, feed=False):
        ''' Return (and create if needed) the output directory for the
        given page number, or for the feed when feed=True. '''
        x = shared.config.get('common', 'build')

        if self.name:
            x = os.path.join(
                x,
                self.taxonomy,
                self.name,
            )

        if page == 1:
            if feed:
                x = os.path.join(x, self.feeddir)
        else:
            x = os.path.join(x, self.pagedir, "%s" % page)

        if not os.path.isdir(x):
            os.makedirs(x)
        return x

    def write_html(self, path, content):
        ''' Write rendered HTML and stamp the file with the category
        mtime so is_uptodate can compare against it. '''
        with open(path, 'wt') as out:
            logging.debug('writing file %s', path)
            out.write(content)
        os.utime(path, (self.mtime, self.mtime))

    async def render(self):
        # category-specific template gets the single-page archive view,
        # otherwise the regular paginated listing; the feed is always built
        if self.is_altrender:
            self.render_onepage()
        else:
            self.render_paginated()
        self.render_feed()

    def render_onepage(self):
        ''' Render the whole category as a single page, posts grouped by
        year, using the Category_<name>.html template. '''
        years = {}
        for k in list(sorted(self.data.keys(), reverse=True)):
            post = self.data[k]
            year = int(arrow.get(post.pubtime).format('YYYY'))
            if year not in years:
                years.update({year: []})
            years[year].append(post.tmplvars)

        tmplvars = {
            'taxonomy': {
                'add_welcome': self.is_front,
                'title': self.title,
                'name': self.name,
                'lastmod': arrow.get(self.mtime).format(
                    shared.ARROWFORMAT['rcf']
                ),
                'url': self.url,
                'feed': "%s/%s/" % (
                    self.url,
                    shared.config.get('site', 'feed')
                ),
            },
            'site': shared.site,
            'by_year': years
        }
        dirname = self.path_paged(1)
        o = os.path.join(dirname, self.indexfile)
        logging.info(
            "Rendering category %s to %s",
            self.name,
            o
        )
        tmplfile = "%s_%s.html" % (
            self.__class__.__name__,
            self.name
        )
        r = shared.j2.get_template(tmplfile).render(tmplvars)
        self.write_html(o, r)

    def render_feed(self):
        ''' Render the Atom feed for the newest page worth of posts and
        notify the WebSub hub. '''
        start = 0
        end = int(shared.config.getint('display', 'pagination'))
        posttmpls = [
            self.data[k].tmplvars
            for k in list(sorted(
                self.data.keys(),
                reverse=True
            ))[start:end]
        ]
        dirname = self.path_paged(1, feed=True)
        o = os.path.join(dirname, self.feedfile)
        logging.info(
            "Rendering feed of category %s to %s",
            self.name,
            o
        )

        flink = "%s%s%s" % (
            shared.config.get('site', 'url'),
            self.url,
            shared.config.get('site', 'feed')
        )
        fg = FeedGenerator()
        fg.id(flink)
        fg.link(
            href=flink,
            rel='self'
        )
        fg.title(self.title)
        fg.author({
            'name': shared.site.get('author').get('name'),
            'email': shared.site.get('author').get('email')
        })
        fg.logo('%s/favicon.png' % shared.site.get('url'))
        fg.updated(arrow.get(self.mtime).to('utc').datetime)

        # feedgen prepends entries, so iterate oldest-first to end up
        # with newest-first in the output
        for p in reversed(posttmpls):
            link = '%s/%s/' % (shared.site.get('url'), p.get('slug'))
            dt = arrow.get(p.get('pubtime')).to('utc')

            content = p.get('html')
            if p.get('photo'):
                content = "%s\n\n%s" % (p.get('photo'), content)

            fe = fg.add_entry()
            fe.id(link)
            fe.link(href=link)
            fe.title(p.get('title'))
            fe.published(dt.datetime)
            fe.updated(dt.datetime)
            fe.content(
                content,
                type='CDATA'
            )
            fe.rights('%s %s %s' % (
                dt.format('YYYY'),
                shared.site.get('author').get('name'),
                p.get('licence').get('text')
            ))
            if p.get('enclosure'):
                enclosure = p.get('enclosure')
                fe.enclosure(
                    enclosure.get('url'),
                    "%d" % enclosure.get('size'),
                    enclosure.get('mime')
                )

        with open(o, 'wb') as f:
            f.write(fg.atom_str(pretty=True))

        # with open(o.replace('.xml', '.rss'), 'wb') as f:
        # f.write(fg.rss_str(pretty=True))

        # ping pubsub
        r = requests.post(
            shared.site.get('websub').get('hub'),
            data={
                'hub.mode': 'publish',
                'hub.url': flink
            }
        )
        logging.info(r.text)

    def render_paginated(self):
        ''' Render the category as newest-first paginated index pages
        using the generic Category.html template. '''
        pagination = shared.config.getint('display', 'pagination')
        pages = ceil(len(self.data) / pagination)
        page = 1

        while page <= pages:
            # the welcome block only appears on page 1 of the front page
            add_welcome = False
            if (self.is_front and page == 1):
                add_welcome = True
            # list relevant post templates
            start = int((page - 1) * pagination)
            end = int(start + pagination)
            posttmpls = [
                self.data[k].tmplvars
                for k in list(sorted(
                    self.data.keys(),
                    reverse=True
                ))[start:end]
            ]
            # define data for template
            # TODO move the pagination links here, the one in jinja
            # is overcomplicated
            tmplvars = {
                'taxonomy': {
                    'add_welcome': add_welcome,
                    'title': self.title,
                    'name': self.name,
                    'page': page,
                    'total': pages,
                    'perpage': pagination,
                    'lastmod': arrow.get(self.mtime).format(
                        shared.ARROWFORMAT['rcf']
                    ),
                    'url': self.url,
                    'feed': "%s/%s/" % (
                        self.url,
                        shared.config.get('site', 'feed')
                    ),
                },
                'site': shared.site,
                'posts': posttmpls,
            }
            # render HTML
            dirname = self.path_paged(page)
            o = os.path.join(dirname, self.indexfile)
            logging.info(
                "Rendering page %d/%d of category %s to %s",
                page,
                pages,
                self.name,
                o
            )
            tmplfile = "%s.html" % (self.__class__.__name__)
            r = shared.j2.get_template(tmplfile).render(tmplvars)
            self.write_html(o, r)
            page = page + 1
|
2017-05-23 11:14:47 +01:00
|
|
|
|
2017-05-26 10:14:24 +01:00
|
|
|
|
2017-10-27 10:29:33 +01:00
|
|
|
class Singular(object):
    ''' A single entry of the site: either a markdown post (.md) or a
    photo post (.jpg), parsed into metadata + content and rendered to
    its own index.html. '''

    indexfile = 'index.html'

    def __init__(self, fpath):
        logging.debug("initiating singular object from %s", fpath)
        self.fpath = fpath
        self.mtime = os.path.getmtime(self.fpath)
        # stime starts as the source mtime but is bumped by newer
        # comment files (see the `comments` property)
        self.stime = self.mtime
        self.fname, self.fext = os.path.splitext(os.path.basename(self.fpath))
        # the parent directory name doubles as the category name
        self.category = os.path.basename(os.path.dirname(self.fpath))
        self._images = NoDupeContainer()

        if self.fext == '.md':
            # markdown post: YAML frontmatter + markdown body
            with open(self.fpath, mode='rt') as f:
                self.fm = frontmatter.parse(f.read())
                self.meta, self.content = self.fm
            self.photo = None
        elif self.fext == '.jpg':
            # photo post: metadata comes out of the image itself
            self.photo = WebImage(self.fpath)
            self.meta = self.photo.fm_meta
            self.content = self.photo.fm_content
            self.photo.inline = False
            self.photo.cssclass = 'u-photo'

    def init_extras(self):
        ''' Process queued incoming webmentions and warm the comments. '''
        self.receive_webmentions()
        c = self.comments

    # note: due to SQLite locking, this will not be async for now
    def receive_webmentions(self):
        ''' Drain queued incoming webmentions for this post's URL. '''
        wdb = shared.WebmentionQueue()
        queued = wdb.get_queued(self.url)
        for incoming in queued:
            wm = Webmention(
                incoming.get('source'),
                incoming.get('target'),
                incoming.get('dt')
            )
            wm.receive()
            wdb.entry_done(incoming.get('id'))
        wdb.finish()

    def queue_webmentions(self):
        ''' Queue outgoing webmentions for every external URL this post
        links to; skipped entirely for future-dated posts. '''
        if self.is_future:
            return
        wdb = shared.WebmentionQueue()
        for target in self.urls_to_ping:
            if not wdb.exists(self.url, target, self.published):
                wdb.queue(self.url, target)
            else:
                logging.debug(
                    "not queueing - webmention already queued from %s to %s",
                    self.url,
                    target)
        wdb.finish()

    @property
    def urls_to_ping(self):
        # external URLs in the content body, plus the reply target and
        # any syndication endpoints; own domains are excluded, and a
        # dict is used as an ordered de-duplicating set
        urls = [x.strip()
                for x in shared.REGEX.get('urls').findall(self.content)]
        if self.is_reply:
            urls.append(self.is_reply)
        for url in self.syndicate:
            urls.append(url)
        r = {}
        for link in urls:
            parsed = urlparse(link)
            if parsed.netloc in shared.config.get('site', 'domains'):
                continue
            if link in r:
                continue
            r.update({link: True})
        return r.keys()

    @property
    def redirects(self):
        # explicit frontmatter redirects plus the generated shortslug
        r = self.meta.get('redirect', [])
        r.append(self.shortslug)
        return list(set(r))

    @property
    def is_uptodate(self):
        # the rendered HTML is current when it exists and is not older
        # than stime (source mtime or newest comment)
        for f in [self.htmlfile]:
            if not os.path.isfile(f):
                return False
            mtime = os.path.getmtime(f)
            if mtime < self.stime:
                return False
        return True

    @property
    def htmlfile(self):
        return os.path.join(
            shared.config.get('common', 'build'),
            self.fname,
            self.indexfile
        )

    @property
    def images(self):
        # the post photo (if any) plus all inline markdown images
        if self.photo:
            self._images.append(self.fname, self.photo)
        # add inline images
        for shortcode, alt, fname, title, css in self.inline_images:
            # this does the appending automatically
            im = self._find_image(fname)

        return self._images

    @property
    def comments(self):
        ''' Collect comment files for this post (under its slug or any of
        its redirect slugs), bumping stime to the newest comment. '''
        comments = NoDupeContainer()
        cfiles = []
        lookin = [*self.redirects, self.fname]
        for d in lookin:
            maybe = glob.glob(
                os.path.join(
                    shared.config.get('dirs', 'comment'),
                    d,
                    '*.md'
                )
            )
            cfiles = [*cfiles, *maybe]
        for cpath in cfiles:
            cmtime = os.path.getmtime(cpath)
            if cmtime > self.stime:
                self.stime = cmtime

            c = Comment(cpath)
            comments.append(c.mtime, c)
        return comments

    @property
    def replies(self):
        # webmention-type comments only, sorted by time
        r = {}
        for mtime, c in self.comments:
            if c.type == 'webmention':
                r.update({mtime: c.tmplvars})
        return sorted(r.items())

    @property
    def reactions(self):
        # non-webmention comments (likes, reposts, ...) grouped by type
        r = {}
        for mtime, c in self.comments:
            if c.type == 'webmention':
                continue
            if c.type not in r:
                r[c.type] = {}
            r[c.type].update({mtime: c.tmplvars})

        for icon, comments in r.items():
            r[icon] = sorted(comments.items())
        return r

    @property
    def exif(self):
        if not self.photo:
            return {}
        return self.photo.exif

    @property
    def published(self):
        # arrow object; falls back to the file mtime when frontmatter
        # has no 'published' entry
        return arrow.get(self.meta.get('published', self.mtime))

    @property
    def updated(self):
        # arrow object or False when frontmatter has no 'updated' entry
        u = self.meta.get('updated', False)
        if u:
            u = arrow.get(u)
        return u

    @property
    def pubtime(self):
        # unix timestamp of the publish date
        return int(self.published.timestamp)

    @property
    def is_reply(self):
        # the 'in-reply-to' URL, or False for regular posts
        return self.meta.get('in-reply-to', False)

    @property
    def is_future(self):
        now = arrow.utcnow().timestamp
        if self.pubtime > now:
            return True
        return False

    @property
    def licence(self):
        # per-category CC licence, falling back to the default one
        l = shared.config.get(
            'licence',
            self.category,
            fallback=shared.config.get('licence', 'default',)
        )
        return {
            'text': 'CC %s 4.0' % l.upper(),
            'url': 'https://creativecommons.org/licenses/%s/4.0/' % l,
        }

    @property
    def corpus(self):
        # searchable text blob: title, slug, summary and body
        corpus = "\n".join([
            "%s" % self.meta.get('title', ''),
            "%s" % self.fname,
            "%s" % self.meta.get('summary', ''),
            "%s" % self.content,
        ])

        if self.photo:
            corpus = corpus + "\n".join(self.tags)

        return corpus

    @property
    def lang(self):
        # default is English, this will only be changed if the try
        # succeeds and actually detects a language
        lang = 'en'
        try:
            lang = langdetect.detect("\n".join([
                self.fname,
                self.meta.get('title', ''),
                self.content
            ]))
        except BaseException:
            pass
        return lang

    def _find_image(self, fname):
        ''' Locate an image by basename under the files directory and
        cache it as a WebImage in self._images. Returns None if the
        image cannot be found. '''
        fname = os.path.basename(fname)
        pattern = os.path.join(
            shared.config.get('dirs', 'files'),
            '**',
            fname
        )
        logging.debug('trying to locate image %s in %s', fname, pattern)
        maybe = glob.glob(pattern)

        if not maybe:
            logging.error('image not found: %s', fname)
            return None

        maybe = maybe.pop()
        logging.debug('image found: %s', maybe)
        if fname not in self._images:
            im = WebImage(maybe)
            self._images.append(fname, im)
        return self._images[fname]

    @property
    def inline_images(self):
        # (shortcode, alt, fname, title, css) tuples for every markdown
        # image in the content
        return shared.REGEX['mdimg'].findall(self.content)

    @property
    def url(self):
        return "%s/%s/" % (shared.config.get('site', 'url'), self.fname)

    @property
    def body(self):
        body = "%s" % (self.content)
        # get inline images, downsize them and convert them to figures
        for shortcode, alt, fname, title, css in self.inline_images:
            #fname = os.path.basename(fname)
            im = self._find_image(fname)
            if not im:
                continue

            im.alt = alt
            im.title = title
            im.cssclass = css
            body = body.replace(shortcode, str(im))
        return body

    @property
    def html(self):
        # markdown body converted to HTML via pandoc
        html = "%s" % (self.body)

        return shared.Pandoc().convert(html)

    @property
    def title(self):
        # explicit title, or "RE: <target>" for replies, or the
        # formatted publish date as a last resort
        maybe = self.meta.get('title', False)
        if maybe:
            return maybe
        if self.is_reply:
            return "RE: %s" % self.is_reply
        return self.published.format(shared.ARROWFORMAT['display'])

    @property
    def review(self):
        return self.meta.get('review', False)

    @property
    def summary(self):
        # pandoc-converted summary, cached after first access
        s = self.meta.get('summary', '')
        if not s:
            return s
        if not hasattr(self, '_summary'):
            self._summary = shared.Pandoc().convert(s)
        return self._summary

    @property
    def shortslug(self):
        # base-N encoded pubtime used as a short URL slug
        return shared.baseN(self.pubtime)

    @property
    def syndicate(self):
        urls = self.meta.get('syndicate', [])
        if self.photo and self.photo.is_photo:
            urls.append("https://brid.gy/publish/flickr")
        return urls

    @property
    def tags(self):
        return self.meta.get('tags', [])

    @property
    def description(self):
        # HTML-escaped summary for meta description tags
        return html.escape(self.meta.get('summary', ''))

    @property
    def oembedvars(self):
        ''' oEmbed response dict for this post, cached; photo posts get
        the photo-type variant. '''
        if not hasattr(self, '_oembedvars'):
            self._oembedvars = {
                "version": "1.0",
                "type": "link",
                "title": self.title,
                "url": "%s/%s/" % (shared.site.get('url'), self.fname),
                "author_name": shared.site.get('author').get('name'),
                "author_url": shared.site.get('author').get('url'),
                "provider_name": shared.site.get('title'),
                "provider_url": shared.site.get('url'),
            }
            if self.photo:
                self._oembedvars.update({
                    "type": "photo",
                    "width": self.photo.tmplvars.get('width'),
                    "height": self.photo.tmplvars.get('height'),
                    "url": self.photo.tmplvars.get('src'),
                })
        return self._oembedvars

    @property
    def tmplvars(self):
        # very simple caching because we might use this 4 times:
        # post HTML, category, front posts and atom feed
        if not hasattr(self, '_tmplvars'):
            self._tmplvars = {
                'title': self.title,
                'pubtime': self.published.format(
                    shared.ARROWFORMAT['iso']
                ),
                'pubdate': self.published.format(
                    shared.ARROWFORMAT['display']
                ),
                'pubrfc': self.published.format(
                    shared.ARROWFORMAT['rcf']
                ),
                'category': self.category,
                'html': self.html,
                'lang': self.lang,
                'slug': self.fname,
                'shortslug': self.shortslug,
                'licence': self.licence,
                'is_reply': self.is_reply,
                # NOTE(review): published year minus current year yields a
                # negative number for past posts — looks intentional for the
                # template, but confirm against the Jinja templates
                'age': int(self.published.format('YYYY')) - int(arrow.utcnow().format('YYYY')),
                'summary': self.summary,
                'description': self.description,
                'replies': self.replies,
                'reactions': self.reactions,
                'syndicate': self.syndicate,
                'tags': self.tags,
                'photo': False,
                'enclosure': False,
                'review': self.review
            }
            if self.photo:
                self._tmplvars.update({
                    'photo': str(self.photo),
                    'enclosure': {
                        'mime': self.photo.mime_type,
                        'size': self.photo.mime_size,
                        'url': self.photo.href
                    }
                })

        return self._tmplvars

    async def render(self):
        ''' Render this post to its own index.html inside the build
        directory. '''
        logging.info('rendering %s', self.fname)
        o = self.htmlfile

        tmplfile = "%s.html" % (self.__class__.__name__)
        r = shared.j2.get_template(tmplfile).render({
            'post': self.tmplvars,
            'site': shared.site,
        })

        d = os.path.dirname(o)
        if not os.path.isdir(d):
            logging.debug('creating directory %s', d)
            os.makedirs(d)
        with open(o, 'wt') as out:
            logging.debug('writing file %s', o)
            out.write(r)
        # use the comment time, not the source file time for this
        os.utime(o, (self.stime, self.stime))
        # oembed = os.path.join(
        #shared.config.get('common', 'build'),
        # self.fname,
        # 'oembed.json'
        # )
        # with open(oembed, 'wt') as out:
        #logging.debug('writing oembed file %s', oembed)
        # out.write(json.dumps(self.oembedvars))

    def __repr__(self):
        return "%s/%s" % (self.category, self.fname)
|
2017-06-02 11:19:55 +01:00
|
|
|
|
2017-06-12 15:40:30 +01:00
|
|
|
|
2017-10-27 10:29:33 +01:00
|
|
|
class WebImage(object):
    """A single image referenced from a post.

    Reads EXIF metadata via exiftool (cached as JSON under the var/cache
    dir), decides whether the image counts as a "photo" (Artist/Copyright
    matched against a configured regex), and produces the configured set
    of downsized output files, optionally watermarked.
    """

    def __init__(self, fpath):
        logging.info("parsing image: %s", fpath)
        self.fpath = fpath
        self.mtime = os.path.getmtime(self.fpath)
        bname = os.path.basename(fpath)
        self.fname, self.fext = os.path.splitext(bname)
        # display metadata; overwritten later by the markdown image parser
        self.title = ''
        self.alt = bname
        self.target = ''
        self.cssclass = ''

    @property
    def fm_content(self):
        """Front-matter content candidate: the EXIF Description field."""
        return self.meta.get('Description', '')

    @property
    def fm_meta(self):
        """Front-matter metadata candidate built from EXIF fields."""
        return {
            'published': self.meta.get(
                'ReleaseDate',
                self.meta.get('ModifyDate')
            ),
            'title': self.meta.get('Headline', self.fname),
            'tags': list(set(self.meta.get('Subject', []))),
        }

    @property
    def mime_type(self):
        """MIME type from EXIF, defaulting to image/jpeg."""
        return str(self.meta.get('MIMEType', 'image/jpeg'))

    @property
    def mime_size(self):
        """Byte size for the enclosure: the largest downsized file when
        available, otherwise the original file size from EXIF."""
        if self.is_downsizeable:
            # FIX: previously `except Exception as e: pass` bound an unused
            # variable and swallowed everything; narrowed to the errors the
            # lookup/conversion can actually raise
            try:
                return int(self.sizes[-1][1]['fsize'])
            except (IndexError, KeyError, TypeError, ValueError):
                pass
        return int(self.meta.get('FileSize'))

    @property
    def href(self):
        """Link target: the explicit target if set, else the URL of the
        largest downsized variant, or False when not downsizeable."""
        if len(self.target):
            return self.target

        if not self.is_downsizeable:
            return False

        return self.sizes[-1][1]['url']

    @property
    def src(self):
        # if the image is too small to downsize it will be copied over,
        # so the link needs to point at the copied file
        src = "/%s/%s" % (
            shared.config.get('common', 'files'),
            "%s%s" % (self.fname, self.fext)
        )

        if self.is_downsizeable:
            try:
                src = [
                    e for e in self.sizes
                    if e[0] == shared.config.getint('photo', 'default')
                ][0][1]['url']
            except BaseException:
                pass
        return src

    @property
    def meta(self):
        """EXIF metadata dict, read via exiftool and cached as JSON."""
        if not hasattr(self, '_exif'):
            # reading EXIF is expensive enough even with a static generator
            # to consider caching it, so I'll do that here
            cpath = os.path.join(
                shared.config.get('var', 'cache'),
                "%s.exif.json" % self.fname
            )

            if os.path.exists(cpath):
                cmtime = os.path.getmtime(cpath)
                # cache is only valid if it is not older than the image
                if cmtime >= self.mtime:
                    with open(cpath, 'rt') as f:
                        self._exif = json.loads(f.read())
                        return self._exif

            self._exif = shared.ExifTool(self.fpath).read()
            if not os.path.isdir(shared.config.get('var', 'cache')):
                os.makedirs(shared.config.get('var', 'cache'))
            with open(cpath, 'wt') as f:
                f.write(json.dumps(self._exif))
        return self._exif

    @property
    def is_photo(self):
        """True when Artist or Copyright EXIF matches the configured regex."""
        # missing regex from config
        if 'photo' not in shared.REGEX:
            # FIX: these three debug calls had a %s placeholder but no
            # argument, so the message itself was broken
            logging.debug('%s: photo regex missing from config', self.fpath)
            return False

        cpr = self.meta.get('Copyright', '')
        art = self.meta.get('Artist', '')

        # both Artist and Copyright missing from EXIF
        if not cpr and not art:
            logging.debug(
                '%s: Artist or Copyright missing from EXIF', self.fpath)
            return False

        # we have regex, Artist and Copyright, try matching them
        pattern = re.compile(shared.config.get('photo', 'regex'))
        if pattern.search(cpr) or pattern.search(art):
            return True

        logging.debug('%s: patterns did not match', self.fpath)
        return False

    @property
    def exif(self):
        """Human-facing EXIF summary (camera, aperture, GPS, ...); empty
        dict for non-photos."""
        exif = {}
        if not self.is_photo:
            return exif

        # first matching EXIF candidate wins for each output key
        mapping = {
            'camera': ['Model'],
            'aperture': ['FNumber', 'Aperture'],
            'shutter_speed': ['ExposureTime'],
            # 'focallength': ['FocalLengthIn35mmFormat', 'FocalLength'],
            'focallength': ['FocalLength'],
            'iso': ['ISO'],
            'lens': ['LensID', 'LensSpec', 'Lens'],
            'geo_latitude': ['GPSLatitude'],
            'geo_longitude': ['GPSLongitude'],
        }

        for ekey, candidates in mapping.items():
            for candidate in candidates:
                maybe = self.meta.get(candidate, None)
                if not maybe:
                    continue
                elif 'geo_' in ekey:
                    # GPS coordinates rounded to 5 decimals
                    exif[ekey] = round(float(maybe), 5)
                else:
                    exif[ekey] = maybe
                break
        return exif

    @property
    def sizes(self):
        """(size, meta) tuples for every configured downsize target that is
        not larger than the image, sorted ascending by size; meta holds
        fpath, url, crop, exists and fsize."""
        sizes = []
        _max = max(
            int(self.meta.get('ImageWidth')),
            int(self.meta.get('ImageHeight'))
        )

        for size in shared.config.options('downsize'):
            if _max < int(size):
                continue

            name = '%s_%s%s' % (
                self.fname,
                shared.config.get('downsize', size),
                self.fext
            )

            fpath = os.path.join(
                shared.config.get('common', 'build'),
                shared.config.get('common', 'files'),
                name
            )

            exists = os.path.isfile(fpath)
            # in case there is a downsized image compare against the main
            # file's mtime and invalidate the existing if it's older
            if exists:
                mtime = os.path.getmtime(fpath)
                if self.mtime > mtime:
                    exists = False

            smeta = {
                'fpath': fpath,
                'exists': False,
                'url': "%s/%s/%s" % (
                    shared.config.get('site', 'url'),
                    shared.config.get('common', 'files'),
                    name
                ),
                'crop': shared.config.getboolean(
                    'crop',
                    size,
                    fallback=False
                ),
                'fsize': int(self.meta.get('FileSize'))
            }

            # FIX: the staleness check above was computed and then ignored —
            # a second plain isfile() test re-set 'exists' here, so outdated
            # downsized files were never regenerated by needs_downsize
            if exists:
                smeta.update({
                    'exists': True,
                    'fsize': os.path.getsize(fpath)
                })

            sizes.append((
                int(size),
                smeta
            ))
        return sorted(sizes, reverse=False)

    @property
    def is_downsizeable(self):
        """ Check if the image is large enough to downsize it """
        ftype = self.meta.get('FileType', None)
        if not ftype:
            return False
        elif ftype.lower() not in ('jpeg', 'png'):
            # only JPEG and PNG are downsized
            return False

        _max = max(
            int(self.meta.get('ImageWidth')),
            int(self.meta.get('ImageHeight'))
        )
        _min = shared.config.getint('photo', 'default')
        if _max > _min:
            return True

        return False

    def _maybe_watermark(self, img):
        """ Composite image by adding watermark file over it """

        if not self.is_photo:
            logging.debug("not watermarking: not a photo")
            return img

        wmarkfile = shared.config.get('photo', 'watermark')
        if not os.path.isfile(wmarkfile):
            logging.debug("not watermarking: watermark not found")
            return img

        logging.debug("%s is a photo, applying watermarking", self.fpath)
        with wand.image.Image(filename=wmarkfile) as wmark:
            # landscape: watermark 20% of the width in the bottom-right;
            # portrait: 16% of the height, rotated 90° below
            if img.width > img.height:
                w = img.width * 0.2
                h = wmark.height * (w / wmark.width)
                x = img.width - w - (img.width * 0.01)
                y = img.height - h - (img.height * 0.01)
            else:
                w = img.height * 0.16
                h = wmark.height * (w / wmark.width)
                x = img.width - h - (img.width * 0.01)
                y = img.height - w - (img.height * 0.01)

            w = round(w)
            h = round(h)
            x = round(x)
            y = round(y)

            wmark.resize(w, h)
            if img.width <= img.height:
                wmark.rotate(-90)
            img.composite(image=wmark, left=x, top=y)

        return img

    def _copy(self):
        """Copy a non-downsizeable image into the build dir, unless the
        build copy is already up to date."""
        fname = "%s%s" % (self.fname, self.fext)
        fpath = os.path.join(
            shared.config.get('common', 'build'),
            shared.config.get('common', 'files'),
            fname
        )
        if os.path.isfile(fpath):
            mtime = os.path.getmtime(fpath)
            if self.mtime <= mtime:
                return
        logging.info("copying %s to build dir", fname)
        shutil.copy(self.fpath, fpath)

    def _intermediate_dimension(self, size, width, height, crop=False):
        """ Calculate intermediate resize dimension and return a tuple of width, height """
        size = int(size)
        # without crop the longer edge is scaled to `size`; with crop the
        # shorter edge is, so liquid_rescale can trim the rest
        if (width > height and not crop) \
                or (width < height and crop):
            w = size
            h = int(float(size / width) * height)
        else:
            h = size
            w = int(float(size / height) * width)
        return (w, h)

    def _intermediate(self, img, size, target, crop=False):
        """Write one downsized (optionally cropped/sharpened) variant of
        *img* to *target*; returns False when the image is already smaller."""
        if img.width < size and img.height < size:
            return False

        with img.clone() as thumb:
            width, height = self._intermediate_dimension(
                size,
                img.width,
                img.height,
                crop
            )
            thumb.resize(width, height)

            if crop:
                thumb.liquid_rescale(size, size, 1, 1)

            if self.meta.get('FileType', 'jpeg').lower() == 'jpeg':
                thumb.compression_quality = 94
                thumb.unsharp_mask(
                    radius=1,
                    sigma=0.5,
                    amount=0.7,
                    threshold=0.5
                )
                thumb.format = 'pjpeg'

            # this is to make sure pjpeg happens
            with open(target, 'wb') as f:
                logging.info("writing %s", target)
                thumb.save(file=f)

    @property
    def needs_downsize(self):
        """True when any configured downsize target is missing or stale."""
        needed = False
        for (size, downsized) in self.sizes:
            if downsized.get('exists', False):
                logging.debug(
                    "size %d exists: %s",
                    size,
                    downsized.get('fpath')
                )
                continue
            logging.debug(
                "size %d missing: %s",
                size,
                downsized.get('fpath')
            )
            needed = True
        return needed

    async def downsize(self):
        """Produce all downsized variants (or copy the file verbatim when it
        is too small to downsize)."""
        if not self.is_downsizeable:
            return self._copy()

        if not self.needs_downsize and not shared.config.getboolean(
                'params', 'regenerate'):
            return

        build_files = os.path.join(
            shared.config.get('common', 'build'),
            shared.config.get('common', 'files'),
        )
        if not os.path.isdir(build_files):
            os.makedirs(build_files)

        logging.info("downsizing %s%s", self.fname, self.fext)
        with wand.image.Image(filename=self.fpath) as img:
            img.auto_orient()
            img = self._maybe_watermark(img)
            for (size, downsized) in self.sizes:
                self._intermediate(
                    img,
                    size,
                    downsized['fpath'],
                    downsized['crop']
                )

    @property
    def src_size(self):
        """(width, height) of the default displayed variant."""
        width = int(self.meta.get('ImageWidth'))
        height = int(self.meta.get('ImageHeight'))

        if not self.is_downsizeable:
            return width, height

        return self._intermediate_dimension(
            shared.config.getint('photo', 'default'),
            width,
            height
        )

    @property
    def tmplvars(self):
        """Template-variable dict for rendering the image."""
        src_width, src_height = self.src_size

        return {
            'src': self.src,
            'width': src_width,
            'height': src_height,
            'target': self.href,
            'css': self.cssclass,
            'title': self.title,
            'alt': self.alt,
            'exif': self.exif,
            'is_photo': self.is_photo,
            'author': self.meta.get('Artist', ''),
        }

    def __repr__(self):
        return "Image: %s, photo: %r, EXIF: %s" % (
            self.fname, self.is_photo, self.exif
        )

    def __str__(self):
        # renders through the WebImage.html template
        tmplfile = "%s.html" % (self.__class__.__name__)
        return shared.j2.get_template(tmplfile).render({
            'photo': self.tmplvars
        })
|
2017-06-12 15:40:30 +01:00
|
|
|
|
2017-05-31 13:53:47 +01:00
|
|
|
|
2017-10-27 15:56:05 +01:00
|
|
|
class Comment(object):
    """A received webmention/comment stored as a frontmatter markdown file."""

    def __init__(self, fpath):
        logging.debug("initiating comment object from %s", fpath)
        self.fpath = fpath
        self.mtime = os.path.getmtime(self.fpath)
        with open(self.fpath, mode='rt') as f:
            # fm is a (metadata, content) tuple from python-frontmatter
            self.fm = frontmatter.parse(f.read())
            self.meta, self.content = self.fm

    @property
    def dt(self):
        # publish time of the comment as an arrow object
        return arrow.get(self.meta.get('date'))

    @property
    def html(self):
        # content converted to HTML through Pandoc
        html = "%s" % (self.content)
        return shared.Pandoc().convert(html)

    @property
    def target(self):
        # slug of the post this comment targets: last path element of the
        # target URL
        t = urlparse(self.meta.get('target'))
        return t.path.rstrip('/').strip('/').split('/')[-1]

    @property
    def source(self):
        # URL the comment originates from
        return self.meta.get('source')

    @property
    def author(self):
        # fallback author: derived from the source URL's hostname
        r = {
            'name': urlparse(self.source).hostname,
            'url': self.source
        }

        author = self.meta.get('author')
        if not author:
            return r

        # prefer an explicit name; when only a URL is present, use its
        # hostname as the name (a 'name' key wins over 'url' if both exist)
        if 'name' in author:
            r.update({'name': self.meta.get('author').get('name')})
        elif 'url' in author:
            r.update(
                {'name': urlparse(self.meta.get('author').get('url')).hostname})

        return r

    @property
    def type(self):
        # caching, because calling Pandoc is expensive
        if not hasattr(self, '_type'):
            self._type = 'webmention'
            t = self.meta.get('type', 'webmention')
            # non-webmention reactions (likes, reposts, ...) render as a star
            if t != 'webmention':
                self._type = '★'

            # a comment whose whole plain-text content is a single emoji is
            # displayed as that emoji
            if len(self.content):
                maybe = shared.Pandoc('plain').convert(self.content)
                if maybe in UNICODE_EMOJI:
                    self._type = maybe
        return self._type

    @property
    def tmplvars(self):
        # cached template-variable dict for rendering the comment
        if not hasattr(self, '_tmplvars'):
            self._tmplvars = {
                'author': self.author,
                'source': self.source,
                'pubtime': self.dt.format(shared.ARROWFORMAT['iso']),
                'pubdate': self.dt.format(shared.ARROWFORMAT['display']),
                'html': self.html,
                'type': self.type
            }
        return self._tmplvars

    def __repr__(self):
        return "Comment from %s for %s" % (
            self.source, self.target
        )

    def __str__(self):
        # renders through the Comment.html template
        tmplfile = "%s.html" % (__class__.__name__)
        return shared.j2.get_template(tmplfile).render({
            'comment': self.tmplvars
        })
|
2017-10-27 15:56:05 +01:00
|
|
|
|
|
|
|
|
2017-10-29 19:11:01 +00:00
|
|
|
class Webmention(object):
    """An outgoing or incoming webmention between *source* and *target*.

    send() discovers the target's webmention endpoint and POSTs to it;
    receive() fetches and parses the source and persists the mention as a
    frontmatter markdown file under the comment directory.
    """

    def __init__(self, source, target, dt=None):
        """
        FIX: the original default (`dt=arrow.utcnow().timestamp`) was
        evaluated once at import time, so every webmention created without
        an explicit dt silently shared the generator's startup timestamp.
        A None sentinel restores "now at call time" semantics while staying
        backward-compatible for callers that pass dt explicitly.
        """
        self.source = source
        self.target = target
        if dt is None:
            dt = arrow.utcnow().timestamp
        self.dt = arrow.get(dt).to('utc')
        logging.info(
            "processing webmention %s => %s",
            self.source,
            self.target
        )
        self._source = None

    def send(self):
        """Discover the target's webmention endpoint and POST to it.

        Returns True on success or when there is nothing to do (no
        endpoint), False on delivery failure.
        """
        rels = shared.XRay(self.target).set_discover().parse()
        endpoint = False
        if 'rels' not in rels:
            logging.debug("no rel found for %s", self.target)
            return True
        for k in rels.get('rels').keys():
            if 'webmention' in k:
                endpoint = rels.get('rels').get(k).pop()
                break
        if not endpoint:
            logging.debug("no endpoint found for %s", self.target)
            return True
        logging.info(
            "Sending webmention to endpoint: %s, source: %s, target: %s",
            endpoint,
            self.source,
            self.target,
        )
        try:
            p = requests.post(
                endpoint,
                data={
                    'source': self.source,
                    'target': self.target
                }
            )
            if p.status_code == requests.codes.ok:
                logging.info("webmention sent")
                return True
            elif p.status_code == 400 and 'brid.gy' in self.target:
                # bridgy answers 400 for already-known mentions; treat as OK
                logging.warning(
                    "potential bridgy duplicate: %s %s",
                    p.status_code,
                    p.text)
                return True
            else:
                logging.error(
                    "webmention failure: %s %s",
                    p.status_code,
                    p.text)
                return False
        except Exception as e:
            logging.error("sending webmention failed: %s", e)
            return False

    def receive(self):
        """Fetch and parse the source URL; save the mention, or delete a
        previously saved one when the source answers HTTP 410 Gone."""
        head = requests.head(self.source)
        if head.status_code == 410:
            self._delete()
            return
        elif head.status_code != requests.codes.ok:
            logging.error(
                "webmention source failure: %s %s",
                head.status_code,
                self.source
            )
            return

        self._source = shared.XRay(self.source).parse()
        if 'data' not in self._source:
            logging.error(
                "no data found in webmention source: %s",
                self.source)
            return
        self._save()

    def _delete(self):
        """Remove a previously saved webmention file, if any."""
        if os.path.isfile(self.fpath):
            logging.info("Deleting webmention %s", self.fpath)
            os.unlink(self.fpath)
        return

    def _save(self):
        """Persist the parsed webmention as a frontmatter markdown file."""
        fm = frontmatter.loads('')
        fm.content = self.content
        fm.metadata = self.meta
        with open(self.fpath, 'wt') as f:
            logging.info("Saving webmention to %s", self.fpath)
            f.write(frontmatter.dumps(fm))
        return

    @property
    def relation(self):
        """Microformats relation of the mention; defaults to 'webmention'."""
        r = 'webmention'
        k = self._source.get('data').keys()
        for maybe in ['in-reply-to', 'repost-of', 'bookmark-of', 'like-of']:
            if maybe in k:
                r = maybe
                break
        return r

    @property
    def meta(self):
        """Frontmatter metadata dict derived from the parsed source (cached)."""
        if not hasattr(self, '_meta'):
            self._meta = {
                'author': self._source.get('data').get('author'),
                'type': self.relation,
                'target': self.target,
                'source': self.source,
                'date': self._source.get('data').get('published'),
            }
        return self._meta

    @property
    def content(self):
        """Source content converted to markdown; '' when the source parse
        carries no usable content."""
        if 'content' not in self._source.get('data'):
            return ''
        elif 'html' in self._source.get('data').get('content'):
            what = self._source.get('data').get('content').get('html')
        elif 'text' in self._source.get('data').get('content'):
            what = self._source.get('data').get('content').get('text')
        else:
            return ''
        return shared.Pandoc('html').convert(what)

    @property
    def fname(self):
        """Filename: '<utc unix timestamp>-<slugified source url>.md'."""
        return "%d-%s.md" % (
            self.dt.timestamp,
            shared.slugfname(self.source)
        )

    @property
    def fpath(self):
        """Full path under the comment dir, keyed by the target's slug;
        creates the per-post directory on demand."""
        tdir = os.path.join(
            shared.config.get('dirs', 'comment'),
            self.target.rstrip('/').strip('/').split('/')[-1]
        )
        if not os.path.isdir(tdir):
            os.makedirs(tdir)
        return os.path.join(
            tdir,
            self.fname
        )
|
2017-10-27 15:56:05 +01:00
|
|
|
|
2017-11-10 15:56:45 +00:00
|
|
|
|
|
|
|
class Worker(object):
    """Minimal asyncio task runner: queue coroutines, run them all to
    completion, then close the loop."""

    def __init__(self):
        self._tasks = []
        # FIX: asyncio.get_event_loop() is deprecated outside a running loop
        # and closing the process-default loop broke any later asyncio use;
        # a dedicated loop is behaviourally identical for this runner.
        self._loop = asyncio.new_event_loop()

    def append(self, job):
        """Schedule *job* (a coroutine) on the worker's loop."""
        task = self._loop.create_task(job)
        self._tasks.append(task)

    def run(self):
        """Run every queued task to completion and close the loop.

        FIX: asyncio.wait() raises ValueError on an empty task set — the
        empty case is now a no-op (the loop is still closed).
        """
        if self._tasks:
            w = asyncio.wait(self._tasks)
            self._loop.run_until_complete(w)
        self._loop.close()
|
|
|
|
|
|
|
|
|
2017-10-27 10:29:33 +01:00
|
|
|
def setup():
    """ parse input parameters and add them as params section to config """
    parser = argparse.ArgumentParser(description='Parameters for NASG')

    # boolean switches, all defaulting to off
    booleanparams = {
        'regenerate': 'force downsizing images',
        'force': 'force rendering HTML',
    }
    for name, helptext in booleanparams.items():
        parser.add_argument(
            '--%s' % (name),
            action='store_true',
            default=False,
            help=helptext
        )

    parser.add_argument(
        '--loglevel',
        default='warning',
        help='change loglevel'
    )

    if not shared.config.has_section('params'):
        shared.config.add_section('params')

    # mirror every parsed argument into the shared config's params section
    for key, value in vars(parser.parse_args()).items():
        shared.config.set('params', key, str(value))

    # remove the rest of the potential loggers
    while logging.root.handlers:
        logging.root.removeHandler(logging.root.handlers[-1])

    logging.basicConfig(
        level=shared.LLEVEL[shared.config.get('params', 'loglevel')],
        format='%(asctime)s - %(levelname)s - %(message)s'
    )
|
|
|
|
|
2017-11-10 15:56:45 +00:00
|
|
|
|
2017-10-28 19:08:40 +01:00
|
|
|
def youngest_mtime(root):
|
|
|
|
youngest = 0
|
|
|
|
files = glob.glob(os.path.join(root, '**'), recursive=True)
|
|
|
|
for f in files:
|
|
|
|
mtime = os.path.getmtime(f)
|
|
|
|
if mtime > youngest:
|
|
|
|
youngest = mtime
|
|
|
|
return youngest
|
|
|
|
|
2017-11-10 15:56:45 +00:00
|
|
|
|
2017-10-27 10:29:33 +01:00
|
|
|
def build():
    """Run a complete site build.

    Parses CLI params, walks all content, queues rendering / image
    downsizing / webmentions, maintains the search DB, renders front page,
    categories and magic.php, copies static files, and finally writes
    sitemap.txt when anything in it is newer than the existing file.
    All output paths come from shared.config.
    """
    setup()
    worker = Worker()
    content = Content()
    sdb = shared.SearchDB()
    magic = MagicPHP()

    collector_front = Category(is_front=True)
    collector_categories = NoDupeContainer()
    # url -> mtime for every page that ends up in sitemap.txt
    sitemap = {}

    for f, post in content:
        logging.info("PARSING %s", f)
        post.init_extras()
        post.queue_webmentions()

        # add to sitemap
        sitemap.update({post.url: post.mtime})

        # extend redirects
        for r in post.redirects:
            magic.redirects.append((r, post.fname))

        # add post to search, if needed
        if not sdb.is_uptodate(post.fname, post.mtime):
            sdb.append(
                post.fname,
                post.corpus,
                post.mtime,
                post.url,
                post.category,
                post.title
            )

        # add render task, if needed
        if not post.is_uptodate or shared.config.getboolean('params', 'force'):
            worker.append(post.render())

        # collect images to downsize
        for fname, im in post.images:
            worker.append(im.downsize())

        # skip adding future posts to any category
        if post.is_future:
            continue

        # skip categories starting with _
        if post.category.startswith('_'):
            continue

        # get the category otherwise
        if post.category not in collector_categories:
            c = Category(post.category)
            collector_categories.append(post.category, c)
        else:
            c = collector_categories[post.category]

        # add post to category
        c.append(post)

        # add post to front
        collector_front.append(post)

    # write search db
    sdb.finish()

    # render front
    if not collector_front.is_uptodate or \
            shared.config.getboolean('params', 'force'):
        worker.append(collector_front.render())

    # render categories
    for name, c in collector_categories:
        if not c.is_uptodate or shared.config.getboolean('params', 'force'):
            worker.append(c.render())

    # add magic.php rendering
    worker.append(magic.render())

    # do all the things!
    worker.run()

    # send webmentions - this is synchronous due to the SQLite locking
    wdb = shared.WebmentionQueue()
    for out in wdb.get_outbox():
        wm = Webmention(
            out.get('source'),
            out.get('target'),
            out.get('dt')
        )
        if wm.send():
            wdb.entry_done(out.get('id'))
    wdb.finish()

    # copy static
    logging.info('copying static files')
    src = shared.config.get('dirs', 'static')
    for item in os.listdir(src):
        s = os.path.join(src, item)
        stime = os.path.getmtime(s)
        d = os.path.join(shared.config.get('common', 'build'), item)
        dtime = 0
        if os.path.exists(d):
            dtime = os.path.getmtime(d)

        if not os.path.exists(d) or shared.config.getboolean(
                'params', 'force') or dtime < stime:
            logging.debug("copying static file %s to %s", s, d)
            shutil.copy2(s, d)
            # FIX: was `'.html' in item`, which also matched names like
            # 'page.html.bak'; only real .html files belong in the sitemap
            if item.endswith('.html'):
                url = "%s/%s" % (shared.config.get('site', 'url'), item)
                sitemap.update({
                    url: os.path.getmtime(s)
                })

    # dump sitemap, if needed
    sitemapf = os.path.join(
        shared.config.get(
            'common',
            'build'),
        'sitemap.txt')
    sitemap_update = True
    if os.path.exists(sitemapf):
        # FIX: max() on an empty dict raised ValueError when there was
        # nothing to put in the sitemap; default=0 keeps the old file
        if int(max(sitemap.values(), default=0)) <= int(os.path.getmtime(sitemapf)):
            sitemap_update = False

    if sitemap_update:
        logging.info('writing updated sitemap')
        with open(sitemapf, 'wt') as smap:
            smap.write("\n".join(sorted(sitemap.keys())))
# script entry point: run a full site build when invoked directly
if __name__ == '__main__':
    build()