diff --git a/.gitignore b/.gitignore
index bf84ffe..8d411c1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -101,3 +101,4 @@ ENV/
.mypy_cache/
config.ini
config.yml
+nasg/config.py
diff --git a/cache.py b/cache.py
deleted file mode 100644
index 22d05bc..0000000
--- a/cache.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import os
-import json
-import hashlib
-import logging
-import glob
-
-class Cached(object):
- def __init__(self, hash='', text='', stime=0):
-
- if not os.path.isdir(glob.CACHE):
- os.mkdir(glob.CACHE)
-
- if hash:
- self._hbase = hash
- elif text:
- self._hbase = hashlib.sha1(text.encode('utf-8')).hexdigest()
- else:
- print("No identifier passed for Cached")
- raise
-
- self._cpath = os.path.join(glob.CACHE, self._hbase)
- self._stime = stime
-
- if os.path.isfile(self._cpath):
- self._ctime = os.stat(self._cpath)
- else:
- self._ctime = None
-
- def get(self):
- if not glob.CACHEENABLED:
- return None
-
- cached = ''
- if os.path.isfile(self._cpath):
- if self._stime and self._stime.st_mtime == self._ctime.st_mtime:
- logging.debug("Cache exists at %s; using it" % (self._cpath ))
- with open(self._cpath, 'r') as c:
- cached = c.read()
- c.close()
- # invalidate old
- elif self._stime and self._stime.st_mtime > self._ctime.st_mtime:
- logging.debug("invalidating cache at %s" % (self._cpath ))
- os.remove(self._cpath)
-
- return cached
-
- def set(self, content):
- if not glob.CACHEENABLED:
- return None
-
- with open(self._cpath, "w") as c:
- logging.debug("writing cache to %s" % (self._cpath ))
- c.write(content)
- c.close()
- if self._stime:
- os.utime(self._cpath, (self._stime.st_mtime, self._stime.st_mtime ))
\ No newline at end of file
diff --git a/generator.py b/generator.py
deleted file mode 100644
index 6a365d4..0000000
--- a/generator.py
+++ /dev/null
@@ -1,293 +0,0 @@
-#!/home/petermolnar.net/.venv/bin/python3.5
-
-"""Usage: generator.py [-h] [-f] [-g] [-p] [-d] [-s FILE]
-
--h --help show this
--f --force force HTML file rendering
--p --pandoc force re-rendering content HTML
--g --regenerate regenerate images
--s --single FILE only (re)generate a single entity
--d --debug set logging level
-"""
-
-import os
-import shutil
-import logging
-import atexit
-import json
-import sys
-import tempfile
-import glob
-from whoosh import index
-from docopt import docopt
-from ruamel import yaml
-from webmentiontools.send import WebmentionSend
-import taxonomy
-import singular
-from slugify import slugify
-import arrow
-
-
-class Engine(object):
- lockfile = "/tmp/petermolnar.net.generator.lock"
-
- def __init__(self):
- if os.path.isfile(self.lockfile):
- raise ValueError("Lockfile %s is present; generator won't run.")
- else:
- with open(self.lockfile, "w") as lock:
- lock.write(arrow.utcnow().format())
- lock.close()
-
- atexit.register(self.removelock)
- atexit.register(self.removetmp)
-
- self._mkdirs()
- self.tags = {}
- self.category = {}
- self.allposts = None
- self.frontposts = None
-
- self.slugsdb = os.path.join(glob.CACHE, "slugs.json")
- if os.path.isfile(self.slugsdb):
- with open(self.slugsdb) as slugsdb:
- self.allslugs = json.loads(slugsdb.read())
- slugsdb.close()
- else:
- self.allslugs = []
-
- self.tmpwhoosh = tempfile.mkdtemp('whooshdb_', dir=tempfile.gettempdir())
- self.whoosh = index.create_in(self.tmpwhoosh, glob.schema)
-
-
- def removelock(self):
- os.unlink(self.lockfile)
-
-
- def removetmp(self):
- if os.path.isdir(self.tmpwhoosh):
- for root, dirs, files in os.walk(self.tmpwhoosh, topdown=False):
- for f in files:
- os.remove(os.path.join(root, f))
- for d in dirs:
- os.rmdir(os.path.join(root, d))
-
-
- def initbuilder(self):
- self._copy_and_compile()
-
-
- def cleanup(self):
- with open(os.path.join(glob.CACHE, "slugs.json"), "w") as db:
- logging.info("updating slugs database")
- db.write(json.dumps(self.allslugs))
- db.close()
-
- tags = []
- for tslug, taxonomy in self.tags.items():
- tags.append(taxonomy.name)
-
- with open(os.path.join(glob.CACHE, "tags.json"), "w") as db:
- logging.info("updating tags database")
- db.write(json.dumps(tags))
- db.close()
-
- logging.info("deleting old searchdb")
- shutil.rmtree(glob.SEARCHDB)
- logging.info("moving new searchdb")
- shutil.move(self.tmpwhoosh, glob.SEARCHDB)
-
-
- def _mkdirs(self):
- for d in [glob.TARGET, glob.TFILES, glob.TTHEME, glob.CACHE]:
- if not os.path.isdir(d):
- os.mkdir(d)
-
-
- def _copy_and_compile(self):
- for f in os.listdir(glob.STHEME):
- p = os.path.join(glob.STHEME, f)
- if os.path.isdir(p):
- try:
- shutil.copytree(p, os.path.join(glob.TTHEME, f))
- except FileExistsError:
- pass
- else:
- path, fname = os.path.split(p)
- fname, ext = os.path.splitext(fname)
- logging.debug("copying %s", p)
- shutil.copy(p, os.path.join(glob.TTHEME, f))
-
- @staticmethod
- def postbycategory(fpath, catd=None, catn=None):
- if catd == 'photo':
- post = singular.PhotoHandler(fpath, category=catn)
- elif catd == 'page':
- post = singular.PageHandler(fpath)
- else:
- post = singular.ArticleHandler(fpath, category=catn)
-
- return post
-
- def collect(self):
- self.allposts = taxonomy.TaxonomyHandler()
- #self.gallery = taxonomy.TaxonomyHandler(taxonomy="photography", name="Photography")
- self.frontposts = taxonomy.TaxonomyHandler()
-
- for category in glob.conf['category'].items():
- catn, catd = category
- catp = os.path.abspath(os.path.join(glob.CONTENT, catn))
-
- if not os.path.exists(catp):
- continue
-
- logging.debug("getting posts for category %s from %s", catn, catp)
-
- cat = taxonomy.TaxonomyHandler(taxonomy='category', name=catn)
- self.category[catn] = cat
-
- for f in os.listdir(catp):
- fpath = os.path.join(catp, f)
-
- if not os.path.isfile(fpath):
- continue
-
- logging.debug("parsing %s", fpath)
- exclude = False
- if 'exclude' in catd:
- exclude = bool(catd['exclude'])
-
- ct = None
- if 'type' in catd:
- ct = catd['type']
-
- post = Engine.postbycategory(fpath, catd=ct, catn=catn)
-
- self.allposts.append(post)
- if post.dtime > arrow.utcnow().timestamp:
- logging.warning(
- "Post '%s' will be posted in the future; "
- "skipping it from Taxonomies for now", fpath
- )
- else:
- cat.append(post)
- if not exclude:
- self.frontposts.append(post)
- if hasattr(post, 'tags') and isinstance(post.tags, list):
- for tag in post.tags:
- tslug = slugify(tag, only_ascii=True, lower=True)
- if not tslug in self.tags.keys():
- t = taxonomy.TaxonomyHandler(taxonomy='tag', name=tag)
- self.tags[tslug] = t
- else:
- t = self.tags[tslug]
- t.append(post)
- elif not hasattr(post, 'tags'):
- logging.error("%s post does not have tags", post.fname)
- elif not isinstance(post.tags, list):
- logging.error(
- "%s tags are not a list, it's %s ",
- post.fname,
- type(post.tags)
- )
-
-
- for r in post.redirect.keys():
- self.allslugs.append(r)
- self.allslugs.append(post.fname)
-
-
- def renderposts(self):
- for p in self.allposts.posts.items():
- time, post = p
- post.write()
- post.redirects()
- post.pings()
- post.index(self.whoosh)
-
-
- def rendertaxonomies(self):
- for t in [self.tags, self.category]:
- for tname, tax in t.items():
- if glob.conf['category'].get(tname, False):
- if glob.conf['category'][tname].get('nocollection', False):
-
- logging.info("skipping taxonomy '%s' due to config nocollections", tname)
- continue
-
- tax.write_paginated()
- tax.index(self.whoosh)
- self.frontposts.write_paginated()
- #self.gallery.write_simple(template='gallery.html')
- self.allposts.writesitemap()
-
- def globredirects(self):
- redirects = os.path.join(glob.CONTENT,'redirects.yml')
-
- if not os.path.isfile(redirects):
- return
-
- ftime = os.stat(redirects)
- rdb = {}
- with open(redirects, 'r') as db:
- rdb = yaml.safe_load(db)
- db.close()
-
- for r_ in rdb.items():
- target, slugs = r_
- for slug in slugs:
- singular.SingularHandler.write_redirect(
- slug,
- "%s/%s" % (glob.conf['site']['url'], target),
- ftime.st_mtime
- )
-
- def recordlastrun(self):
- if os.path.exists(glob.lastrun):
- t = arrow.utcnow().timestamp
- os.utime(glob.lastrun, (t,t))
- else:
- open(glob.lastrun, 'a').close()
-
-
-if __name__ == '__main__':
-
- args = docopt(__doc__, version='generator.py 0.2')
-
- if args['--pandoc']:
- glob.CACHEENABLED = False
-
- if args['--force']:
- glob.FORCEWRITE = True
-
- if args['--regenerate']:
- glob.REGENERATE = True
-
- logform = '%(asctime)s - %(levelname)s - %(message)s'
- if args['--debug']:
- loglevel = 10
- else:
- loglevel = 40
-
-
- while len(logging.root.handlers) > 0:
- logging.root.removeHandler(logging.root.handlers[-1])
- logging.basicConfig(level=loglevel, format=logform)
-
- if args['--single']:
- logging.info("(re)generating a single item only")
- path = args['--single'].split('/')
- fpath = os.path.join(glob.CONTENT, path[0], path[1])
- post = Engine.postbycategory(fpath, catd=path[0])
- post.pings()
- post.write()
- sys.exit(0)
- else:
- eng = Engine()
- eng.initbuilder()
- eng.collect()
- eng.renderposts()
- eng.globredirects()
- eng.rendertaxonomies()
- eng.recordlastrun()
- eng.cleanup()
\ No newline at end of file
diff --git a/glob.py b/glob.py
deleted file mode 100644
index 1ab69fa..0000000
--- a/glob.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os
-import logging
-from ruamel import yaml
-from whoosh import fields
-from whoosh import analysis
-import jinja2
-from slugify import slugify
-import arrow
-
-schema = fields.Schema(
- url=fields.ID(
- stored=True,
- ),
- title=fields.TEXT(
- stored=True,
- analyzer=analysis.FancyAnalyzer(
- )
- ),
- date=fields.DATETIME(
- stored=True,
- sortable=True
- ),
- content=fields.TEXT(
- stored=True,
- analyzer=analysis.FancyAnalyzer(
- )
- ),
- tags=fields.TEXT(
- stored=True,
- analyzer=analysis.KeywordAnalyzer(
- lowercase=True,
- commas=True
- )
- ),
- weight=fields.NUMERIC(
- sortable=True
- ),
- img=fields.TEXT(
- stored=True
- )
-)
-
-BASEDIR = os.path.dirname(os.path.abspath(__file__))
-CONFIG = os.path.abspath(os.path.join(BASEDIR, 'config.yml'))
-
-with open(CONFIG, 'r') as c:
- conf = yaml.safe_load(c)
- conf['site']['author'] = conf['author']
- c.close()
-
-secrets = os.path.abspath(os.path.join(BASEDIR, 'secret.yml'))
-if os.path.isfile(secrets):
- with open(secrets, 'r') as c:
- conf['secrets'] = yaml.safe_load(c)
- c.close()
-
-CACHEENABLED = True
-REGENERATE = False
-FORCEWRITE = False
-
-ISODATE = '%Y-%m-%dT%H:%M:%S%z'
-
-SOURCE = os.path.abspath(conf['dirs']['source']['root'])
-CONTENT = os.path.abspath(conf['dirs']['source']['content'])
-FONT = os.path.abspath(conf['dirs']['font'])
-STHEME = os.path.abspath(conf['dirs']['source']['theme'])
-SFILES = os.path.abspath(conf['dirs']['source']['files'])
-TEMPLATES = os.path.abspath(conf['dirs']['source']['templates'])
-COMMENTS = os.path.abspath(conf['dirs']['source']['comments'])
-
-TARGET = os.path.abspath(conf['dirs']['target']['root'])
-TTHEME = os.path.abspath(conf['dirs']['target']['theme'])
-TFILES = os.path.abspath(conf['dirs']['target']['files'])
-UFILES = conf['dirs']['target']['furl']
-
-CACHE = os.path.abspath(conf['dirs']['cache'])
-SEARCHDB = os.path.abspath(conf['dirs']['searchdb'])
-
-WEBMENTIONDB = os.path.abspath(conf['webmentiondb'])
-LOGDIR = os.path.abspath(conf['dirs']['log'])
-GPSDIR = os.path.abspath(conf['dirs']['gps'])
-TSDBDIR = os.path.abspath(conf['dirs']['tsdb'])
-LOCALCOPIES = os.path.abspath(conf['dirs']['localcopies'])
-
-lastrun = '/tmp/generator_last_run'
-
-os.environ.setdefault('PYPANDOC_PANDOC', '/usr/bin/pandoc')
-
-def jinja_filter_date(d, form='%Y-%m-%d %H:%m:%S'):
- if d == 'now':
- return arrow.now().strftime(form)
- if form == 'c':
- form = '%Y-%m-%dT%H:%M:%S%z'
- return d.strftime(form)
-
-def jinja_filter_slugify(s):
- return slugify(s, only_ascii=True, lower=True)
-
-def jinja_filter_search(s, r):
- if r in s:
- return True
- return False
-
-jinjaldr = jinja2.FileSystemLoader(searchpath=TEMPLATES)
-jinja2env = jinja2.Environment(loader=jinjaldr)
-
-jinja2env.filters['date'] = jinja_filter_date
-jinja2env.filters['search'] = jinja_filter_search
-jinja2env.filters['slugify'] = jinja_filter_slugify
\ No newline at end of file
diff --git a/img.py b/img.py
deleted file mode 100644
index 3156f78..0000000
--- a/img.py
+++ /dev/null
@@ -1,370 +0,0 @@
-import os
-import re
-import sys
-import json
-import shutil
-import collections
-import logging
-import imghdr
-from ctypes import c_void_p, c_size_t
-import glob
-import pyexifinfo
-from similar_text import similar_text
-from cache import Cached
-import wand.api
-import wand.image
-import wand.drawing
-import wand.color
-from PIL import Image
-#from subprocess import call
-
-# https://stackoverflow.com/questions/34617422/how-to-optimize-image-size-using-wand-in-python
-wand.api.library.MagickSetCompressionQuality.argtypes = [c_void_p, c_size_t]
-
-
-class ImageHandler(object):
- def __init__(self, fpath, alttext='', title='', imgcl='', linkto=False):
-
- self.fpath = os.path.abspath(fpath)
- path, fname = os.path.split(self.fpath)
- fname, ext = os.path.splitext(fname)
- self.fname = fname
- self.fext = ext
- self.ftime = os.stat(self.fpath)
- self.linkto = linkto
-
- self.alttext = alttext
- self.title = title
- self.imgcl = imgcl
-
- self.c = os.path.join(glob.TFILES, self.fname)
- self.u = "%s/%s/%s" % (glob.conf['site']['url'],glob.UFILES, self.fname)
-
- self.what = imghdr.what(self.fpath)
-
- self.meta = {}
-
- self.exif = {}
- if self.what == 'jpeg':
- self._setexif()
-
- self.watermark = ''
- wfile = os.path.join(glob.SOURCE, glob.conf['watermark'])
- if os.path.isfile(wfile):
- self.watermark = wfile
-
- sizes = {
- 90: {
- 'ext': 's',
- 'cropped': True,
- },
- 360: {
- 'ext': 'm',
- },
- #540: 'n',
- 720: {
- 'ext': 'z',
- },
- #980: 'c',
- 1280: {
- 'ext': 'b',
- }
- }
- self.sizes = collections.OrderedDict(sorted(sizes.items(), reverse=0))
-
- for size, meta in self.sizes.items():
- meta['path'] = "%s_%s%s" % (self.c, meta['ext'], self.fext)
- meta['url'] = "%s_%s%s" % (self.u, meta['ext'], self.fext)
- meta['mime'] = "image/%s" % (self.what)
-
-
- self._setmeta()
- self.fallbacksize = 720
- self.srcsetmin = 720
-
- self._is_photo()
-
- if self.is_photo:
- self.srcset = self.mksrcset(generate_caption=False, uphoto=False)
-
-
- def _setmeta(self):
- s = collections.OrderedDict(reversed(list(self.sizes.items())))
- for size, meta in s.items():
- if os.path.isfile(meta['path']):
- with Image.open(meta['path']) as im:
- meta['width'], meta['height'] = im.size
- meta['size'] = os.path.getsize(meta['path'])
- self.meta = meta
- break
-
-
- def downsize(self, liquidcrop=True, watermark=True):
- if not self._is_downsizeable():
- return self._copy()
-
- if not self._isneeded():
- logging.debug("downsizing not needed for %s", self.fpath)
- return
-
- logging.debug("downsizing %s", self.fpath)
- try:
- img = wand.image.Image(filename=self.fpath)
- img.auto_orient()
- except:
- print("Unexpected error:", sys.exc_info()[0])
- raise
-
- # watermark
- if self.is_photo and self.watermark and img.format == "JPEG" and watermark:
- img = self._watermark(img)
-
- elif self.linkto:
- img = self._sourceurlmark(img)
-
- # resize & cache
- for size, meta in self.sizes.items():
- self._intermediate(img, size, meta)
-
- self._setmeta()
-
-
- def _setexif(self):
- cached = Cached(text=self.fname, stime=self.ftime)
- cexif = cached.get()
-
- if cexif:
- self.exif = json.loads(cexif)
- else:
- exif = pyexifinfo.get_json(self.fpath)
- self.exif = exif.pop()
- cached.set(json.dumps(self.exif))
-
-
- def _is_photo(self):
- self.is_photo = False
- if 'cameras' in glob.conf:
- if 'EXIF:Model' in self.exif:
- if self.exif['EXIF:Model'] in glob.conf['cameras']:
- self.is_photo = True
-
- if 'copyright' in glob.conf:
- if 'IPTC:CopyrightNotice' in self.exif:
- for s in glob.conf['copyright']:
- pattern = re.compile(r'%s' % s)
- if pattern.search(self.exif['IPTC:CopyrightNotice']):
- self.is_photo = True
-
- if self.is_photo:
- #self.category = "photo"
-
- if not self.alttext:
- keywords = ['XMP:Description', 'IPTC:Caption-Abstract']
- for key in keywords:
- if key in self.exif and self.exif[key]:
- self.alttext = self.exif[key]
- break
-
- if not self.title:
- keywords = ['XMP:Title', 'XMP:Headline', 'IPTC:Headline']
- for key in keywords:
- if key in self.exif and self.exif[key]:
- self.title = self.exif[key]
- break
-
-
- def _is_downsizeable(self):
- if self.what != 'jpeg' and self.what != 'png':
- return False
- if self.imgcl:
- return False
- return True
-
-
- def _watermark(self, img):
- wmark = wand.image.Image(filename=self.watermark)
-
- if img.width > img.height:
- w = img.width * 0.16
- h = wmark.height * (w / wmark.width)
- x = img.width - w - (img.width * 0.01)
- y = img.height - h - (img.height * 0.01)
- else:
- w = img.height * 0.16
- h = wmark.height * (w / wmark.width)
- x = img.width - h - (img.width * 0.01)
- y = img.height - w - (img.height * 0.01)
-
- w = round(w)
- h = round(h)
- x = round(x)
- y = round(y)
-
- wmark.resize(w, h)
- if img.width < img.height:
- wmark.rotate(-90)
- img.composite(image=wmark, left=x, top=y)
- return img
-
-
- def _sourceurlmark(self, img):
- with wand.drawing.Drawing() as draw:
- draw.fill_color = wand.color.Color('#fff')
- draw.fill_opacity = 0.8
- draw.stroke_color = wand.color.Color('#fff')
- draw.stroke_opacity = 0.8
- r_h = round(img.height * 0.3)
- r_top = round((img.height/2) - (r_h/2))
-
- draw.rectangle(
- left=0,
- top=r_top,
- width=img.width,
- height=r_h
- )
-
- draw(img)
-
- with wand.drawing.Drawing() as draw:
- draw.font = os.path.join(glob.FONT)
- draw.font_size = round((img.width)/len(self.linkto)*1.5)
- draw.gravity = 'center'
- draw.text(
- 0,
- 0,
- self.linkto
- )
- draw(img)
- return img
-
-
- def _copy(self):
- p = self.c + self.fext
- if not os.path.isfile(p):
- logging.debug("copying %s" % self.fpath)
- shutil.copy(self.fpath, p)
- return
-
-
- def _isneeded(self):
- # skip existing
- needed = False
- if glob.REGENERATE:
- needed = True
- else:
- for size, meta in self.sizes.items():
- if not os.path.isfile(meta['path']):
- needed = True
-
- return needed
-
-
- def _intermediate_dimensions(self, img, size, meta):
- if (img.width > img.height and 'crop' not in meta) \
- or (img.width < img.height and 'crop' in meta):
- width = size
- height = int(float(size / img.width) * img.height)
- else:
- height = size
- width = int(float(size / img.height) * img.width)
-
- return (width, height)
-
-
- def _intermediate_symlink(self, meta):
- # create a symlink to the largest resize with the full filename;
- # this is to ensure backwards compatibility and avoid 404s
- altsrc = meta['path']
- altdst = self.c + self.fext
-
- if not os.path.islink(altdst):
- if os.path.isfile(altdst):
- os.unlink(altdst)
- os.symlink(altsrc, altdst)
-
-
- def _intermediate(self, img, size, meta):
- # skip existing unless regenerate needed
- if os.path.isfile(meta['path']) and not glob.REGENERATE:
- return
-
- # too small images: move on
- #if size > img.height and size > img.width:
- # return
- width, height = self._intermediate_dimensions(img, size, meta)
-
- try:
- thumb = img.clone()
- thumb.resize(width, height)
- #thumb.resize(width, height, filter='robidouxsharp')
-
- if 'crop' in meta and liquidcrop:
- thumb.liquid_rescale(size, size, 1, 1)
- elif 'crop' in meta:
- l = t = 0
- if width > size:
- l = int((width - size) / 2)
- if height > size:
- t = int((height - size) / 2)
- thumb.crop(left=l, top=t, width=size, height=size)
-
- if img.format == "PNG":
- library.MagickSetCompressionQuality(img.wand, 75)
-
- if img.format == "JPEG":
- thumb.compression_quality = 86
- thumb.unsharp_mask(radius=0, sigma=0.5, amount=1, threshold=0.03)
- thumb.format = 'pjpeg'
-
- # this is to make sure pjpeg happens
- with open(meta['path'], 'wb') as f:
- thumb.save(file=f)
-
- if size == list(self.sizes.keys())[-1]:
- self._intermediate_symlink(meta)
-
- #if img.format == "JPEG":
- ## this one strips the embedded little jpg
- #call(['/usr/bin/jhead', '-dt', '-q', cpath])
-
- except:
- print("Unexpected error:", sys.exc_info()[0])
- raise
-
-
- def mksrcset(self, generate_caption=True, uphoto=False):
- if not self._is_downsizeable():
- return False
-
- for size, meta in self.sizes.items():
- if 'crop' in meta:
- continue
-
- # increase fallback until max fallback reached
- if size <= self.fallbacksize:
- fallback = meta['url']
-
- # set target for the largest
- target = meta['url']
-
- if uphoto:
- uphotoclass=' u-photo'
- else:
- uphotoclass=''
- caption = ''
-
- if not self.imgcl:
- cl = ''
- else:
- cl = self.imgcl
-
- if self.alttext \
- and similar_text(self.alttext, self.fname) < 90 \
- and similar_text(self.alttext, self.fname + '.' + self.fext) < 90 \
- and generate_caption:
- caption = '%s' % (self.alttext)
-
- if self.linkto:
- target = self.linkto
-
- return '' % (uphotoclass, target, fallback, self.imgcl, self.alttext, caption)
\ No newline at end of file
diff --git a/nasg.py b/nasg.py
new file mode 100644
index 0000000..ebac014
--- /dev/null
+++ b/nasg.py
@@ -0,0 +1,203 @@
+import argparse
+import logging
+import os
+import re
+import arrow
+import atexit
+from concurrent.futures import ProcessPoolExecutor
+from multiprocessing import cpu_count
+from slugify import slugify
+
+import nasg.config as config
+import nasg.singular as singular
+import nasg.searchindex as searchindex
+import nasg.taxonomy as taxonomy
+
+from pprint import pprint
+
+parser = argparse.ArgumentParser(description='Parameters for NASG')
+parser.add_argument(
+ '--regenerate', '-f',
+ dest='regenerate',
+ action='store_true',
+ default=False,
+ help='force regeneration of all HTML outputs'
+)
+parser.add_argument(
+ '--downsize', '-c',
+ action='store_true',
+ dest='downsize',
+ default=False,
+ help='force re-downsizing of all suitable images'
+)
+parser.add_argument(
+ '--debug', '-d',
+ action='store_true',
+ dest='debug',
+ default=False,
+ help='turn on debug log'
+)
+
+class Engine(object):
+ def __init__(self):
+ self._initdirs()
+ self._lock()
+ atexit.register(self._lock, action='clear')
+ self.files = []
+ self.categories = {}
+ self.tags = {}
+ self.allposts = taxonomy.TaxonomyHandler('')
+ self.frontposts = taxonomy.TaxonomyHandler('')
+ self.allowedpattern = re.compile(config.accept_sourcefiles)
+ self.counter = {}
+
+ def _parse_results(self, futures):
+ for future in futures:
+ try:
+ future.result()
+ except Exception as e:
+ logging.error("processing failed: %s", e)
+
+
+ def collect(self):
+ self._setup_categories()
+ self._setup_singulars()
+
+
+ def render(self):
+ self._render_singulars()
+ #self._render_taxonomy()
+
+
+ def _render_singulars(self):
+ logging.warning("rendering singulars")
+ pprint(self.allposts)
+ #futures = []
+ #with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
+ for p in self.allposts:
+ #futures.append(executor.submit(p.write))
+ p.write()
+ #for future in futures:
+ #try:
+ #future.result()
+ #except Exception as e:
+ #logging.error("processing failed: %s", e)
+
+
+ def _render_taxonomy(self):
+ futures = []
+ with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
+ for tslug, t in self.tags.items():
+ #t.write()
+ futures.append(executor.submit(t.write))
+ for cslug, c in self.categories.items():
+ #c.write()
+ futures.append(executor.submit(c.write))
+ #self.frontposts.write()
+ futures.append(executor.submit(self.frontposts.write))
+ self._parse_results(futures)
+
+
+ def _setup_categories(self):
+ for cat, meta in config.categories.items():
+ cpath = os.path.join(config.CONTENT, cat)
+ if not os.path.isdir(cpath):
+ logging.error("category %s not found at: %s", cat, cpath)
+ continue
+
+ self.categories[cat] = taxonomy.TaxonomyHandler(
+ meta.get('name', cat),
+ taxonomy=meta.get('type', 'category'),
+ slug=cat,
+ render=meta.get('render', True)
+ )
+
+
+ def _setup_singulars(self):
+ futures = []
+ with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
+ for slug, tax in self.categories.items():
+ cpath = os.path.join(config.CONTENT, slug)
+ for f in os.listdir(cpath):
+ fpath = os.path.join(cpath,f)
+ if not self.allowedpattern.fullmatch(f):
+ logging.warning("unexpected file at: %s" % fpath)
+ continue
+ #self._posttype(fpath, slug)
+ futures.append(executor.submit(self._posttype, fpath, slug))
+ self._parse_results(futures)
+
+ def _posttype(self, fpath, cat):
+ c = self.categories[cat]
+
+ if re.match('.*\.jpg', fpath):
+ p = singular.PhotoHandler(fpath)
+ elif 'page' == c.taxonomy:
+ p = singular.PageHandler(fpath)
+ else:
+ p = singular.ArticleHandler(fpath)
+
+ c.append(p)
+ self.allposts.append(p)
+
+ front = config.categories[cat].get('front', True)
+ if front:
+ self.frontposts.append(p)
+
+ ptags = p.vars.get('tags', [])
+ for tag in ptags:
+ tslug = slugify(tag, only_ascii=True, lower=True)
+ if tslug not in self.tags:
+ self.tags[tslug] = taxonomy.TaxonomyHandler(
+ tag,
+ taxonomy='tag',
+ slug=tslug
+ )
+ self.tags[tslug].append(p)
+
+
+ def _initdirs(self):
+ for d in [
+ config.TARGET,
+ config.TTHEME,
+ config.TFILES,
+ config.VAR,
+ config.SEARCHDB,
+ config.TSDB,
+ config.LOGDIR
+ ]:
+ if not os.path.exists(d):
+ os.mkdir(d)
+
+
+ def _lock(self, action='set'):
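+        # a tiny state machine: 'set' writes the lockfile and raises if one
+        # is already present, 'clear' removes it, any other value just
+        # reports whether the lockfile exists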
+ if 'set' == action:
+ if os.path.exists(config.LOCKFILE):
+ raise ValueError("lockfile %s present" % config.LOCKFILE)
+ with open(config.LOCKFILE, "wt") as l:
+ l.write("%s" % arrow.utcnow())
+ l.close()
+ elif 'clear' == action:
+ if os.path.exists(config.LOCKFILE):
+ os.unlink(config.LOCKFILE)
+ else:
+ return os.path.exists(config.LOCKFILE)
+
+
+if __name__ == '__main__':
+ config.options.update(vars(parser.parse_args()))
+ loglevel = 30
+ if config.options['debug']:
+ loglevel = 10
+
+ while len(logging.root.handlers) > 0:
+ logging.root.removeHandler(logging.root.handlers[-1])
+
+ logging.basicConfig(
+ level=loglevel,
+ format='%(asctime)s - %(levelname)s - %(message)s'
+ )
+
+ engine = Engine()
+ engine.collect()
+ engine.render()
\ No newline at end of file
diff --git a/nasg/__init__.py b/nasg/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/nasg/cmdline.py b/nasg/cmdline.py
new file mode 100644
index 0000000..595b713
--- /dev/null
+++ b/nasg/cmdline.py
@@ -0,0 +1,115 @@
+import subprocess
+import os
+import json
+import logging
+
+
+class CommandLine(object):
+ def __init__(self, cmd, stdin=''):
+ self.cmd = cmd.split(' ')
+ self.stdin = stdin
+ self.stdout = ''
+ self.binary = None
+ self._which()
+
+ if not self.binary:
+ raise ValueError('%s binary was not found in PATH' % self.cmd[0])
+
+ # based on: http://stackoverflow.com/a/377028/673576
+ def _which(self):
+ if self._is_exe(self.cmd[0]):
+ self.binary = self.cmd[0]
+ return
+
+ for path in os.environ["PATH"].split(os.pathsep):
+ path = path.strip('"')
+ fpath = os.path.join(path, self.cmd[0])
+ if self._is_exe(fpath):
+ self.binary = self.cmd[0] = fpath
+ return
+
+ def _is_exe(self, fpath):
+ return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
+
+ def run(self):
+ p = subprocess.Popen(
+ self.cmd,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=os.environ.copy()
+ )
+ stdout, stderr = p.communicate(self.stdin.encode('utf-8'))
+ self.stdout = stdout.decode('utf-8').strip()
+ return self
+
+
+class Exiftool(CommandLine):
+ def __init__(self, fpath = ''):
+ self.fpath = fpath
+        cmd = "/usr/local/bin/exiftool -json -sort -groupNames %s" % (fpath)
+ super(Exiftool, self).__init__(cmd)
+
+ def get(self):
+ self.run()
+ exif = {}
+ try:
+ exif = json.loads(self.stdout)[0]
+ except json.JSONDecodeError as e:
+ logging.error("Error when decoding JSON returned from exiftool: %s" % e)
+ pass
+
+ return exif
+
+
+class Pandoc(CommandLine):
+ """ Use: Pandoc.[formatter function].get()
+ available formatter functions:
+ - md2html: from markdown extra to html5
+ - html2md: from html5 to simple markdown
+
+    The default is plain markdown to html5 (if no formatter function is called)
+ """
+
+ def __init__(self, text):
+ self.stdin = text
+ self.format_in = 'markdown'
+ self.format_out = 'html5'
+ self.stdout = ''
+
+ def md2html(self):
+ self.format_in = "markdown+" + "+".join([
+ 'backtick_code_blocks',
+ 'auto_identifiers',
+ 'fenced_code_attributes',
+ 'definition_lists',
+ 'grid_tables',
+ 'pipe_tables',
+ 'strikeout',
+ 'superscript',
+ 'subscript',
+ 'markdown_in_html_blocks',
+ 'shortcut_reference_links',
+ 'autolink_bare_uris',
+ 'raw_html',
+ 'link_attributes',
+ 'header_attributes',
+ 'footnotes',
+ ])
+ return self
+
+
+    def html2md(self):
+        self.format_in = 'html'
+        self.format_out = "markdown-" + "-".join([
+ 'raw_html',
+ 'native_divs',
+ 'native_spans',
+ ])
+ return self
+
+
+ def get(self):
+ cmd = "/usr/bin/pandoc -o- --from=%s --to=%s" % (self.format_in, self.format_out)
+ super(Pandoc, self).__init__(cmd, stdin=self.stdin)
+ self.run()
+ return self.stdout
\ No newline at end of file
diff --git a/nasg/func.py b/nasg/func.py
new file mode 100644
index 0000000..f0f5009
--- /dev/null
+++ b/nasg/func.py
@@ -0,0 +1,21 @@
+import re
+
+def gps2dec(exifgps, ref=None):
+    pattern = re.compile(r"(?P<deg>[0-9.]+)\s+deg\s+(?P<min>[0-9.]+)'\s+(?P<sec>[0-9.]+)\"(?:\s+(?P<dir>[NEWS]))?")
+ v = pattern.match(exifgps).groupdict()
+
+ dd = float(v['deg']) + (((float(v['min']) * 60) + (float(v['sec']))) / 3600)
+ if ref == 'West' or ref == 'South' or v['dir'] == "S" or v['dir'] == "W":
+ dd = dd * -1
+ return round(dd, 6)
+
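+# worked example: gps2dec("47 deg 29' 56.4\" N") == 47.499
+# (47 + (29 * 60 + 56.4) / 3600, signed by the N/S/E/W reference)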
+def baseN(num, b=36, numerals="0123456789abcdefghijklmnopqrstuvwxyz"):
+ """ Used to create short, lowecase slug for a number (an epoch) passed """
+ num = int(num)
+ return ((num == 0) and numerals[0]) or (
+ baseN(
+ num // b,
+ b,
+ numerals
+ ).lstrip(numerals[0]) + numerals[num % b]
+ )
\ No newline at end of file
diff --git a/nasg/img.py b/nasg/img.py
new file mode 100644
index 0000000..5e5ce8e
--- /dev/null
+++ b/nasg/img.py
@@ -0,0 +1,297 @@
+import os
+import re
+import shutil
+import logging
+import imghdr
+from similar_text import similar_text
+import wand.api
+import wand.image
+import wand.drawing
+import wand.color
+
+import nasg.config as config
+from nasg.cmdline import Exiftool
+
+
+class ImageHandler(object):
+
+ sizes = {
+ 90: {
+ 'ext': 's',
+ 'crop': True,
+ },
+ 360: {
+ 'ext': 'm',
+ },
+ 720: {
+ 'ext': 'z',
+ 'fallback': True
+ },
+ 1280: {
+ 'ext': 'b',
+ }
+ }
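+    # keys are the target pixel size of each variant; 'ext' becomes the
+    # filename suffix, 'crop' requests a square thumbnail and 'fallback'
+    # marks the variant used as the default image URL by srcset()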
+
+ def __init__(self, fpath, alttext='', title='', imgcl='', linkto=False):
+ logging.info("parsing image: %s" % fpath)
+ self.fpath = os.path.abspath(fpath)
+ self.fname, self.ext = os.path.splitext(os.path.basename(fpath))
+
+ self.linkto = linkto
+ self.alttext = alttext
+ self.title = title
+ self.imgcl = imgcl
+ self.what = imghdr.what(self.fpath)
+ self.mime = "image/%s" % (self.what)
+ self.exif = {}
+ self.is_photo = False
+ if self.what == 'jpeg':
+ self._setexif()
+ self._is_photo()
+ self.is_downsizeable = False
+ if not self.imgcl:
+ if self.what == 'jpeg' or self.what == 'png':
+ self.is_downsizeable = True
+ self.sizes = sorted(self.sizes.items())
+ for size, meta in self.sizes:
+ meta['fname'] = "%s_%s%s" % (
+ self.fname,
+ meta['ext'],
+ self.ext
+ )
+ meta['fpath'] = os.path.join(
+ config.TFILES,
+ meta['fname']
+ )
+ meta['url'] = "%s/%s/%s" % (
+ config.site['url'],
+ config.UFILES,
+ meta['fname']
+ )
+ if 'fallback' in meta:
+ self.fallback = meta['url']
+ self.targeturl = meta['url']
+
+
+ def featured(self):
+ # sizes elements are tuples: size, meta
+ return {
+ 'mime': self.mime,
+ 'url': self.sizes[-1][1]['url'],
+ 'bytes': os.path.getsize(self.sizes[-1][1]['fpath'])
+ }
+
+
+ def _setexif(self):
+ self.exif = Exiftool(self.fpath).get()
+
+
+ def _is_photo(self):
+ model = self.exif.get('EXIF:Model', None)
+ if hasattr(config, 'cameras') and \
+ model in config.cameras:
+ self.is_photo = True
+ return
+
+ cprght = self.exif.get('IPTC:CopyrightNotice', '')
+ if hasattr(config, 'copyr'):
+ for s in config.copyr:
+ pattern = re.compile(r'%s' % s)
+ if pattern.match(cprght):
+ self.is_photo = True
+ return
+
+
+ def _watermark(self, img):
+ if 'watermark' not in config.options:
+ return img
+ if not os.path.isfile(config.options['watermark']):
+ return img
+
+ wmark = wand.image.Image(filename=config.options['watermark'])
+
+ if img.width > img.height:
+ w = img.width * 0.16
+ h = wmark.height * (w / wmark.width)
+ x = img.width - w - (img.width * 0.01)
+ y = img.height - h - (img.height * 0.01)
+ else:
+ w = img.height * 0.16
+ h = wmark.height * (w / wmark.width)
+ x = img.width - h - (img.width * 0.01)
+ y = img.height - w - (img.height * 0.01)
+
+ w = round(w)
+ h = round(h)
+ x = round(x)
+ y = round(y)
+
+ wmark.resize(w, h)
+ if img.width < img.height:
+ wmark.rotate(-90)
+ img.composite(image=wmark, left=x, top=y)
+ return img
+
+
+ def _sourceurlmark(self, img):
+ with wand.drawing.Drawing() as draw:
+ draw.fill_color = wand.color.Color('#fff')
+ draw.fill_opacity = 0.8
+ draw.stroke_color = wand.color.Color('#fff')
+ draw.stroke_opacity = 0.8
+ r_h = round(img.height * 0.3)
+ r_top = round((img.height/2) - (r_h/2))
+
+ draw.rectangle(
+ left=0,
+ top=r_top,
+ width=img.width,
+ height=r_h
+ )
+
+ draw(img)
+
+ with wand.drawing.Drawing() as draw:
+ draw.font = config.FONT
+ draw.font_size = round((img.width)/len(self.linkto)*1.5)
+ draw.gravity = 'center'
+ draw.text(
+ 0,
+ 0,
+ self.linkto
+ )
+ draw(img)
+ return img
+
+ def downsize(self):
+ if not self.is_downsizeable:
+ return self._copy()
+ if not self._isneeded():
+ logging.debug("downsizing not needed for %s", self.fpath)
+ return
+
+ logging.debug("downsizing %s", self.fpath)
+ try:
+ img = wand.image.Image(filename=self.fpath)
+ img.auto_orient()
+ except ValueError as e:
+ logging.error("opening %s with wand failed: %s", self.fpath, e)
+ return
+
+ if self.is_photo:
+ img = self._watermark(img)
+ elif self.linkto:
+ img = self._sourceurlmark(img)
+
+ for size, meta in self.sizes:
+ self._intermediate(img, size, meta)
+
+ #self._setmeta()
+
+
+ def _copy(self):
+ target = os.path.join(
+ config.TFILES,
+ "%s%s" % (self.fname, self.ext)
+ )
+ if os.path.isfile(target) and \
+ not config.options['downsize']:
+ return
+
+ logging.debug("copying %s to %s", self.fpath, target)
+ shutil.copy(self.fpath, target)
+
+
+ def _isneeded(self):
+ if config.options['downsize']:
+ return True
+ for size, meta in self.sizes:
+ if not os.path.isfile(meta['fpath']):
+ return True
+
+
+ def _intermediate_dimensions(self, img, size, meta):
+ if (img.width > img.height and 'crop' not in meta) \
+ or (img.width < img.height and 'crop' in meta):
+ width = size
+ height = int(float(size / img.width) * img.height)
+ else:
+ height = size
+ width = int(float(size / img.height) * img.width)
+
+ return (width, height)
+
+
+ def _intermediate(self, img, size, meta):
+ if os.path.isfile(meta['fpath']) and \
+ not config.options['downsize']:
+ return
+
+ try:
+ thumb = img.clone()
+ width, height = self._intermediate_dimensions(img, size, meta)
+ thumb.resize(width, height)
+
+ if 'crop' in meta:
+ if 'liquidcrop' in config.options and \
+ config.options['liquidcrop']:
+ thumb.liquid_rescale(size, size, 1, 1)
+ else:
+ l = t = 0
+ if width > size:
+ l = int((width - size) / 2)
+ if height > size:
+ t = int((height - size) / 2)
+ thumb.crop(left=l, top=t, width=size, height=size)
+
+ if img.format == "JPEG":
+ thumb.compression_quality = 86
+ thumb.unsharp_mask(
+ radius=0,
+ sigma=0.5,
+ amount=1,
+ threshold=0.03
+ )
+ thumb.format = 'pjpeg'
+
+
+ # this is to make sure pjpeg happens
+ with open(meta['fpath'], 'wb') as f:
+ thumb.save(file=f)
+
+ except ValueError as e:
+ logging.error("error while downsizing %s: %s", self.fpath, e)
+ return
+
+
+ def srcset(self, generate_caption=True, uphoto=False):
+ if not self.is_downsizeable:
+ return False
+
+ uphotoclass=''
+ if uphoto:
+ uphotoclass=' u-photo'
+
+ cl = ''
+ if self.imgcl:
+ cl = self.imgcl
+
+ caption = ''
+ if self.alttext \
+ and similar_text(self.alttext, self.fname) < 90 \
+ and similar_text(self.alttext, self.fname + '.' + self.ext) < 90 \
+ and generate_caption:
+ caption = '%s' % (self.alttext)
+
+ if self.linkto:
+ target = self.linkto
+
+ # don't put linebreaks in this: Pandoc tends to evaluate them
+ return '' % (
+ uphotoclass,
+ self.targeturl,
+ self.fallback,
+ self.imgcl,
+ self.alttext,
+ caption
+ )
\ No newline at end of file
diff --git a/nasg/img_test.py b/nasg/img_test.py
new file mode 100644
index 0000000..e69de29
diff --git a/nasg/jinjaenv.py b/nasg/jinjaenv.py
new file mode 100644
index 0000000..53880dc
--- /dev/null
+++ b/nasg/jinjaenv.py
@@ -0,0 +1,29 @@
+import arrow
+import jinja2
+from slugify import slugify
+import nasg.config as config
+
+JINJA2ENV = jinja2.Environment(
+ loader=jinja2.FileSystemLoader(
+ searchpath=config.TEMPLATES
+ )
+)
+
+def jinja_filter_date(d, form='%Y-%m-%d %H:%M:%S'):
+ if d == 'now':
+ return arrow.now().datetime.strftime(form)
+ if form == 'c':
+ form = '%Y-%m-%dT%H:%M:%S%z'
+ return d.strftime(form)
+
+def jinja_filter_slugify(s):
+ return slugify(s, only_ascii=True, lower=True)
+
+def jinja_filter_search(s, r):
+ if r in s:
+ return True
+ return False
+
+JINJA2ENV.filters['date'] = jinja_filter_date
+JINJA2ENV.filters['search'] = jinja_filter_search
+JINJA2ENV.filters['slugify'] = jinja_filter_slugify
\ No newline at end of file
diff --git a/nasg/searchindex.py b/nasg/searchindex.py
new file mode 100644
index 0000000..82cd7ed
--- /dev/null
+++ b/nasg/searchindex.py
@@ -0,0 +1,76 @@
+import os
+import logging
+import tempfile
+import atexit
+import shutil
+from whoosh import fields
+from whoosh import analysis
+from whoosh import index
+import nasg.config as config
+
+class SearchIndex(object):
+ schema = fields.Schema(
+ url=fields.ID(
+ stored=True,
+ ),
+ title=fields.TEXT(
+ stored=True,
+ analyzer=analysis.FancyAnalyzer(
+ )
+ ),
+ date=fields.DATETIME(
+ stored=True,
+ sortable=True
+ ),
+ content=fields.TEXT(
+ stored=True,
+ analyzer=analysis.FancyAnalyzer(
+ )
+ ),
+ tags=fields.TEXT(
+ stored=True,
+ analyzer=analysis.KeywordAnalyzer(
+ lowercase=True,
+ commas=True
+ )
+ ),
+ weight=fields.NUMERIC(
+ sortable=True
+ ),
+ img=fields.TEXT(
+ stored=True
+ )
+ )
+
+
+ def __init__(self):
+ self.tmp = tempfile.mkdtemp('whooshdb_', dir=tempfile.gettempdir())
+ self.ix = index.create_in(self.tmp, self.schema)
+ atexit.register(self.cleanup)
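+        # the index is built in a temporary directory; save() later swaps it
+        # into config.SEARCHDB, while cleanup() removes any leftover temp dir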
+
+
+ def add(self, vars):
+ ix = self.ix.writer()
+ ix.add_document(
+ title=vars['title'],
+ url=vars['url'],
+ content=vars['content'],
+            date=vars['date'],
+ tags=vars['tags'],
+ weight=1,
+ img=vars['img']
+ )
+ ix.commit()
+
+
+ def cleanup(self):
+ if not os.path.exists(self.tmp):
+ return
+
+ logging.warning("cleaning up tmp whoosh")
+ shutil.rmtree(self.tmp)
+
+
+ def save(self):
+ logging.info("deleting old searchdb")
+ shutil.rmtree(config.SEARCHDB)
+ logging.info("moving new searchdb")
+ shutil.move(self.tmp, config.SEARCHDB)
\ No newline at end of file
diff --git a/nasg/singular.py b/nasg/singular.py
new file mode 100644
index 0000000..ac0b283
--- /dev/null
+++ b/nasg/singular.py
@@ -0,0 +1,580 @@
+import os
+import re
+import logging
+import arrow
+import frontmatter
+import langdetect
+from slugify import slugify
+
+import nasg.config as config
+import nasg.func as func
+import nasg.cmdline as cmdline
+from nasg.img import ImageHandler
+import nasg.jinjaenv as jinjaenv
+
+class SingularHandler(object):
+ def __init__(self, fpath):
+ logging.info("setting up singular from %s", fpath)
+ self.fpath= os.path.abspath(fpath)
+ self.fname, self.ext = os.path.splitext(os.path.basename(self.fpath))
+ self.target = os.path.join(
+ config.TARGET, "%s" % (self.fname), "index.html"
+ )
+
+ slug = slugify(self.fname, only_ascii=True, lower=True)
+ self.modtime = int(os.path.getmtime(self.fpath))
+ self.category = os.path.dirname(self.fpath).replace(config.CONTENT, '').strip('/')
+
+ self.vars = {
+ 'category': self.category,
+ 'tags': [],
+ 'published': arrow.get(self.modtime),
+ 'updated': arrow.get(0),
+ 'author': config.author,
+ 'title': '',
+ 'raw_summary': '',
+ 'raw_content': '',
+ 'content': '',
+ 'summary': '',
+ 'reactions': {},
+ 'exif': {},
+ 'lang': config.site['lang'],
+ #'syndicate': [],
+ 'slug': slug,
+ 'shortslug': slug,
+ 'srcset': '',
+ 'url': "%s/%s/" % (config.site['url'], slug),
+ }
+
+ self.redirects = {}
+ self.pings = {}
+ self.template = 'singular.html'
+ self.img = None
+ self.rendered = ''
+
+
+ def __repr__(self):
+ return "Post '%s' (%s @ %s)" % (
+ self.vars['title'],
+ self.fname,
+ self.fpath
+ )
+
+
+ def _modtime(self):
+ """ Set file mtime in case it doesn't match the in-file publish or updated time """
+
+ use = 'published'
+ if self.vars['updated'].timestamp > self.vars['published'].timestamp:
+ use = 'updated'
+
+ self.modtime = int(self.vars[use].timestamp)
+ stattime = int(os.path.getmtime(self.fpath))
+ if stattime != self.modtime:
+ os.utime(self.fpath, (self.modtime, self.modtime))
+
+
+ def _detect_lang(self):
+ # try to detect language, ignore failures
+ try:
+ self.vars['lang'] = langdetect.detect(
+ "%s %s" % (
+ self.vars['title'],
+ self.vars['raw_content']
+ )
+ )
+ except:
+ pass
+
+
+ def _redirects(self):
+ if self.category in config.categories and \
+ 'nocollection' in config.categories[self.category] and \
+ config.categories[self.category]['nocollection']:
+ return
+
+ self.redirects[self.vars['shortslug']] = 1
+
+
+ def _shortslug(self):
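+        # base36-encode the publish epoch into a short, stable slug which
+        # _redirects() registers as an alias URL for this post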
+ shortslug = func.baseN(self.vars['published'].timestamp)
+ self.vars['shortslug'] = shortslug
+
+
+ def _prerender(self):
+ for s in ['content', 'summary']:
+ self.vars[s] = cmdline.Pandoc(self.vars[s]).md2html().get()
+
+
+ def _postsetup(self):
+ for s in ['content', 'summary']:
+ if not self.vars[s]:
+ self.vars[s] = self.vars['raw_%s' % s]
+
+ self._modtime()
+ self._shortslug()
+ self._detect_lang()
+ self._redirects()
+ self._pings()
+
+
+ def _render(self):
+ self._prerender()
+ tmpl = jinjaenv.JINJA2ENV.get_template(self.template)
+ logging.info("rendering %s", self.fname)
+ tmplvars = {
+ 'post': self.vars,
+ 'site': config.site,
+ 'taxonomy': {},
+ }
+ self.rendered = tmpl.render(tmplvars)
+
+
+ def _exists(self):
+ """ check if target exists and up to date """
+
+ if config.options['regenerate']:
+ logging.debug('REGENERATE active')
+ return False
+
+ if not os.path.isfile(self.target):
+ logging.debug('%s missing', self.target)
+ return False
+
+ ttime = os.stat(self.target)
+ if self.modtime == ttime.st_mtime:
+            logging.debug('%s exists and is up to date', self.target)
+ return True
+
+ return False
+
+
+ def write(self):
+ """ Write HTML file """
+
+ if self._exists():
+ logging.info("skipping existing %s", self.target)
+ return
+
+ self._render()
+ d = os.path.dirname(self.target)
+ if not os.path.isdir(d):
+ os.mkdir(d)
+
+ with open(self.target, "wt") as html:
+ logging.info("writing %s", self.target)
+ html.write(self.rendered)
+ html.close()
+ os.utime(self.target, (self.modtime, self.modtime))
+
+
+ def indexvars(self):
+ """ Return values formatter for search index """
+
+ c = "%s %s %s %s %s" % (
+ self.vars['slug'],
+ self.vars['raw_summary'],
+ self.vars['raw_content'],
+ self.vars['reactions'],
+ self.vars['exif']
+ )
+
+ #c = "%s %s" % (c, self._localcopy_include())
+
+ imgstr = ''
+ if self.img:
+            imgstr = self.img.srcset(generate_caption=False)
+
+ ivars = {
+ 'title': self.vars['title'],
+ 'url': self.vars['url'],
+ 'content': c,
+ 'date': self.vars['published'].datetime,
+ 'tags': ",".join(self.vars['tags']),
+ 'img': imgstr
+ }
+
+ return ivars
+
+ def _pings(self):
+ """ Extract all URLs that needs pinging """
+
+ urlregex = re.compile(
+ r'\s+https?\:\/\/?[a-zA-Z0-9\.\/\?\:@\-_=#]+'
+ r'\.[a-zA-Z0-9\.\/\?\:@\-_=#]*'
+ )
+ urls = re.findall(urlregex, self.vars['raw_content'])
+
+ for r in self.vars['reactions'].items():
+ reactiontype, reactions = r
+ if isinstance(reactions, str):
+ urls.append(reactions)
+ elif isinstance(reactions, list):
+ urls = [*reactions, *urls]
+
+ #for s in self.syndicate.keys():
+ #matches.append('https://brid.gy/publish/%s' % (s))
+
+ urlredux = {}
+ for url in urls:
+ # exclude local matches
+ if config.site['domain'] in url:
+ continue
+ urlredux[url] = 1
+
+ self.pings = urlredux
+
+
+ def _c_adaptify_altfpath(self, fname):
+ for c, cmeta in config.categories.items():
+ tpath = os.path.join(config.CONTENT, c, fname)
+ if os.path.isfile(tpath):
+ return tpath
+ return None
+
+
+ def _c_adaptify(self):
+ """ Generate srcset for all suitable images """
+
+ linkto = False
+ isrepost = None
+
+ if len(self.vars['reactions'].keys()):
+ isrepost = list(self.vars['reactions'].keys())[0]
+ if isrepost and \
+ len(self.vars['reactions'][isrepost]) == 1:
+ linkto = self.vars['reactions'][isrepost][0]
+
+ p = re.compile(
+ r'(!\[(.*)\]\((?:\/(?:files|cache)'
+ r'(?:\/[0-9]{4}\/[0-9]{2})?\/(.*\.(?:jpe?g|png|gif)))'
+ r'(?:\s+[\'\"]?(.*?)[\'\"]?)?\)(?:\{(.*?)\})?)'
+ , re.IGNORECASE)
+
+ m = p.findall(self.vars['content'])
+ if not m:
+ return
+
+ for shortcode, alt, fname, title, cl in m:
+ fpath = os.path.join(config.SFILES, fname)
+ if not os.path.isfile(fpath):
+ fpath = self._c_adaptify_altfpath(fname)
+ if not fpath:
+ logging.error("missing image in %s: %s", self.fpath, fname)
+ continue
+
+ im = ImageHandler(
+ fpath,
+ alttext=alt,
+ title=title,
+ imgcl=cl,
+ linkto=linkto
+ )
+
+ im.downsize()
+ srcset = im.srcset()
+ if srcset:
+ self.vars['content'] = self.vars['content'].replace(
+ shortcode, srcset
+ )
+
+ del(im)
+
+
+ def _c_video(self):
+ """ [video] shortcode extractor """
+
+ p = re.compile(
+            r'(\[video mp4=\"(?:/(?:files|cache)\/(?P<vid>.*?))\"\]'
+ r'(?:\[/video\])?)'
+ )
+
+ videos = p.findall(self.vars['content'])
+ if not videos:
+ return
+
+ for shortcode, vidf in videos:
+ video = '' % (
+ config.site['url'],
+ vidf
+ )
+ self.vars['content'] = self.vars['content'].replace(shortcode, video)
+
+
+ def _c_snippets(self):
+ """ Replaces [git:(repo)/(file.ext)] with corresponding code snippet """
+
+ p = re.compile(r'(\[git:([^\/]+)\/([^\]]+\.([^\]]+))\])')
+ snippets = p.findall(self.vars['content'])
+ if not snippets:
+ return
+
+ for shortcode, d, f, ext in snippets:
+ fpath = os.path.join(config.SOURCE, d, f)
+ if not os.path.isfile(fpath):
+ logging.error("missing blogsnippet: %s", self.fpath)
+ continue
+
+ if re.compile(r'conf', re.IGNORECASE).match(ext):
+ lang = 'apache'
+ else:
+ lang = ext
+
+ with open(fpath, "rt") as snip:
+ c = snip.read()
+ snip.close
+
+ c = "\n\n```%s\n%s\n```\n" % (lang, c)
+ logging.debug("replacing blogsnippet %s", self.fpath)
+ self.vars['content'] = self.vars['content'].replace(
+ shortcode, c
+ )
+
+
+ #def _c_files(self):
+ #""" Copy misc files referenced """
+
+ #match = re.compile(
+ #r'\s(?:%s)?/(?:files|cache)'
+ #r'/.*\.(?:(?!jpe?g|png|gif).*)\s' % (glob.conf['site']['domain'])
+ #)
+ #split = re.compile(
+ #r'\s(?:%s)?/((?:files|cache)'
+ #r'/(.*\.(?:(?!jpe?g|png|gif).*)))\s' % (glob.conf['site']['domain'])
+ #)
+ ##files = re.findall(match, self.content)
+ ##print(files)
+
+
+class ArticleHandler(SingularHandler):
+ def __init__(self, *args, **kwargs):
+ super(ArticleHandler, self).__init__(*args, **kwargs)
+ self._setup()
+
+ def _setup(self):
+ post = frontmatter.load(self.fpath)
+ self.vars['raw_content'] = "%s" % post.content
+ self.vars['content'] = "%s" % post.content
+
+ if 'tags' in post.metadata:
+ self.vars['tags'] = post.metadata['tags']
+
+ if 'title' in post.metadata:
+ self.vars['title'] = post.metadata['title']
+
+ if 'published' in post.metadata:
+ self.vars['published'] = arrow.get(post.metadata['published'])
+
+ if 'updated' in post.metadata:
+ self.vars['updated'] = arrow.get(post.metadata['updated'])
+
+ if 'summary' in post.metadata:
+ self.vars['raw_summary'] = post.metadata['summary']
+ self.vars['summary'] = "%s" % post.metadata['summary']
+
+ if 'redirect' in post.metadata and \
+ isinstance(post.metadata['redirect'], list):
+ for r in post.metadata['redirect']:
+ self.redirects[r.strip().strip('/')] = 1
+
+ #if 'syndicate' in post.metadata:
+ #z = post.metadata['syndicate']
+ #if isinstance(z, str):
+ #self.syndicate[z] = ''
+ #elif isinstance(z, dict):
+ #for s, c in z.items():
+ #self.syndicate[s] = c
+ #elif isinstance(z, list):
+ #for s in z:
+ #self.syndicate[s] = ''
+
+ self.vars['reactions'] = {}
+ # getting rid of '-' to avoid css trouble and similar
+ rmap = {
+ 'bookmark-of': 'bookmark',
+ 'repost-of': 'repost',
+ 'in-reply-to': 'reply',
+ }
+
+ for x in rmap.items():
+ key, replace = x
+ if key in post.metadata:
+ if isinstance(post.metadata[key], str):
+ self.vars['reactions'][replace] = [post.metadata[key]]
+ elif isinstance(post.metadata[key], list):
+ self.vars['reactions'][replace] = post.metadata[key]
+
+ self._c_adaptify()
+ self._c_snippets()
+ self._c_video()
+ #self._files()
+ super(ArticleHandler, self)._postsetup()
+
+
+class PhotoHandler(SingularHandler):
+ def __init__(self, *args, **kwargs):
+ super(PhotoHandler, self).__init__(*args, **kwargs)
+ self.img = ImageHandler(self.fpath)
+ self._setup()
+
+ def _setvars(self):
+ mapping = {
+ 'camera': [
+ 'EXIF:Model'
+ ],
+ 'aperture': [
+ 'EXIF:FNumber',
+ 'Composite:Aperture'
+ ],
+ 'shutter_speed': [
+ 'EXIF:ExposureTime'
+ ],
+ 'focallength': [
+ 'EXIF:FocalLength',
+ 'Composite:FocalLength35efl',
+ ],
+ 'iso': [
+ 'EXIF:ISO'
+ ],
+ 'lens': [
+ 'Composite:LensID',
+ 'MakerNotes:Lens',
+ 'Composite:LensSpec'
+ ]
+ }
+
+ for ekey, candidates in mapping.items():
+ for candidate in candidates:
+ val = self.img.exif.get(candidate, None)
+ if val:
+ self.vars['exif'][ekey] = val
+ break
+
+ gps = ['Latitude', 'Longitude']
+ for g in gps:
+ gk = 'EXIF:GPS%s' % (g)
+ if gk not in self.img.exif:
+ continue
+
+ r = 'EXIF:GPS%sRef' % (g)
+ ref = None
+ if r in self.img.exif:
+ ref = self.img.exif[r]
+
+ self.vars['exif']['geo_%s' % (g.lower())] = func.gps2dec(
+ self.img.exif[gk],
+ ref
+ )
+
+
+ def _setfromexif_str(self, varkey, exifkeys):
+ for key in exifkeys:
+ val = self.img.exif.get(key, None)
+ if not val:
+ continue
+ self.vars[varkey] = val.strip()
+ return
+
+
+ def _setfromexif_lst(self, varkey, exifkeys):
+ collected = {}
+ for key in exifkeys:
+ val = self.img.exif.get(key, None)
+ if not val:
+ continue
+            if isinstance(val, str):
+                val = self.img.exif[key] = val.split(",")
+            # not elif: the branch above turns any string into a list,
+            # and the list handling below relies on that
+ if isinstance(val, list):
+ for v in val:
+ collected[slugify(str(v).strip())] = str(v).strip()
+
+ self.vars[varkey] = collected.values()
+ return
+
+
+ def _setfromexif_date(self, varkey, exifkeys):
+ pattern = re.compile(
+ "(?P[0-9]{4}):(?P[0-9]{2}):(?P[0-9]{2})\s+"
+ "(?P[0-9]{2}:[0-9]{2}:[0-9]{2})Z?"
+ )
+
+ for key in exifkeys:
+ if key not in self.img.exif:
+ continue
+
+ if not self.img.exif[key]:
+ continue
+
+ date = None
+            matched = pattern.match(self.img.exif[key])
+            if not matched:
+                continue
+            v = matched.groupdict()
+
+ try:
+ date = arrow.get('%s-%s-%s %s' % (v['Y'], v['M'], v['D'], v['T']))
+ except:
+ continue
+
+ if not date:
+ continue
+
+
+ self.vars['published'] = date
+ logging.debug("'published' set to %s from key %s", self.vars['published'], key)
+ return
+
+
+ def _setup(self):
+ self._setfromexif_str('title', [
+ 'XMP:Title',
+ 'XMP:Headline',
+ 'IPTC:Headline'
+ ])
+
+ self._setfromexif_str('raw_content', [
+ 'XMP:Description',
+ 'IPTC:Caption-Abstract'
+ ])
+
+ self._setfromexif_lst('tags', [
+ 'XMP:Keywords',
+ 'IPTC:Keywords'
+ ])
+
+ self._setfromexif_date('published', [
+ 'XMP:DateTimeDigitized',
+ 'XMP:CreateDate',
+ 'EXIF:CreateDate',
+ 'EXIF:ModifyDate'
+ ])
+
+ self._setvars()
+ self.img.title = self.vars['title']
+ self.img.alttext = self.vars['title']
+
+ self.vars['content'] = "%s\n\n%s" % (
+ self.vars['raw_content'],
+ self.img.srcset(generate_caption=False, uphoto=True)
+ )
+
+ self.img.downsize()
+ self.vars['img'] = self.img.featured()
+ super(PhotoHandler, self)._postsetup()
+
+
+class PageHandler(SingularHandler):
+ def __init__(self, *args, **kwargs):
+ super(PageHandler, self).__init__(*args, **kwargs)
+ self.template = 'page.html'
+ self._setup()
+
+
+ def _setup(self):
+ with open(self.fpath) as c:
+ self.vars['raw_content'] = c.read()
+ c.close()
+
+ self._c_adaptify()
+ super(PageHandler, self)._postsetup()
\ No newline at end of file
diff --git a/nasg/taxonomy.py b/nasg/taxonomy.py
new file mode 100644
index 0000000..5db2506
--- /dev/null
+++ b/nasg/taxonomy.py
@@ -0,0 +1,319 @@
+import math
+import logging
+import os
+import collections
+from slugify import slugify
+import nasg.config as config
+import nasg.jinjaenv as jinjaenv
+import arrow
+
+class TaxonomyHandler(object):
+ def __init__(self, name, taxonomy='category', slug='', description='', render=True):
+ logging.info("setting up taxonomy: %s", name)
+ self.name = name
+ self.taxonomy = taxonomy
+ self.description = description
+ self.render = render
+ if slug:
+ self.slug = slug
+ else:
+ self.slug = slugify(self.name, only_ascii=True, lower=True)
+
+ self.posts = collections.OrderedDict()
+ #self.basedir = os.path.join(config.TARGET, self.taxonomy, self.slug)
+
+ if len(self.taxonomy) and len(self.name):
+ self.basedir = os.path.join(config.TARGET, self.taxonomy, self.slug)
+ self.baseurl = "/%s/%s/" % (self.taxonomy, self.slug)
+ else:
+ self.baseurl = '/'
+ self.basedir = os.path.join(config.TARGET)
+
+ self.modtime = 0
+
+
+ def __getitem__(self, key):
+ return self.posts[key]
+
+
+ def __repr__(self):
+ return 'Taxonomy %s (name: %s, slug: %s) with %i posts' % (
+ self.taxonomy,
+ self.name,
+ self.slug,
+ len(self.posts)
+ )
+
+
+ def __next__(self):
+ try:
+ r = self.posts.next()
+ except:
+ raise StopIteration()
+ return r
+
+
+ def __iter__(self):
+ for ix, post in self.posts.items():
+ yield post
+ return
+
+
+ def append(self, post):
+ k = int(post.vars['published'].timestamp)
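+        # posts are keyed by publish epoch; on collision, bump the key by one
+        # second until a free slot is found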
+ if k in self.posts:
+ logging.error("colliding post timestamps: %s vs %s", self.posts[k].fpath, post.fpath)
+ inc = 1
+ while k in self.posts:
+ k = int(k+1)
+
+ self.posts[k] = post
+ self.posts = collections.OrderedDict(sorted(self.posts.items(), reverse=True))
+
+
+ def write(self):
+ if not self.render:
+ return
+ l = list(self.posts.keys())
+ if len(l):
+ self.modtime = max(list(self.posts.keys()))
+ else:
+ self.modtime = arrow.utcnow().timestamp
+ self._write_pages()
+ self._write_rss()
+
+
+ def _page_vars(self, page, pages, start, end):
+ return {
+ 'taxonomy': {
+ 'url': self.baseurl,
+ 'name': self.name,
+ 'taxonomy': self.taxonomy,
+ 'description': self.description,
+ 'paged': page,
+ 'total': pages,
+ 'perpage': int(config.site['pagination']),
+ },
+ 'site': config.site,
+ 'posts': [self.posts[k].vars for k in list(sorted(
+ self.posts.keys(), reverse=True))[start:end]],
+ }
+
+
+ def _write_file(self, fpath, template, tvars):
+ tmpl = jinjaenv.JINJA2ENV.get_template(template)
+ logging.info("writing %s" % (fpath))
+ with open(fpath, "wt") as f:
+ r = tmpl.render(tvars)
+ f.write(r)
+ f.close()
+ os.utime(fpath, (self.modtime, self.modtime))
+
+
+ def _write_rss(self):
+ rssdir = os.path.join(self.basedir, 'feed')
+ if not os.path.isdir(rssdir):
+ os.makedirs(rssdir)
+ fpath = os.path.join(rssdir, 'index.xml')
+ tvars = self._page_vars(1, 1, 0, int(config.site['rsspagination']))
+ self._write_file(fpath, 'rss.html', tvars)
+
+
+ def _write_page(self, page, pages, start, end):
+ if 1 == page:
+ pagedir = self.basedir
+ else:
+ pagedir = os.path.join(self.basedir, 'page', "%i" % page)
+
+ if not os.path.isdir(pagedir):
+ os.makedirs(pagedir)
+
+ fpath = os.path.join(pagedir, 'index.html')
+ tvars = self._page_vars(page, pages, start, end)
+ self._write_file(fpath, 'archive.html', tvars)
+
+
+ def _write_pages(self):
+ perpage = int(config.site['pagination'])
+ pages = math.ceil(len(self.posts)/perpage)
+ page = 1
+
+ while page <= pages:
+ start = int((page-1) * perpage)
+ end = int(start+perpage)
+ self._write_page(page, pages, start, end)
+ page += 1
+
+
+ #def _test_freshness(self):
+ #t, lp = list(self.posts.items())[0]
+ #self.lptime = lp.ftime.st_mtime
+
+ #if os.path.isfile(self.indexpath):
+ #p = self.indexpath
+ #elif os.path.isfile(self.simplepath):
+ #p = self.simplepath
+ #else:
+ #return False
+
+ #itime = os.stat(p)
+ #if itime.st_mtime == self.lptime and not glob.FORCEWRITE:
+ #logging.debug(
+ #'Taxonomy tree is fresh for %s' % (self.name)
+ #)
+ #return True
+
+ #return False
+
+
+ #def _test_dirs(self):
+ #if not os.path.isdir(self.taxp):
+ #os.mkdir(self.taxp)
+ #if not os.path.isdir(self.basep):
+ #os.mkdir(self.basep)
+
+
+ #def write_paginated(self):
+
+ #if self._test_freshness():
+ #return
+
+ #self._test_dirs()
+
+ #taxp = os.path.join(glob.TARGET, self.taxonomy)
+ #basep = os.path.join(glob.TARGET, self.taxonomy, self.slug)
+
+ #if not os.path.isdir(taxp):
+ #os.mkdir(taxp)
+ #if not os.path.isdir(basep):
+ #os.mkdir(basep)
+
+
+ #pages = math.ceil(len(self.posts) / glob.conf['perpage'])
+ #page = 1
+
+
+ #if len(self.taxonomy) and len(self.slug):
+ #base_url = "/%s/%s/" % (self.taxonomy, self.slug)
+ #else:
+ #base_url = '/'
+
+
+ #while page <= pages:
+ #start = int((page-1) * int(glob.conf['perpage']))
+ #end = int(start + int(glob.conf['perpage']))
+ #dorss = False
+ #posttmpls = [self.posts[k].tmpl() for k in list(sorted(
+ #self.posts.keys(), reverse=True))[start:end]]
+
+ #if page == 1:
+ #tpath = self.indexpath
+ #do_rss = True
+ ## RSS
+
+ #else:
+ #do_rss = False
+ #if not os.path.isdir(self.pagedp):
+ #os.mkdir(self.pagedp)
+
+ #tdir = os.path.join(self.pagedp, "%d" % page)
+
+ #if not os.path.isdir(tdir):
+ #os.mkdir(tdir)
+ #tpath = os.path.join(tdir, "index.html")
+
+ #tvars = {
+ #'taxonomy': {
+ #'url': base_url,
+ #'name': self.name,
+ #'taxonomy': self.taxonomy,
+ #'description': self.description,
+ #'paged': page,
+ #'total': pages,
+ #'perpage': glob.conf['perpage'],
+ #},
+ #'site': glob.conf['site'],
+ #'posts': posttmpls,
+ #}
+
+
+ #tmpl = glob.jinja2env.get_template('archive.html')
+ #logging.info("rendering %s" % (tpath))
+ #with open(tpath, "w") as html:
+ #r = tmpl.render(tvars)
+ #soup = BeautifulSoup(r, "html5lib")
+ #r = soup.prettify()
+ #logging.info("writing %s" % (tpath))
+ #html.write(r)
+ #html.close()
+ #os.utime(tpath, (self.lptime, self.lptime))
+
+ #if do_rss:
+ #feeddir = os.path.join(self.basep, 'feed')
+ #if not os.path.isdir(feeddir):
+ #os.mkdir(feeddir)
+ #feedpath = os.path.join(feeddir, "index.xml")
+ #tmpl = glob.jinja2env.get_template('rss.html')
+ #logging.info("rendering %s" % (feedpath))
+ #with open(feedpath, "w") as html:
+ #r = tmpl.render(tvars)
+ #logging.info("writing %s" % (feedpath))
+ #html.write(r)
+ #html.close()
+ #os.utime(feedpath, (self.lptime, self.lptime))
+
+ #page = page+1
+
+ #def write_simple(self, template='archive.html'):
+
+ #if self._test_freshness():
+ #return
+
+ #self._test_dirs()
+
+ #base_url = "/%s/" % (self.slug)
+
+ #posttmpls = [self.posts[k].tmpl() for k in list(sorted(
+ #self.posts.keys(), reverse=True))]
+
+ #tvars = {
+ #'taxonomy': {
+ #'url': base_url,
+ #'name': self.name,
+ #'taxonomy': self.taxonomy,
+ #'description': self.description,
+ #'paged': 0,
+ #'total': 0,
+ #'perpage': glob.conf['perpage'],
+ #},
+ #'site': glob.conf['site'],
+ #'posts': posttmpls,
+ #}
+
+ #with open(os.path.join(self.simplepath), "w") as html:
+ #html.write(json.dumps(tvars, indent=4, sort_keys=True, default=str))
+ #html.close()
+
+ ##tmpl = glob.jinja2env.get_template('gallery.html')
+ ##logging.info("rendering %s" % (indexpath))
+ ##with open(indexpath, "w") as html:
+ ##r = tmpl.render(tvars)
+ ##soup = BeautifulSoup(r, "html5lib")
+ ##r = soup.prettify()
+ ##logging.info("writing %s" % (indexpath))
+ ##html.write(r)
+ ##html.close()
+ ##os.utime(indexpath, (lptime, lptime))
+
+
+ #def writesitemap(self):
+ #sitemap = "%s/sitemap.txt" % (glob.TARGET)
+ #urls = []
+ #for p in self.posts.items():
+ #t, data = p
+ #urls.append( "%s/%s" % ( glob.conf['site']['url'], data.slug ) )
+
+ #with open(sitemap, "w") as f:
+ #logging.info("writing %s" % (sitemap))
+ #f.write("\n".join(urls))
+ #f.close()
\ No newline at end of file
diff --git a/nasg/tests/cmdline.py b/nasg/tests/cmdline.py
new file mode 100644
index 0000000..bcee844
--- /dev/null
+++ b/nasg/tests/cmdline.py
@@ -0,0 +1,26 @@
+import unittest
+import nasg.cmdline as cmdline
+
+class Test(unittest.TestCase):
+
+ def testException(self):
+ self.assertRaises(
+ ValueError,
+ cmdline.CommandLine,
+ '12345678'
+ )
+
+ def testOK(self):
+ self.assertEqual(
+ cmdline.CommandLine('ls ./test_cmdline.py').run().stdout,
+ './test_cmdline.py'
+ )
+
+ def testExiftool(self):
+ self.assertEqual(
+ cmdline.Exiftool().get(),
+ {}
+ )
+
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file
diff --git a/nasg/tests/func.py b/nasg/tests/func.py
new file mode 100644
index 0000000..13c1666
--- /dev/null
+++ b/nasg/tests/func.py
@@ -0,0 +1,60 @@
+import unittest
+import nasg.func as func
+
+
+class Test(unittest.TestCase):
+
+ def test_baseN_zero(self):
+ self.assertEqual(
+ func.baseN(0),
+ '0'
+ )
+
+ def test_baseN(self):
+ self.assertEqual(
+ func.baseN(1489437846),
+ 'omrtli'
+ )
+
+ def test_gps2dec_W(self):
+ self.assertEqual(
+ func.gps2dec(
+ '103 deg 52\' 32.79" W'
+ ),
+ -103.875775
+ )
+
+ def test_gps2dec_E(self):
+ self.assertEqual(
+ func.gps2dec(
+ '103 deg 52\' 32.79" E'
+ ),
+ 103.875775
+ )
+
+ def test_gps2dec_N(self):
+ self.assertEqual(
+ func.gps2dec(
+ '33 deg 9\' 34.93" N'
+ ),
+ 33.159703
+ )
+
+ def test_gps2dec_S(self):
+ self.assertEqual(
+ func.gps2dec(
+ '33 deg 9\' 34.93" S'
+ ),
+ -33.159703
+ )
+
+ def test_gps2dec(self):
+ self.assertEqual(
+ func.gps2dec(
+ '33 deg 9\' 34.93"'
+ ),
+ 33.159703
+ )
+
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file
diff --git a/nasg/tests/jinjaenv.py b/nasg/tests/jinjaenv.py
new file mode 100644
index 0000000..e043476
--- /dev/null
+++ b/nasg/tests/jinjaenv.py
@@ -0,0 +1,36 @@
+import unittest
+import nasg.jinjaenv as jinjaenv
+import arrow
+
+class CommandLineTest(unittest.TestCase):
+
+ def test_jinja_filter_date(self):
+ t = arrow.utcnow()
+ self.assertEqual(
+ jinjaenv.jinja_filter_date(t.datetime, 'c'),
+ t.format('YYYY-MM-DDTHH:mm:ssZ')
+ )
+
+ def test_jinja_filter_slugify(self):
+ self.assertEqual(
+ jinjaenv.jinja_filter_slugify('Árvíztűrő Tükörfúrógép'),
+ 'arvizturo-tukorfurogep'
+ )
+
+ def test_jinja_filter_search1(self):
+ self.assertTrue(
+ jinjaenv.jinja_filter_search('almafa', 'alma')
+ )
+
+ def test_jinja_filter_search3(self):
+ self.assertTrue(
+            jinjaenv.jinja_filter_search(['almafa'], 'almafa')
+ )
+
+ def test_jinja_filter_search2(self):
+ self.assertFalse(
+ jinjaenv.jinja_filter_search('almafa', 'eszeveszett')
+ )
+
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file
diff --git a/nasg/tests/singular.py b/nasg/tests/singular.py
new file mode 100644
index 0000000..345c510
--- /dev/null
+++ b/nasg/tests/singular.py
@@ -0,0 +1,10 @@
+import unittest
+import nasg.singular as singular
+
+class Test(unittest.TestCase):
+
+ def test(self):
+ self.assertEqual('','')
+
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file
diff --git a/nasg/tests/taxonomy.py b/nasg/tests/taxonomy.py
new file mode 100644
index 0000000..282341e
--- /dev/null
+++ b/nasg/tests/taxonomy.py
@@ -0,0 +1,10 @@
+import unittest
+import nasg.taxonomy as taxonomy
+
+class Test(unittest.TestCase):
+
+ def test(self):
+ self.assertEqual('','')
+
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file
diff --git a/new.py b/new.py
deleted file mode 100755
index d1f0503..0000000
--- a/new.py
+++ /dev/null
@@ -1,203 +0,0 @@
-#!/home/petermolnar.net/.venv/bin/python3.5
-
-"""Usage: new.py [-h] [-t TAGS] [-d DATE] [-s SLUG] [-l TITLE] [-b BOOKMARK] [-r REPLY] [-p REPOST] [-c CONTENT] [-u SUMMARY] [-i REDIRECT] [-a CATEGORY]
-
--h --help show this
--t --tags TAGS ';' separated, quoted list of tags
--d --date DATE YYYY-mm-ddTHH:MM:SS+TZTZ formatted date, if not now
--s --slug SLUG slug (normally autogenerated from title or pubdate)
--l --title TITLE title of new entry
--b --bookmark BOOKMARK URL to bookmark
--r --reply REPLY URL to reply to
--p --repost REPOST URL to repost
--c --content CONTENT content of entry
--u --summary SUMMARY summary of entry
--i --redirect REDIRECT ';' separated, quoted list of redirects
--a --category CATEGORY to put the content in this category
-"""
-
-import os
-import sys
-import datetime
-import calendar
-import logging
-import json
-import glob
-import iso8601
-import pytz
-from docopt import docopt
-from slugify import slugify
-from ruamel import yaml
-import singular
-
-class ContentCreator(object):
- def __init__(
- self,
- category='note',
- tags=[],
- date='',
- slug='',
- title='',
- bookmark='',
- reply='',
- repost='',
- content='',
- summary='',
- redirect=[]
- ):
- self.category = category
-
- if date:
- self.date = iso8601.parse_date(date)
- else:
- self.date = datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
- self.time = calendar.timegm(self.date.timetuple())
-
- self.title = title
-
- if slug:
- self.slug = slug
- elif title:
- self.slug = slugify(title, only_ascii=True, lower=True)
- else:
- self.slug = singular.SingularHandler.baseN(self.time)
-
- self.tags = tags
- self.bookmark = bookmark
- self.reply = reply
- self.repost = repost
- if content:
- self.content = content
- else:
- self.content = ''
- self.summary = summary
- self.redirect = redirect
-
- self._makeyaml()
- self._write()
-
-
- def _makeyaml(self):
- self.yaml = {
- 'published': self.date.strftime("%Y-%m-%dT%H:%M:%S%z")
- }
-
- if self.title:
- self.yaml['title'] = self.title
-
- if self.tags:
- self.yaml['tags'] = self.tags
-
- if self.bookmark:
- self.yaml['bookmark-of'] = self.bookmark
-
- if self.repost:
- self.yaml['repost-of'] = self.repost
-
- if self.reply:
- self.yaml['in-reply-to'] = self.reply
-
- if self.summary:
- self.yaml['summary'] = self.summary
-
- if self.redirect:
- self.yaml['redirect'] = self.redirect
-
- def _write(self):
- fdir = os.path.join(glob.CONTENT, self.category)
- if not os.path.isdir(fdir):
- sys.exit("there is no category %s" % (self.category))
-
- self.fpath = os.path.join(glob.CONTENT, self.category, "%s.md" % (self.slug))
- self.out = "---\n" + yaml.dump(self.yaml, Dumper=yaml.RoundTripDumper) + "---\n\n" + self.content
- with open(self.fpath, "w") as archive:
- logging.info("writing %s", self.fpath)
- logging.info("contents: %s", self.out)
- archive.write(self.out)
- archive.close()
-
-
-class ParseCMDLine(object):
- def __init__(self, arguments):
- for x in ['--redirect', '--tags']:
- if x in arguments and arguments[x]:
- arguments[x] = arguments[x].split(";")
-
- self.entry = ContentCreator(
- category=arguments['--category'],
- tags=arguments['--tags'],
- date=arguments['--date'],
- slug=arguments['--slug'],
- title=arguments['--title'],
- bookmark=arguments['--bookmark'],
- reply=arguments['--reply'],
- repost=arguments['--repost'],
- content=arguments['--content'],
- summary=arguments['--summary'],
- redirect=arguments['--redirect']
- )
-
-if __name__ == '__main__':
- args = docopt(__doc__, version='new.py 0.1')
-
- with open(os.path.join(glob.CACHE, "slugs.json")) as sf:
- slugs = json.loads(sf.read())
- sf.close()
-
- if not args['--category']:
- c = 'note'
- args['--category'] = input('Category [%s]: ' % (c)) or c
-
- if not args['--date']:
- d = datetime.datetime.utcnow().replace(tzinfo=pytz.utc).strftime("%Y-%m-%dT%H:%M:%S%z")
- args['--date'] = input('Date [%s]' % (d)) or d
-
- if not args['--title']:
- args['--title'] = input('Title []:') or ''
-
- if not args['--tags']:
- args['--tags'] = input('Tags (separated by ;, no whitespace) []:') or []
-
- if not args['--bookmark']:
- args['--bookmark'] = input('Bookmark of URL []:') or ''
-
- if not args['--reply']:
- args['--reply'] = input('Reply to URL []:') or ''
-
- if not args['--repost']:
- args['--repost'] = input('Repost of URL []:') or ''
-
- if not args['--slug']:
- if args['--title']:
- slug = slugify(args['--title'], only_ascii=True, lower=True)
- elif args['--bookmark']:
- slug = slugify("re: %s" % (args['--bookmark']), only_ascii=True, lower=True)
- elif args['--reply']:
- slug = slugify("re: %s" % (args['--reply']), only_ascii=True, lower=True)
- elif args['--repost']:
- slug = slugify("re: %s" % (args['--repost']), only_ascii=True, lower=True)
- else:
- d = iso8601.parse_date(args['--date'])
- t = calendar.timegm(d.timetuple())
- slug = singular.SingularHandler.baseN(t)
- args['--slug'] = input('Slug [%s]:' % (slug)) or slug
-
- if args['--slug'] in slugs:
- logging.warning("This slug already exists: %s", args['--slug'])
- slugbase = args['--slug']
- inc = 1
- while args['--slug'] in slugs:
- args['--slug'] = "%s-%d" % (slugbase, inc)
- inc = inc+1
- logging.warning("Using %s as slug", args['--slug'])
-
- if not args['--summary']:
- args['--summary'] = input('Summary []:') or ''
-
- if not args['--content']:
- args['--content'] = input('Content []:') or ''
-
- if not args['--redirect']:
- args['--reditect'] = input('Additional slugs (separated by ;, no whitespace) []:') or []
-
- p = ParseCMDLine(args)
\ No newline at end of file
diff --git a/receiver.py b/receiver.py
deleted file mode 100644
index 822eb21..0000000
--- a/receiver.py
+++ /dev/null
@@ -1,850 +0,0 @@
-import glob
-import asyncio
-import uvloop
-import os
-from sanic import Sanic
-import sanic.response
-from sanic.log import log as logging
-from whoosh import index, qparser
-import pynmea2
-import datetime
-import pytz
-import re
-import validators
-import requests
-import pypandoc
-import hashlib
-import time
-from webmentiontools import urlinfo
-import json
-import calendar
-import mimetypes
-import singular
-import urllib.parse
-from ruamel import yaml
-from slugify import slugify
-import smtplib
-import iso8601
-import csv
-import shutil
-import collections
-from git import Repo, Actor
-import frontmatter
-#import gzip
-import arrow
-
-class ToEmail(object):
- def __init__(self, webmention):
- self.webmention = webmention
- self.set_html()
- self.set_headers()
-
-
- def set_html(self):
- for authormeta in ['email', 'name', 'url']:
- if not authormeta in self.webmention['author']:
- self.webmention['author'][authormeta] = ''
-
- html = """
-
-
-
-
- New %s
-
-
- - From
- -
- %s
- %s
-
- - Source
- - %s
- - Target
- - %s
-
- %s
-
- """ % (
- self.webmention['type'],
- self.webmention['author']['url'],
- self.webmention['author']['name'],
- self.webmention['author']['email'],
- self.webmention['author']['email'],
- self.webmention['source'],
- self.webmention['source'],
- self.webmention['target'],
- self.webmention['target'],
- pypandoc.convert_text(
- self.webmention['content'],
- to='html5',
- format="markdown+" + "+".join([
- 'backtick_code_blocks',
- 'auto_identifiers',
- 'fenced_code_attributes',
- 'definition_lists',
- 'grid_tables',
- 'pipe_tables',
- 'strikeout',
- 'superscript',
- 'subscript',
- 'markdown_in_html_blocks',
- 'shortcut_reference_links',
- 'autolink_bare_uris',
- 'raw_html',
- 'link_attributes',
- 'header_attributes',
- 'footnotes',
- ])
- )
- )
- self.html = html
-
- def set_headers(self):
- """ Create and send email from a parsed webmention """
-
- self.headers = {
- 'Content-Type': 'text/html; charset=utf-8',
- 'Content-Disposition': 'inline',
- 'Content-Transfer-Encoding': '8bit',
- 'Date': self.webmention['date'].strftime('%a, %d %b %Y %H:%M:%S %Z'),
- 'X-WEBMENTION-SOURCE': self.webmention['source'],
- 'X-WEBMENTION-TARGET': self.webmention['target'],
- 'From': glob.conf['from']['address'],
- 'To': glob.conf['to']['address'],
- 'Subject': "[webmention] from %s to %s" % ( self.webmention['source'], self.webmention['target'] ),
- }
-
-
- def send(self):
- msg = ''
- for key, value in self.headers.items():
- msg += "%s: %s\n" % ( key, value )
-
- msg += "\n%s\n" % self.html
-
- try:
- s = smtplib.SMTP( glob.conf['smtp']['host'], glob.conf['smtp']['port'] )
- if glob.conf['smtp']['tls']:
- s.ehlo()
- s.starttls()
- s.ehlo()
-
- if glob.conf['smtp']['username'] and glob.conf['smtp']['password']:
- s.login(glob.conf['smtp']['username'], glob.conf['smtp']['password'])
-
- s.sendmail( self.headers['From'], [ self.headers['To'] ], msg.encode("utf8") )
- s.quit()
- except:
- print("Unexpected error:", sys.exc_info()[0])
- raise
-
-
-class MicropubHandler(object):
- def __init__(self, request):
- self.request = request
- self.response = sanic.response.text("Unhandled error", status=500)
-
- self.slug = ''
- self.content = ''
- self.category = 'note'
- self.meta = {}
- self.dt = datetime.datetime.now().replace(tzinfo=pytz.utc)
-
- logging.debug("incoming micropub request:")
- logging.debug(self.request.body)
-
- logging.debug("** args:")
- logging.debug(self.request.args)
-
- logging.debug("** query string:")
- logging.debug(self.request.query_string)
-
- logging.debug("** headers:")
- logging.debug(self.request.headers)
-
- with open(os.path.join(glob.CACHE, "tags.json"), "r") as db:
- self.existing_tags = json.loads(db.read())
- db.close()
-
- self._parse()
-
- def _verify(self):
- if 'q' in self.request.args:
- if 'config' in self.request.args['q']:
- self.response = sanic.response.json({
- 'tags': self.existing_tags
- }, status=200)
- return
- if 'syndicate-to' in self.request.args['q']:
- self.response = sanic.response.json({
- 'syndicate-to': []
- }, status=200)
- return
-
- if not 'access_token' in self.request.form:
- self.response = sanic.response.text("Mising access token", status=401)
- return
-
- token = self.request.form.get('access_token')
-
- verify = requests.get(
- 'https://tokens.indieauth.com/token',
- allow_redirects=False,
- timeout=10,
- headers={
- 'Content-Type': 'application/x-www-form-urlencoded',
- 'Authorization': 'Bearer %s' % (token)
- });
-
- if verify.status_code != requests.codes.ok:
- self.response = sanic.response.text("Could not verify access token", status=500)
- return False
-
- response = urllib.parse.parse_qs(verify.text)
- logging.debug(response)
- if 'scope' not in response or 'me' not in response:
- self.response = sanic.response.text("Could not verify access token", status=401)
- return False
-
- if '%s/' % (glob.conf['site']['url'].rstrip()) not in response['me']:
- self.response = sanic.response.text("You can't post to this domain.", status=401)
- return False
-
- if 'post' not in response['scope'] and 'create' not in response['scope']:
- self.response = sanic.response.text("Invalid scope", status=401)
- return False
-
- return True
-
- def _parse(self):
- if not self._verify():
- return
-
- if len(self.request.files):
- self.response = sanic.response.text("File handling is not yet done", status=501)
- return
- #for ffield in self.request.files.keys():
- #logging.info("got file field: %s" % ffield)
- #f = self.request.files.get(ffield)
- #logging.info("mime is: %s" % f.type)
- #logging.info("ext should be: %s" % mimetypes.guess_extension(f.type))
-
- ##f.body
- ##f.type
- ##logging.info( f )
-
- self.meta['published'] = self.dt.strftime('%Y-%m-%dT%H:%M:%S%z')
-
- slug = None
-
- if 'content' in self.request.form and len(self.request.form.get('content')):
- self.content = self.request.form.get('content')
-
- if 'summary' in self.request.form and len(self.request.form.get('summary')):
- self.meta['summary'] = self.request.form.get('summary')
-
- if 'slug' in self.request.form and len(self.request.form.get('slug')):
- slug = self.request.form.get('slug')
-
- if 'name' in self.request.form and len(self.request.form.get('name')):
- self.meta['title'] = self.request.form.get('name')
- if not slug:
- slug = self.meta['title']
-
- if 'in-reply-to' in self.request.form and len(self.request.form.get('in-reply-to')):
- self.meta['in-reply-to'] = self.request.form.get('in-reply-to')
- if not slug:
- slug = 're: %s', self.meta['in-reply-to']
-
- if 'repost-of' in self.request.form and len(self.request.form.get('repost-of')):
- self.meta['repost-of'] = self.request.form.get('repost-of')
- category = 'bookmark'
- if not slug:
- slug = '%s', self.meta['repost-of']
-
- if 'bookmark-of' in self.request.form and len(self.request.form.get('bookmark-of')):
- self.meta['bookmark-of'] = self.request.form.get('bookmark-of')
- self.category = 'bookmark'
- if not slug:
- slug = '%s', self.meta['bookmark-of']
-
- if 'category[]' in self.request.form:
- self.meta['tags'] = list(self.request.form['category[]'])
- if 'summary' in self.meta and ('IT' in self.meta['tags'] or 'it' in self.meta['tags']):
- self.category = 'article'
- elif 'summary' in self.meta and ('journal' in self.meta['tags'] or 'journal' in self.meta['tags']):
- self.category = 'journal'
-
- if not slug:
- slug = singular.SingularHandler.baseN(calendar.timegm(self.dt.timetuple()))
-
- self.slug = slugify(slug, only_ascii=True, lower=True)
- self._write()
-
- def _write(self):
- fpath = os.path.join(glob.CONTENT, self.category, '%s.md' % (self.slug))
- if os.path.isfile(fpath):
- self.response = sanic.response.text("Update handling is not yet done", status=501)
- return
-
- logfile = os.path.join(glob.LOGDIR, "micropub-%s.log" % (self.dt.strftime("%Y-%m")))
- with open (logfile, 'a') as micropublog:
- logging.debug("logging micropub request")
- micropublog.write("%s %s\n" % (self.dt.strftime('%Y-%m-%dT%H:%M:%S%z'), fpath))
- micropublog.close()
-
- with open (fpath, 'w') as mpf:
- logging.info("writing file to: %s", fpath)
- out = "---\n" + yaml.dump(self.meta, Dumper=yaml.RoundTripDumper, allow_unicode=True, indent=4) + "---\n\n" + self.content
- mpf.write(out)
- mpf.close()
-
- self._git(fpath)
-
- logging.info("trying to open and parse the received post")
- post = singular.ArticleHandler(fpath, category=self.category)
- post.write()
- post.pings()
-
- self.response = sanic.response.text(
- "Post created",
- status = 201,
- headers = {
- 'Location': "%s/%s/" % (glob.conf['site']['url'], self.slug)
- }
- )
-
- return
-
- def _git(self, fpath):
- logging.info("committing to git")
- repo = Repo(glob.CONTENT)
- author = Actor(glob.conf['author']['name'], glob.conf['author']['email'])
- index = repo.index
- newfile = fpath.replace(glob.CONTENT, '').lstrip('/')
- index.add([newfile])
- message = 'new content via micropub: %s' % (newfile)
- index.commit(message, author=author, committer=author)
-
-
-class SearchHandler(object):
- def __init__ (self, query):
- self.query = query
- self.response = sanic.response.text("You seem to have forgot to enter what you want to search for. Please try again.", status=400)
-
- if not query:
- return
-
- self._tmpl = glob.jinja2env.get_template('searchresults.html')
- self._ix = index.open_dir(glob.SEARCHDB)
- self._parse()
-
- def _parse(self):
- self.query = self.query.replace('+', ' AND ')
- self.query = self.query.replace(' -', ' NOT ')
- qp = qparser.MultifieldParser(
- ["title", "content", "tags"],
- schema = glob.schema
- )
- q = qp.parse(self.query)
- r = self._ix.searcher().search(q, sortedby="weight", limit=100)
- logging.info("results for '%s': %i", self.query, len(r))
- results = []
- for result in r:
- res = {
- 'title': result['title'],
- 'url': result['url'],
- 'highlight': result.highlights("content"),
- }
-
- if 'img' in result:
- res['img'] = result['img']
-
- results.append(res)
-
- tvars = {
- 'term': self.query,
- 'site': glob.conf['site'],
- 'posts': results,
- 'taxonomy': {}
- }
- logging.info("collected %i results to render", len(results))
- html = self._tmpl.render(tvars)
- self.response = sanic.response.html(html, status=200)
-
-
-class WebmentionHandler(object):
- def __init__ ( self, source, target ):
- self.source = source
- self.target = target
- self.time = arrow.utcnow().timestamp
- logging.debug("validating: from: %s; to: %s" % (self.source, self.target) )
- self.response = sanic.response.json({
- 'status': 'ok','msg': 'accepted',
- }, 200)
- self._validate()
- self._parse()
- self._archive()
- self._send()
-
- def _validate(self):
- if not validators.url(self.source):
- self.response = sanic.response.json({
- 'status': 'error','msg': '"souce" parameter is an invalid URL',
- }, 400)
- return
-
- if not validators.url(self.target):
- self.response = sanic.response.json({
- 'status': 'error','msg': '"target" parameter is an invalid URL',
- }, 400)
- return
-
- _target = urllib.parse.urlparse(self.target)
- _target_domain = '{uri.netloc}'.format(uri=_target)
-
- if not _target_domain in glob.conf['accept_domains']:
- self.response = sanic.response.json({
- 'status': 'error',
- 'msg': "%s' is not in the list of allowed domains" % (
- _target_domain
- )
- }, 400)
- return
-
- _source = urllib.parse.urlparse(self.source)
- _source_domain = '{uri.netloc}'.format(uri=_source)
-
- if _source_domain == _target_domain and not glob.conf['allow_selfmention']:
- self.response = sanic.response.json({
- 'status': 'error',
- 'msg': "selfpings are disabled"
- }, 400)
- return
-
- return
-
- def _parse(self):
- if self.response.status != 200:
- return
-
- self._log()
- self._source = urlinfo.UrlInfo(self.source)
- if self._source.error:
- logging.warning( "couldn't fetch %s; dropping webmention" % (self.source))
- return
- self.source = self._source.realurl
- if not self._source.linksTo(self.target):
- logging.warning( "%s is not linking to %s; dropping webmention" % (self.source, self.target))
- return
-
- self._target = urlinfo.UrlInfo(self.target)
- if self._target.error:
- logging.warning( "couldn't fetch %s; dropping webmention" % (self.target))
- return
- self.target = self._target.realurl
-
- self.webmention = {
- 'author': self._source.author(),
- 'type': self._source.relationType(),
- 'target': self.target,
- 'source': self.source,
- 'date': arrow.get(self._source.pubDate()),
- 'content': pypandoc.convert_text(
- self._source.content(),
- to="markdown-" + "-".join([
- 'raw_html',
- 'native_divs',
- 'native_spans',
- ]),
- format='html'
- )
- }
-
-
- def _send(self):
- if self.response.status != 200:
- return
-
- m = ToEmail(self.webmention)
- m.send()
-
-
- def _archive(self):
- if self.response.status != 200:
- return
-
- fbase = self.webmention['date'].format('YYYY-MM-DD-HH-mm-ss')
- fpath = self._archive_name(fbase)
-
- archive = dict(self.webmention)
- archive['date'] = archive['date'].format('YYYY-MM-DDTHH.mm.ssZ')
- content = archive['content']
- del(archive['content'])
-
- with open (fpath, 'w') as f:
- logging.info("writing file to: %s", fpath)
- out = "---\n" + yaml.dump(
- archive,
- Dumper=yaml.RoundTripDumper,
- allow_unicode=True,
- indent=4
- ) + "---\n\n" + content
- f.write(out)
- f.close()
-
- def _verify_archive(self, p):
- archive = frontmatter.load(p)
-
- if 'target' not in archive.metadata:
- logging.warning('missing target')
- return False
-
- if 'source' not in archive.metadata:
- logging.warning('missing source')
- return False
-
- if 'date' not in archive.metadata:
- logging.warning('missing date')
- return False
-
- if archive.metadata['target'] != self.webmention['target']:
- logging.warning('target different')
- return False
-
- if archive.metadata['source'] != self.webmention['source']:
- logging.warning('source different')
- return False
-
- d = arrow.get(archive.metadata['date'])
-
- if d.timestamp != self.webmention['date'].timestamp:
- logging.warning('date different')
- return False
-
- # overwrite
- return True
-
- def _archive_name(self, archive, ext='.md'):
- p = os.path.join(glob.COMMENTS, "%s%s" % (archive, ext))
-
- if not os.path.exists(p):
- logging.debug("%s doesn't exits yet" % p)
- return p
-
- logging.debug("%s exists, checking for update" % p)
- if self._verify_archive(p):
- return p
-
- # another comment with the exact same second? wy not.
- names = [x for x in os.listdir(glob.COMMENTS) if x.startswith(archive)]
- suffixes = [x.replace(archive, '').replace(ext, '').replace('.','') for x in names]
- indexes = [int(x) for x in suffixes if x and set(x) <= set('0123456789')]
- idx = 1
- if indexes:
- idx += sorted(indexes)[-1]
-
- return os.path.join(glob.COMMENTS, "%s.%d%s" % (archive, idx, ext))
-
- def _log(self):
- if not os.path.isdir(glob.LOGDIR):
- os.mkdir (glob.LOGDIR)
-
- logfile = os.path.join(glob.LOGDIR, datetime.datetime.now().strftime("%Y-%m"))
- s = json.dumps({
- 'time': self.time,
- 'source': self.source,
- 'target': self.target
- })
-
- with open(logfile, "a") as log:
- logging.debug( "writing logfile %s with %s" % (logfile, s))
- log.write("%s\n" % (s))
- log.close()
-
-
-class TimeSeriesHandler(object):
- def __init__(self, tag):
- if not os.path.isdir(glob.TSDBDIR):
- os.mkdir(glob.TSDBDIR)
-
- self.tag = tag
- self.p = os.path.join(glob.TSDBDIR, '%s.csv' % (self.tag))
- self.db = {}
-
- #def _loaddb(self):
- #if not os.path.isfile(self.p):
- #return
-
- #pattern = re.compile(r'^([0-9-\+:T]+)\s+(.*)$')
- #searchfile = open(self.p, 'r')
- #for line in searchfile:
- #matched = re.match(pattern, line)
- #if not matched:
- #continue
-
- #epoch = int(iso8601.parse_date(matched.group(1)).replace(tzinfo=pytz.utc).strftime('%s'))
- #data = matched.group(2)
- #self.db[epoch] = data
- #searchfile.close()
-
- #def _dumpdb(self):
- #lines = []
- #for e in self.db.items():
- #epoch, data = e
- #tstamp = datetime.datetime.utcfromtimestamp(epoch).replace(tzinfo=pytz.utc).strftime(glob.ISODATE)
- #line = '%s %s' % (tstamp, data)
- #lines.append(line)
-
- #bkp = '%s.bkp' % (self.p)
- #shutil.copy(self.p, bkp)
- #with open(self.p, "w") as searchfile:
-
- #searchfile.write()
- #del(cr)
- #csvfile.close()
- #os.unlink(bkp)
-
- @staticmethod
- def _common_date_base(d1, d2):
- d1 = d1.replace(tzinfo=pytz.utc).strftime(glob.ISODATE)
- d2 = d2.replace(tzinfo=pytz.utc).strftime(glob.ISODATE)
- l = len(d1)
- common = ''
- for i in range(l):
- if d1[i] == d2[i]:
- common = common + d1[i]
- else:
- break
- return common
-
- def search(self, when, tolerance=1800):
- when = when.replace(tzinfo=pytz.utc)
- tolerance = int(tolerance/2)
- minwhen = when - datetime.timedelta(seconds=tolerance)
- maxwhen = when + datetime.timedelta(seconds=tolerance)
-
- closest = None
- mindiff = float('inf')
- common = TimeSeriesHandler._common_date_base(minwhen, maxwhen)
- pattern = re.compile(r'^(%s[0-9-\+:T]+)\s+(.*)$' % (common))
- searchfile = open(self.p, 'r')
- for line in searchfile:
- matched = re.match(pattern, line)
- if not matched:
- continue
-
- d = iso8601.parse_date(matched.group(1))
- diff = d - when
- diff = abs(diff.total_seconds())
- if diff >= mindiff:
- continue
-
- mindiff = diff
- closest = (d, matched.group(2))
- searchfile.close()
- return closest
-
- def append(self, data, dt=datetime.datetime.now().replace(tzinfo=pytz.utc)):
- if os.path.isfile(self.p):
- epoch = int(dt.strftime('%s'))
- stat = os.stat(self.p)
- if epoch < stat.st_mtime:
- logging.warning('Refusing to append %s with old data' % self.p)
- return
-
- with open(self.p, 'a') as db:
- db.write("%s %s\n" % (
- dt.strftime(glob.ISODATE),
- data
- ))
-
-
-class DataHandler(object):
- def __init__(self, request):
- self.request = request
- self.dt = datetime.datetime.now().replace(tzinfo=pytz.utc)
- self.response = sanic.response.text('accepted',status=200)
-
- if not 'secrets' in glob.conf or \
- not 'devices' in glob.conf['secrets']:
- self.response = sanic.response.text(
- 'server configuration error',
- status=501
- )
- return
-
- if 'id' not in self.request.args:
- self.response = sanic.response.text(
- 'device id not found in request',
- status=401
- )
- return
-
- id = self.request.args.get('id')
- if id not in glob.conf['secrets']['devices'].keys():
- self.response = sanic.response.text(
- 'device id rejected',
- status=401
- )
- return
-
- self.id = glob.conf['secrets']['devices'][id]
-
-class OpenGTSHandler(DataHandler):
- def __init__(self, *args, **kwargs):
- super(OpenGTSHandler, self).__init__(*args, **kwargs)
- self.lat = 0
- self.lon = 0
- self.alt = 0
- self._parse()
- self.l = '%s 0' % (self.dt.strftime(glob.ISODATE))
-
- def _parse(self):
- logging.debug('--- incoming location request ---')
- logging.debug(self.request.args)
-
- if 'latitude' in self.request.args and 'longitude' in self.request.args:
- self.lat = float(self.request.args.get('latitude'))
- self.lon = float(self.request.args.get('longitude'))
- elif 'gprmc' in self.request.args:
- gprmc = pynmea2.parse(self.request.args.get('gprmc'))
- try:
- self.lat = float(gprmc.latitude)
- self.lon = float(gprmc.longitude)
- except:
- self.response = sanic.response.text(
- "could not process gprmc string",
- status=422
- )
- return
- else:
- self.response = sanic.response.text(
- "no location information found in query",
- status=401
- )
- return
-
- if 'exclude_coordinates' in glob.conf['secrets']:
- excl = {}
- for t in ['lat', 'lon']:
- excl[t] = []
- if t in glob.conf['secrets']['exclude_coordinates']:
- for c in glob.conf['secrets']['exclude_coordinates'][t]:
- excl[t].append(float(c))
-
- if round(self.lat,2) in excl['lat'] and round(self.lon,2) in excl['lon']:
- self.response = sanic.response.text(
- "this location is on the excluded list",
- status=200
- )
- return
-
- if 'loc_timestamp' in self.request.args and 'offset' in self.request.args:
- # this is a bit ugly: first convert the epoch to datetime
- # then append it with the offset as string
- # and convert the string back to datetime from the iso8601 string
- dt = datetime.datetime.utcfromtimestamp(int(self.request.args.get('loc_timestamp')))
- dt = dt.strftime('%Y-%m-%dT%H:%M:%S')
- dt = "%s%s" % (dt, self.request.args.get('offset'))
- try:
- self.dt = iso8601.parse_date(dt).replace(tzinfo=pytz.utc)
- except:
- pass
-
- if 'altitude' in self.request.args:
- self.alt = float(self.request.args.get('altitude'))
- else:
- try:
- self.alt = OpenGTSHandler.altitude_from_bing(self.lat, self.lon)
- except:
- pass
-
- self.lat = "{:4.6f}".format(float(self.lat))
- self.lon = "{:4.6f}".format(float(self.lon))
- self.alt = "{:4.6f}".format(float(self.alt))
- l = '%s %s %s' % (self.lat, self.lon, self.alt)
-
- gpsfile = TimeSeriesHandler('location')
- gpsfile.append(l, dt=self.dt)
-
- @staticmethod
- def altitude_from_bing(lat, lon):
- if 'bing_key' not in glob.conf['secrets']:
- return 0
- if not glob.conf['secrets']['bing_key']:
- return 0
-
- url = "http://dev.virtualearth.net/REST/v1/Elevation/List?points=%s,%s&key=%s" % (
- lat,
- lon,
- glob.conf['secrets']['bing_key']
- )
-
- bing = requests.get(url)
- bing = json.loads(bing.text)
- if 'resourceSets' not in bing or \
- 'resources' not in bing['resourceSets'][0] or \
- 'elevations' not in bing['resourceSets'][0]['resources'][0] or \
- not bing['resourceSets'][0]['resources'][0]['elevations']:
- return 0
-
- alt = float(bing['resourceSets'][0]['resources'][0]['elevations'][0])
- del(bing)
- del(url)
- return alt
-
-
-class SensorHandler(DataHandler):
- def __init__(self, *args, **kwargs):
- super(SensorHandler, self).__init__(*args, **kwargs)
- self.data = 0
- self.tag = ''
- self._parse()
-
- def _parse(self):
- logging.debug('--- incoming sensor request ---')
- logging.debug(self.request.args)
-
- for tag in self.request.args:
- if tag == 'id':
- continue
-
- datafile = TimeSeriesHandler('%s-%s' % (self.id, tag))
- datafile.append(self.request.args.get(tag), dt=self.dt)
-
-
-asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
-app = Sanic()
-
-@app.route("/webmention")
-async def wm(request, methods=["POST"]):
- source = request.form.get('source')
- target = request.form.get('target')
- r = WebmentionHandler(source, target)
- return r.response
-
-@app.route("/search")
-async def search(request, methods=["GET"]):
- query = request.args.get('s')
- r = SearchHandler(query)
- return r.response
-
-@app.route("/micropub")
-async def mpub(request, methods=["POST","GET"]):
- r = MicropubHandler(request)
- return r.response
-
-@app.route("/opengts")
-async def opengts(request, methods=["GET"]):
- r = OpenGTSHandler(request)
- return r.response
-
-@app.route("/sensor")
-async def sensor(request, methods=["GET"]):
- r = SensorHandler(request)
- return r.response
-
-if __name__ == "__main__":
- app.run(host="127.0.0.1", port=8000, debug=True)
\ No newline at end of file
diff --git a/singular.py b/singular.py
deleted file mode 100644
index 9277f37..0000000
--- a/singular.py
+++ /dev/null
@@ -1,916 +0,0 @@
-import os
-import re
-import sys
-import collections
-import logging
-import glob
-import img
-import pypandoc
-import langdetect
-from cache import Cached
-from slugify import slugify
-from ruamel import yaml
-from bs4 import BeautifulSoup
-import frontmatter
-from webmentiondb import WebmentionDB
-import arrow
-import json
-import socket
-import requests
-import hashlib
-import shutil
-
-
-class SingularHandler(object):
-
- def __init__(self, fpath, pingdb=WebmentionDB(), category='note'):
- self.fpath = os.path.abspath(fpath)
- path, fname = os.path.split(self.fpath)
- fname, ext = os.path.splitext(fname)
- self.fname = fname
- self.fext = ext
- self.ftime = os.stat(self.fpath)
- self.target = os.path.join(glob.TARGET, "%s.html" % (self.fname))
-
- basedir = os.path.join(glob.TARGET, "%s" % (self.fname))
- if not os.path.isdir(basedir):
- os.mkdir(basedir)
-
- self.saved = os.path.join(glob.TARGET, "%s" % (self.fname), "saved.html")
-
- self.pingdb = pingdb
- self.title = ''
- self.content = ''
- self._content = ''
- self.summary = ''
- self.html = ''
- self.sumhtml = ''
- self.category = category
- self.tags = []
- self.reactions = {}
- #self.date = datetime.datetime(1970, 1, 1).replace(tzinfo=pytz.utc)
- self.date = arrow.get(0)
- self.updated = None
- self.dtime = 0
- self.utime = 0
- self.redirect = {}
-
- self.exifmin = {}
- self.lang = glob.conf['site']['lang']
- self.syndicate = {}
- self.syndications = []
- self.template = 'singular.html'
-
- self.slug = slugify(self.fname, only_ascii=True, lower=True)
- self.shortslug = slugify(self.fname, only_ascii=True, lower=True)
- self.img = None
- self.srcset = ''
-
- def __repr__(self):
- return "Post '%s' (%s), category: %s" % (self.title,self.fname,self.category)
-
-
- def _postsetup(self):
- """ Shared post-setup - the initial thing, such at title, should be
- set by the classes inheriting this one; these are only the common,
- shared variables """
-
- # set published epoch
- #self.dtime = calendar.timegm(self.date.timetuple())
- self.dtime = self.date.timestamp
-
- # set updated epoch, if any and set the original file date according
- # to either the updated or the published time
- if self.updated:
- #self.utime = calendar.timegm(self.updated.timetuple())
- self.utime = self.updated.timestamp
- if self.utime > 0 and self.utime != self.ftime.st_mtime:
- os.utime(self.fpath, (self.utime, self.utime))
- elif self.dtime > 0 and self.dtime != self.ftime.st_mtime:
- os.utime(self.fpath, (self.dtime, self.dtime))
-
- # generate shortslug from dtime if possible
- if self.dtime > 0:
- self.shortslug = SingularHandler.baseN(self.dtime)
- self.redirect[self.shortslug] = 1
-
- # detect post content language if possible
- try:
- self.lang = langdetect.detect("%s\n\n%s" % (self.title, self.content))
- except:
- pass
-
- # make HTML from markdown via pandoc for the content and the summary
- self.html = SingularHandler.pandoc_md2html(
- self.content,
- time=self.ftime
- )
- self.sumhtml = SingularHandler.pandoc_md2html(
- self.summary,
- time=self.ftime
- )
-
- self.url = "%s/%s" % (glob.conf['site']['url'], self.slug)
- self.syndications = self.pingdb.posses(self.url)
-
- #def urlsvg(self):
- # import pyqrcode
- # import tempfile
- ## generate qr code to the url
- #qrname = tempfile.NamedTemporaryFile(prefix='pyqr_')
- #qr = pyqrcode.create(self.url, error='L')
- #qr.svg(
- #qrname.name,
- #xmldecl=False,
- #omithw=True,
- #scale=1,
- #quiet_zone=0,
- #svgclass='qr',
- #lineclass='qrline'
- #)
- #with open(qrname.name) as f:
- #qrsvg = f.read()
- #f.close()
- #return qrsvg
-
- @staticmethod
- def pandoc_md2html(t, time=None):
- if len(t) == 0:
- return t
-
- cached = Cached(text="%s" % t, stime=time)
- c = cached.get()
-
- if c:
- return c
- else:
- extras = [
- 'backtick_code_blocks',
- 'auto_identifiers',
- 'fenced_code_attributes',
- 'definition_lists',
- 'grid_tables',
- 'pipe_tables',
- 'strikeout',
- 'superscript',
- 'subscript',
- 'markdown_in_html_blocks',
- 'shortcut_reference_links',
- 'autolink_bare_uris',
- 'raw_html',
- 'link_attributes',
- 'header_attributes',
- 'footnotes',
- ]
- md = "markdown+" + "+".join(extras)
-
- t = pypandoc.convert_text(t, to='html5', format=md)
- cached.set(t)
- return t
-
- @staticmethod
- def pandoc_html2md(t, time=None):
- if len(t) == 0:
- return t
-
- cached = Cached(text="%s" % t, stime=time)
- c = cached.get()
-
- if c:
- return c
- else:
- t = pypandoc.convert_text(
- t,
- to="markdown-" + "-".join([
- 'raw_html',
- 'native_divs',
- 'native_spans',
- ]),
- format='html'
- )
-
- cached.set(t)
- return t
-
-
- def tmpl(self):
- return {
- 'title': self.title,
- 'published': self.date,
- 'tags': self.tags,
- 'author': glob.conf['author'],
- 'content': self.content,
- 'html': self.html,
- 'category': self.category,
- 'reactions': self.reactions,
- 'updated': self.updated,
- 'summary': self.sumhtml,
- 'exif': self.exifmin,
- 'lang': self.lang,
- 'syndicate': self.syndicate,
- 'slug': self.slug,
- 'shortslug': self.shortslug,
- 'srcset': self.srcset,
- }
-
- @staticmethod
- def write_redirect(sslug, target, tstamp=arrow.utcnow().timestamp):
-
- tmpl = glob.jinja2env.get_template('redirect.html')
- jvars = {
- 'url': target
- }
- r = tmpl.render(jvars)
- # this is to support / ending urls even for the redirects
- dirs = [
- os.path.join(glob.TARGET, sslug)
- ]
-
- for d in dirs:
- if not os.path.exists(d):
- os.mkdir(d)
-
- files = [
- os.path.join(glob.TARGET, "%s.html" % (sslug)),
- os.path.join(glob.TARGET, sslug, "index.html")
- ]
- for f in files:
- if os.path.isfile(f):
- rtime = os.stat(f)
- if tstamp == rtime.st_mtime:
- logging.debug(
- "Unchanged dates on redirect file %s", f
- )
- continue
-
- with open(f, "w") as html:
- logging.info("writing redirect file %s", f)
- html.write(r)
- html.close()
- os.utime(f, (tstamp,tstamp))
-
-
- def redirects(self):
- """ Write redirect HTMLs """
-
- if self.category == 'page':
- return
-
- for sslug in self.redirect.keys():
- SingularHandler.write_redirect(sslug, self.url, self.ftime.st_mtime)
-
- def write(self):
- """ Write HTML file """
-
- if os.path.isfile(self.target):
- ttime = os.stat(self.target)
- if self.ftime.st_mtime == ttime.st_mtime and not glob.FORCEWRITE:
- logging.debug(
- "Unchanged dates on %s; skipping rendering and writing",
- self.fname
- )
- return
-
- tmpl = glob.jinja2env.get_template(self.template)
- logging.info("rendering %s", self.fname)
- tmplvars = {
- 'post': self.tmpl(),
- 'site': glob.conf['site'],
- 'taxonomy': {},
- }
- r = tmpl.render(tmplvars)
- soup = BeautifulSoup(r,"html5lib")
- r = soup.prettify()
-
- targets = [self.target]
- for target in targets:
- with open(target, "w") as html:
- logging.info("writing %s", target)
- html.write(r)
- html.close()
- os.utime(target, (self.ftime.st_mtime, self.ftime.st_mtime))
-
- rdir = os.path.join(glob.TARGET, self.slug)
- if not os.path.isdir(rdir):
- os.mkdir(rdir)
-
- altdst = os.path.join(glob.TARGET, self.slug, 'index.html')
- altsrc = os.path.join('..', self.target)
-
- if not os.path.islink(altdst):
- if os.path.isfile(altdst):
- os.unlink(altdst)
- os.symlink(altsrc, altdst)
-
- #links = []
- #for r in self.reactions.items():
- #reactiontype, urls = r
- #if isinstance(urls, str):
- #links.append(urls)
- #elif isinstance(urls, list):
- #links = [*links, *urls]
-
- #if 1 == len(links):
- #saved = os.path.join(glob.TARGET, self.slug, 'saved.html')
- #if not os.path.isfile(saved):
- #h, p = _localcopy_hashpath(links[0])
- #c = self._get_localcopy(links[0], h, p)
- #with open(saved, 'w') as f:
- #f.write(c)
- #f.close()
-
- def index(self, ix):
- """ Write search index """
-
- writer = ix.writer()
-
- c = "%s %s %s %s %s" % (
- self.slug,
- self.summary,
- self._content,
- yaml.dump(self.reactions, Dumper=yaml.RoundTripDumper),
- yaml.dump(self.exifmin, Dumper=yaml.RoundTripDumper)
- )
-
- c = "%s %s" % (c, self._localcopy_include())
-
- if self.img:
- imgstr = self.img.mksrcset(generate_caption=False)
- else:
- imgstr = ''
-
- writer.add_document(
- title=self.title,
- url=self.url,
- content=c,
- date=self.date.datetime,
- tags=",".join(self.tags),
- weight=1,
- img=imgstr
- )
- writer.commit()
-
-
- def pings(self):
- """ Ping (webmention) all URLs found in the post """
-
- links = []
- urlregex = re.compile(
- r'\s+https?\:\/\/?[a-zA-Z0-9\.\/\?\:@\-_=#]+'
- r'\.[a-zA-Z0-9\.\/\?\:@\-_=#]*'
- )
- matches = re.findall(urlregex, self.content)
-
- for r in self.reactions.items():
- reactiontype, urls = r
- if isinstance(urls, str):
- matches.append(urls)
- elif isinstance(urls, list):
- matches = [*matches, *urls]
-
- #for s in self.syndicate.keys():
- #matches.append('https://brid.gy/publish/%s' % (s))
-
- if self.utime and self.utime > 0:
- time = self.utime
- else:
- time = self.dtime
-
- if len(matches) > 0:
- for link in matches:
- if glob.conf['site']['domain'] in link:
- continue
-
- if link in links:
- continue
-
- #self._localcopy(link)
- self.pingdb.ping(self.url, link, time)
- links.append(link)
-
-
- def _localcopy_hashpath(self,url):
- h = hashlib.md5(url.encode('utf-8')).hexdigest()
- p = os.path.join(glob.LOCALCOPIES, "%s.html" % (h))
- return (h, p)
-
-
- def _localcopy_include(self):
- links = []
- md = ''
- for r in self.reactions.items():
- reactiontype, urls = r
- if isinstance(urls, str):
- links.append(urls)
- elif isinstance(urls, list):
- links = [*links, *urls]
-
- for url in links:
- h, p = self._localcopy_hashpath(url)
- html = self._get_localcopy(url, h, p)
- md = "%s %s" % (
- md,
- SingularHandler.pandoc_html2md(html, os.stat(p))
- )
-
- return md
-
-
- def _get_localcopy(self, url, h, p):
- html = ''
-
- if os.path.isfile(p):
- with open(p, 'r') as f:
- html = f.read()
- f.close()
- else:
- html = self._make_localcopy(url, h, p)
-
- return html
-
-
- def _make_localcopy(self, url, h, p):
- post = self._pull_localcopy(url)
- tmpl = glob.jinja2env.get_template('localcopy.html')
- html = tmpl.render({'post': post})
- soup = BeautifulSoup(html,"html5lib")
- html = soup.prettify()
-
- with open(p, "w") as f:
- logging.info("saving readable copy of %s to %s", url, p)
- f.write(html)
- f.close()
-
- return html
-
-
- def _pull_localcopy(self, url):
-
- # find the true URL
- # MAYBE: add fallback to archive.org?
- realurl = url
- try:
- pretest = requests.head(url, allow_redirects=True, timeout=30)
- realurl = pretest.url
- except:
- pass
-
- parsed = {
- 'lang': 'en',
- 'url': url,
- 'realurl': realurl,
- 'html': '',
- 'title': '',
- 'excerpt': '',
- 'byline': '',
- }
-
- if 'readable' in glob.conf and \
- 'port' not in glob.conf['readable'] and \
- 'host' not in glob.conf['readable']:
-
- sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- socktest = sock.connect_ex((
- glob.conf['readable']['host'], int(glob.conf['readable']['port'])
- ))
- if 0 == socktest:
- text = self._localcopy_via_proxy(realurl)
- parsed['html'] = text.get('content','')
- parsed['title'] = text.get('title',url)
- parsed['excerpt'] = text.get('excerpt', '')
- parsed['byline'] = text.get('byline', '')
-
- try:
- parsed['lang'] = langdetect.detect(parsed['html'])
- except:
- pass
-
- return parsed
-
- # TODO: fallback to full-python solution if the previous failed
- return parsed
-
-
- def _localcopy_via_proxy(self, url):
- r = "http://%s:%s/api/get?url=%s&sanitize=y" % (
- glob.conf['readable']['host'],
- glob.conf['readable']['port'],
- url
- )
-
- try:
- req = requests.get(r,allow_redirects=False,timeout=60);
- except:
- return None
-
- text = {}
- try:
- text = json.loads(req.text)
- except:
- pass
-
- return text
-
-
- def _adaptify(self):
- """ Generate srcset for all images possible """
-
- linkto = False
- isrepost = None
-
- if len(self.reactions.keys()):
- isrepost = list(self.reactions.keys())[0]
-
- if isrepost:
- if len(self.reactions[isrepost]) == 1:
- linkto = self.reactions[isrepost][0]
-
- mdmatch = re.compile(
- r'!\[.*\]\(.*?\.(?:jpe?g|png|gif)'
- r'(?:\s+[\'\"]?.*?[\'\"]?)?\)(?:\{.*?\})?'
- )
- mdsplit = re.compile(
- r'!\[(.*)\]\((?:\/(?:files|cache)'
- r'(?:\/[0-9]{4}\/[0-9]{2})?\/(.*\.(?:jpe?g|png|gif)))'
- r'(?:\s+[\'\"]?(.*?)[\'\"]?)?\)(?:\{(.*?)\})?'
- )
- mdimg = re.findall(mdmatch, self.content)
- for i in mdimg:
- m = re.match(mdsplit, i)
- if m:
- #logging.info(m.groups())
- imgpath = os.path.join(glob.SFILES, m.group(2))
-
- if not os.path.isfile(imgpath):
- for c in glob.conf['category'].items():
- catn, catd = c
- catp = os.path.abspath(os.path.join(glob.CONTENT, catn))
-
- if not os.path.exists(catp) \
- or not 'type' in catd \
- or catd['type'] != 'photo':
- continue
-
- imgpath = os.path.join(catp, m.group(2))
- break
-
- if os.path.isfile(imgpath):
-
- t = ''
- if m.group(3):
- t = m.group(3)
-
- cl = ''
- if m.group(4):
- cl = m.group(4)
-
- a = ''
- if m.group(1):
- a = m.group(1)
-
- im = img.ImageHandler(
- imgpath,
- alttext=a,
- title=t,
- imgcl=cl,
- linkto=linkto
- )
-
- im.downsize()
- logging.debug("replacing image %s with srcset", imgpath)
- srcset = im.mksrcset()
- if srcset:
- self.content = self.content.replace(i, srcset)
- del(im)
- else:
- logging.error("%s missing %s", m.group(2), self.fpath)
-
- def _video(self):
- """ [video] shortcode extractor """
-
- match = re.compile(r'\[video mp4=\"/(?:files|cache).*?\"\]\[/video\]')
- split = re.compile(r'\[video mp4=\"(/(?:files|cache)\/(.*?))\"\]\[/video\]')
- videos = re.findall(match, self.content)
- for vid in videos:
- v = re.match(split, vid)
- video = """
- """ % (v.group(1))
- self.content = self.content.replace(vid, video)
-
- #def _files(self):
- #""" Copy misc files referenced """
-
- #match = re.compile(
- #r'\s(?:%s)?/(?:files|cache)'
- #r'/.*\.(?:(?!jpe?g|png|gif).*)\s' % (glob.conf['site']['domain'])
- #)
- #split = re.compile(
- #r'\s(?:%s)?/((?:files|cache)'
- #r'/(.*\.(?:(?!jpe?g|png|gif).*)))\s' % (glob.conf['site']['domain'])
- #)
- ##files = re.findall(match, self.content)
- ##print(files)
-
- def _snippets(self):
- """ Replaces [git:(repo)/(file.ext)] with corresponding code snippet """
-
- snmatch = re.compile(r'\[git:[^\/]+\/(?:.*\..*)\]')
- snsplit = re.compile(r'\[git:([^\/]+)\/((?:.*)\.(.*))\]')
- snippets = re.findall(snmatch, self.content)
- isconf = re.compile(r'conf', re.IGNORECASE)
- for snippet in snippets:
- sn = re.match(snsplit, snippet)
- if sn:
- fpath = os.path.join(glob.SOURCE, sn.group(1), sn.group(2))
- if not os.path.isfile(fpath):
- logging.error(
- "missing blogsnippet in %s: %s",
- self.fpath,
- fpath
- )
- continue
-
- if re.match(isconf, sn.group(3)):
- lang = 'apache'
- else:
- lang = sn.group(3)
-
- with open(fpath, "r") as snip:
- c = snip.read()
- snip.close
-
- c = "\n\n```%s\n%s\n```\n" % (lang, c)
- logging.debug("replacing blogsnippet %s", fpath)
- self.content = self.content.replace(snippet, c)
-
- @staticmethod
- def baseN(num, b=36, numerals="0123456789abcdefghijklmnopqrstuvwxyz"):
- """ Used to create short, lowecase slug for a number (an epoch) passed """
- num = int(num)
- return ((num == 0) and numerals[0]) or (
- SingularHandler.baseN(
- num // b,
- b,
- numerals
- ).lstrip(numerals[0]) + numerals[num % b]
- )
-
-
-
-class ArticleHandler(SingularHandler):
-
- def __init__(self, *args, **kwargs):
- super(ArticleHandler, self).__init__(*args, **kwargs)
- self.dctype = 'Text'
- self._setup()
-
- def _setup(self):
- post = frontmatter.load(self.fpath)
- self.meta = post.metadata
- self.content = post.content
- self._content = '%s' % (self.content)
-
- if 'tags' in post.metadata:
- self.tags = post.metadata['tags']
-
- if 'title' in post.metadata:
- self.title = post.metadata['title']
-
- if 'published' in post.metadata:
- self.date = arrow.get(post.metadata['published'])
-
- if 'updated' in post.metadata:
- self.updated = arrow.get(post.metadata['updated'])
-
- if 'summary' in post.metadata:
- self.summary = post.metadata['summary']
-
- if 'redirect' in post.metadata and \
- isinstance(post.metadata['redirect'], list):
- for r in post.metadata['redirect']:
- self.redirect[r] = 1
-
- if 'syndicate' in post.metadata:
- z = post.metadata['syndicate']
- if isinstance(z, str):
- self.syndicate[z] = ''
- elif isinstance(z, dict):
- for s, c in z.items():
- self.syndicate[s] = c
- elif isinstance(z, list):
- for s in z:
- self.syndicate[s] = ''
-
- self.reactions = {}
-
- # getting rid of '-' to avoid css trouble and similar
- rmap = {
- 'bookmark-of': 'bookmark',
- 'repost-of': 'repost',
- 'in-reply-to': 'reply',
- }
-
- for x in rmap.items():
- key, replace = x
- if key in self.meta:
- if isinstance(self.meta[key], str):
- self.reactions[replace] = [self.meta[key]]
- elif isinstance(self.meta[key], list):
- self.reactions[replace] = self.meta[key]
-
- self._adaptify()
- self._snippets()
- self._video()
- #self._files()
- super(ArticleHandler, self)._postsetup()
-
-
-class PhotoHandler(SingularHandler):
-
- def __init__(self, *args, **kwargs):
- super(PhotoHandler, self).__init__(*args, **kwargs)
- self.dctype = 'Image'
- self.img = img.ImageHandler(self.fpath)
- self.exif = self.img.exif
- self._setup()
-
- def _setup(self):
- self.syndicate = {
- 'flickr': '',
- }
-
- keywords = [
- 'XMP:Keywords',
- 'IPTC:Keywords'
- ]
- tags = {}
- for key in keywords:
- if key in self.exif and self.exif[key]:
-
- if isinstance(self.exif[key], str):
- self.exif[key] = self.exif[key].split(",")
-
- if isinstance(self.exif[key], list):
- for tag in self.exif[key]:
- tags[str(tag).strip()] = 1
-
- self.tags = list(tags.keys())
-
- # content
- keywords = [
- 'XMP:Description',
- 'IPTC:Caption-Abstract'
- ]
- for key in keywords:
- if key in self.exif and self.exif[key]:
- self.content = self.exif[key]
- break
- self._content = '%s' % (self.content)
-
- # title
- keywords = [
- 'XMP:Title',
- 'XMP:Headline',
- 'IPTC:Headline'
- ]
- for key in keywords:
- if key in self.exif and self.exif[key]:
- self.title = self.exif[key]
- break
-
- # datetime
- keywords = [
- 'XMP:DateTimeDigitized',
- 'XMP:CreateDate',
- 'EXIF:CreateDate',
- 'EXIF:ModifyDate'
- ]
-
- pattern = re.compile(
- "(?P[0-9]{4}):(?P[0-9]{2}):(?P[0-9]{2})\s+"
- "(?P[0-9]{2}:[0-9]{2}:[0-9]{2})Z?"
- )
-
- for key in keywords:
- if key not in self.exif or not self.exif[key]:
- continue
-
- date = None
- v = pattern.match(self.exif[key]).groupdict()
- if not v:
- continue
-
- try:
- date = arrow.get('%s-%s-%s %s' % (v['Y'], v['M'], v['D'], v['T']))
- except:
- continue
-
- if date:
- self.date = date
- logging.debug("date for %s is set to %s from key %s", self.fname, self.date, key)
- break
-
- self.img.title = self.title
- self.img.alttext = self.content
- self.content = self.content + "\n\n" + self.img.mksrcset(generate_caption=False, uphoto=True)
-
- self.img.downsize()
- self.srcset = self.img.mksrcset(generate_caption=False, uphoto=False)
- super(PhotoHandler, self)._postsetup()
-
-
- def tmpl(self):
- tmpl = super(PhotoHandler, self).tmpl()
- tmpl['exif'] = {}
-
- mapping = {
- 'camera': [
- 'EXIF:Model'
- ],
- 'aperture': [
- 'EXIF:FNumber',
- 'Composite:Aperture'
- ],
- 'shutter_speed': [
- 'EXIF:ExposureTime'
- ],
- 'focallength': [
- 'EXIF:FocalLength',
- 'Composite:FocalLength35efl',
- ],
- 'iso': [
- 'EXIF:ISO'
- ],
- 'lens': [
- 'Composite:LensID',
- 'MakerNotes:Lens',
- 'Composite:LensSpec'
- ]
- }
-
- for ekey, candidates in mapping.items():
- for candidate in candidates:
- if candidate in self.exif:
- tmpl['exif'][ekey] = self.exif[candidate]
- break
-
- gps = ['Latitude', 'Longitude']
- for g in gps:
- gk = 'EXIF:GPS%s' % (g)
- if gk not in self.exif:
- continue
-
- r = 'EXIF:GPS%sRef' % (g)
- ref = None
- if r in self.exif:
- ref = self.exif[r]
-
- tmpl['exif']['geo_%s' % (g.lower())] = self.gps2dec(
- self.exif[gk],
- ref
- )
-
- ##tmpl['imgurl'] = ''
- #sizes = collections.OrderedDict(reversed(list(self.img.sizes.items())))
- #for size, meta in sizes.items():
- #if os.path.isfile(meta['path']):
- #with Image.open(meta['path']) as im:
- #meta['width'], meta['height'] = im.size
- #meta['size'] = os.path.getsize(meta['path'])
- #tmpl['img'] = meta
- #break
-
- tmpl['img'] = self.img.meta
- return tmpl
-
-
- @staticmethod
- def gps2dec(exifgps, ref=None):
- pattern = re.compile(r"(?P[0-9.]+)\s+deg\s+(?P[0-9.]+)'\s+(?P[0-9.]+)\"(?:\s+(?P[NEWS]))?")
- v = pattern.match(exifgps).groupdict()
-
- dd = float(v['deg']) + (((float(v['min']) * 60) + (float(v['sec']))) / 3600)
- if ref == 'West' or ref == 'South' or v['dir'] == "S" or v['dir'] == "W":
- dd = dd * -1
- return round(dd, 6)
-
-
-
-class PageHandler(SingularHandler):
-
- def __init__(self, *args, **kwargs):
- super(PageHandler, self).__init__(*args, **kwargs)
- self._setup()
-
- def _setup(self):
- with open(self.fpath) as c:
- self.content = c.read()
- c.close()
-
- self._content = '%s' % (self.content)
- self._adaptify()
- super(PageHandler, self)._postsetup()
- self.template = 'page.html'
\ No newline at end of file
diff --git a/taxonomy.py b/taxonomy.py
deleted file mode 100644
index f69f711..0000000
--- a/taxonomy.py
+++ /dev/null
@@ -1,253 +0,0 @@
-import math
-import logging
-import os
-import collections
-import json
-import glob
-from slugify import slugify
-from bs4 import BeautifulSoup
-from pprint import pprint
-
-class TaxonomyHandler(object):
-
- def __init__(self, taxonomy='', name='', description='', exclude=False):
- self.taxonomy = taxonomy
- self.name = name
- self.description = description
- self.exclude = exclude
- self.slug = slugify(self.name, only_ascii=True, lower=True)
- self.posts = collections.OrderedDict()
-
- self.taxp = os.path.join(glob.TARGET, self.taxonomy)
- self.simplepath = os.path.join(self.taxp, 'index.html')
- self.basep = os.path.join(self.taxp, self.slug)
- self.pagedp = os.path.join(self.basep, 'page')
- self.indexpath = os.path.join(self.basep, 'index.html')
-
- self.lptime = 0
-
- def __getitem__(self, key):
- return self.posts[key]
-
- def __repr__(self):
- return 'Taxonomy %s (name: %s, slug: %s) with %i posts' % (
- self.taxonomy,
- self.name,
- self.slug,
- len(self.posts)
- )
-
- def __next__(self):
- try:
- r = self.posts.next()
- except:
- raise StopIteration()
- return r
-
- def __iter__(self):
- for ix, post in self.posts.items():
- yield post
- return
-
-
- def append(self, post):
- k = int(post.date.timestamp)
- if k in self.posts:
- inc = 1
- while k in self.posts:
- k = int(k+1)
-
- self.posts[k] = post
- self.posts = collections.OrderedDict(sorted(self.posts.items(), reverse=True))
-
-
- def index(self, ix):
- """ Write search index """
-
- writer = ix.writer()
-
- t, lp = list(self.posts.items())[0]
-
- writer.add_document(
- title=self.name,
- url="%s/%s/%s" % (glob.conf['site']['url'], self.taxonomy, self.slug),
- content="%s %s" % (self.name, self.slug),
- date=lp.date.datetime,
- tags=",".join([self.name]),
- weight=10
- )
- writer.commit()
-
-
- def _test_freshness(self):
- t, lp = list(self.posts.items())[0]
- self.lptime = lp.ftime.st_mtime
-
- if os.path.isfile(self.indexpath):
- p = self.indexpath
- elif os.path.isfile(self.simplepath):
- p = self.simplepath
- else:
- return False
-
- itime = os.stat(p)
- if itime.st_mtime == self.lptime and not glob.FORCEWRITE:
- logging.debug(
- 'Taxonomy tree is fresh for %s' % (self.name)
- )
- return True
-
- return False
-
-
- def _test_dirs(self):
- if not os.path.isdir(self.taxp):
- os.mkdir(self.taxp)
- if not os.path.isdir(self.basep):
- os.mkdir(self.basep)
-
-
- def write_paginated(self):
-
- if self._test_freshness():
- return
-
- self._test_dirs()
-
- taxp = os.path.join(glob.TARGET, self.taxonomy)
- basep = os.path.join(glob.TARGET, self.taxonomy, self.slug)
-
- if not os.path.isdir(taxp):
- os.mkdir(taxp)
- if not os.path.isdir(basep):
- os.mkdir(basep)
-
-
- pages = math.ceil(len(self.posts) / glob.conf['perpage'])
- page = 1
-
-
- if len(self.taxonomy) and len(self.slug):
- base_url = "/%s/%s/" % (self.taxonomy, self.slug)
- else:
- base_url = '/'
-
-
- while page <= pages:
- start = int((page-1) * int(glob.conf['perpage']))
- end = int(start + int(glob.conf['perpage']))
-            do_rss = False
- posttmpls = [self.posts[k].tmpl() for k in list(sorted(
- self.posts.keys(), reverse=True))[start:end]]
-
- if page == 1:
- tpath = self.indexpath
- do_rss = True
- # RSS
-
- else:
- do_rss = False
- if not os.path.isdir(self.pagedp):
- os.mkdir(self.pagedp)
-
- tdir = os.path.join(self.pagedp, "%d" % page)
-
- if not os.path.isdir(tdir):
- os.mkdir(tdir)
- tpath = os.path.join(tdir, "index.html")
-
- tvars = {
- 'taxonomy': {
- 'url': base_url,
- 'name': self.name,
- 'taxonomy': self.taxonomy,
- 'description': self.description,
- 'paged': page,
- 'total': pages,
- 'perpage': glob.conf['perpage'],
- },
- 'site': glob.conf['site'],
- 'posts': posttmpls,
- }
-
-
- tmpl = glob.jinja2env.get_template('archive.html')
- logging.info("rendering %s" % (tpath))
- with open(tpath, "w") as html:
- r = tmpl.render(tvars)
- soup = BeautifulSoup(r, "html5lib")
- r = soup.prettify()
- logging.info("writing %s" % (tpath))
- html.write(r)
- html.close()
- os.utime(tpath, (self.lptime, self.lptime))
-
- if do_rss:
- feeddir = os.path.join(self.basep, 'feed')
- if not os.path.isdir(feeddir):
- os.mkdir(feeddir)
- feedpath = os.path.join(feeddir, "index.xml")
- tmpl = glob.jinja2env.get_template('rss.html')
- logging.info("rendering %s" % (feedpath))
- with open(feedpath, "w") as html:
- r = tmpl.render(tvars)
- logging.info("writing %s" % (feedpath))
- html.write(r)
- html.close()
- os.utime(feedpath, (self.lptime, self.lptime))
-
- page = page+1
-
- def write_simple(self, template='archive.html'):
-
- if self._test_freshness():
- return
-
- self._test_dirs()
-
- base_url = "/%s/" % (self.slug)
-
- posttmpls = [self.posts[k].tmpl() for k in list(sorted(
- self.posts.keys(), reverse=True))]
-
- tvars = {
- 'taxonomy': {
- 'url': base_url,
- 'name': self.name,
- 'taxonomy': self.taxonomy,
- 'description': self.description,
- 'paged': 0,
- 'total': 0,
- 'perpage': glob.conf['perpage'],
- },
- 'site': glob.conf['site'],
- 'posts': posttmpls,
- }
-
- with open(os.path.join(self.simplepath), "w") as html:
- html.write(json.dumps(tvars, indent=4, sort_keys=True, default=str))
- html.close()
-
- #tmpl = glob.jinja2env.get_template('gallery.html')
- #logging.info("rendering %s" % (indexpath))
- #with open(indexpath, "w") as html:
- #r = tmpl.render(tvars)
- #soup = BeautifulSoup(r, "html5lib")
- #r = soup.prettify()
- #logging.info("writing %s" % (indexpath))
- #html.write(r)
- #html.close()
- #os.utime(indexpath, (lptime, lptime))
-
-
- def writesitemap(self):
- sitemap = "%s/sitemap.txt" % (glob.TARGET)
- urls = []
- for p in self.posts.items():
- t, data = p
- urls.append( "%s/%s" % ( glob.conf['site']['url'], data.slug ) )
-
- with open(sitemap, "w") as f:
- logging.info("writing %s" % (sitemap))
- f.write("\n".join(urls))
- f.close()
\ No newline at end of file
diff --git a/update.sh b/update.sh
deleted file mode 100755
index aba1b63..0000000
--- a/update.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env bash
-
-if [ -f "/tmp/petermolnar.net.generator.lock" ]; then
- exit 0;
-fi;
-
-lastfile="$(find /home/petermolnar.net/source/ -type f -name '*.md' -printf '%T+ %p\n' | sort | tail -n1 | awk '{print $2}')";
-lastfilemod=$(stat -c %Y "$lastfile");
-lastrunfile="/tmp/generator_last_run";
-lastrun=0;
-
-if [ -f "$lastrunfile" ]; then
- lastrun=$(stat -c %Y "$lastrunfile");
-fi;
-
-if [ "$lastrun" -lt "$lastfilemod" ]; then
-    cd /home/petermolnar.net/src && ../.venv/bin/python3.5 generator.py;
-fi;
-
-exit 0;
diff --git a/webmentiondb.py b/webmentiondb.py
deleted file mode 100644
index 42f27ce..0000000
--- a/webmentiondb.py
+++ /dev/null
@@ -1,103 +0,0 @@
-import os
-import hashlib
-import logging
-import glob
-from webmentiontools.send import WebmentionSend
-import requests
-import json
-
-class WebmentionDB(object):
- dbpath = glob.WEBMENTIONDB
-
- def __init__(self):
- self.sent = {}
- self._loaddb()
-
- def _loaddb(self):
- if os.path.isfile(self.dbpath):
- logging.info("loading pinged database")
- with open(self.dbpath, 'r') as db:
- self.sent = json.loads(db.read())
-
- def _dumpdb(self):
- with open(self.dbpath, "w") as db:
- logging.info("writing pinged database")
- db.write(json.dumps(self.sent, indent=4, sort_keys=True))
- db.close()
-
- def _refreshdb(self):
- self._dumpdb()
- self._loaddb()
-
- def __getitem__(self, key):
- r = {}
- for i in self.sent.items():
- h, data = i
- if data['source'] == key:
- r[data['target']] = {
- 'time': data['time'],
- 'response': data['response']
- }
-
- return r
-
-
- def __len__(self):
- return len(self.sent)
-
-
- def posses(self, key):
- r = []
- for i in self.sent.items():
- h, data = i
-
- if data['source'] != key:
- continue
-
- if not len(data['response']):
- continue
-
- if 'url' not in data['response']:
- continue
-
- r.append(data['response']['url'])
-
- return r
-
-
- def ping(self, source, target, time=0, posse=False):
- resp = {}
- source = source.strip()
- target = target.strip()
-
- h = source + target + "%i" % (int(time))
- h = h.encode('utf-8')
- h = hashlib.sha1(h).hexdigest()
- if h in self.sent.keys():
- logging.debug("already pinged: %s" % (target))
- return True
-
- logging.debug("pinging: %s" % (target))
-
- wm = WebmentionSend(source, target)
- if hasattr(wm, 'response'):
- resp = wm.response
-
- # fire and forget archive.org call
- try:
- verify = requests.get(
- '%s%s' % ('https://web.archive.org/save/', target),
- allow_redirects=False,
- timeout=30,
- )
-        except Exception:
- pass
-
- self.sent[h] = {
- 'source': source,
- 'target': target,
- 'time': time,
- 'response': resp
- }
-
- self._refreshdb()
\ No newline at end of file