diff --git a/.gitignore b/.gitignore
index bf84ffe..8d411c1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -101,3 +101,4 @@ ENV/
diff --git a/cache.py b/cache.py
deleted file mode 100644
index 22d05bc..0000000
--- a/cache.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import os
-import json
-import hashlib
-import logging
-import glob
-class Cached(object):
- def __init__(self, hash='', text='', stime=0):
- if not os.path.isdir(glob.CACHE):
- os.mkdir(glob.CACHE)
- if hash:
- self._hbase = hash
- elif text:
- self._hbase = hashlib.sha1(text.encode('utf-8')).hexdigest()
- else:
- print("No identifier passed for Cached")
- raise
- self._cpath = os.path.join(glob.CACHE, self._hbase)
- self._stime = stime
- if os.path.isfile(self._cpath):
- self._ctime = os.stat(self._cpath)
- else:
- self._ctime = None
- def get(self):
- if not glob.CACHEENABLED:
- return None
- cached = ''
- if os.path.isfile(self._cpath):
- if self._stime and self._stime.st_mtime == self._ctime.st_mtime:
- logging.debug("Cache exists at %s; using it" % (self._cpath ))
- with open(self._cpath, 'r') as c:
- cached = c.read()
- c.close()
- # invalidate old
- elif self._stime and self._stime.st_mtime > self._ctime.st_mtime:
- logging.debug("invalidating cache at %s" % (self._cpath ))
- os.remove(self._cpath)
- return cached
- def set(self, content):
- if not glob.CACHEENABLED:
- return None
- with open(self._cpath, "w") as c:
- logging.debug("writing cache to %s" % (self._cpath ))
- c.write(content)
- c.close()
- if self._stime:
- os.utime(self._cpath, (self._stime.st_mtime, self._stime.st_mtime ))
\ No newline at end of file
diff --git a/generator.py b/generator.py
deleted file mode 100644
index 6a365d4..0000000
--- a/generator.py
+++ /dev/null
@@ -1,293 +0,0 @@
-"""Usage: generator.py [-h] [-f] [-g] [-p] [-d] [-s FILE]
--h --help show this
--f --force force HTML file rendering
--p --pandoc force re-rendering content HTML
--g --regenerate regenerate images
--s --single FILE only (re)generate a single entity
--d --debug set logging level
-import os
-import shutil
-import logging
-import atexit
-import json
-import sys
-import tempfile
-import glob
-from whoosh import index
-from docopt import docopt
-from ruamel import yaml
-from webmentiontools.send import WebmentionSend
-import taxonomy
-import singular
-from slugify import slugify
-import arrow
-class Engine(object):
- lockfile = "/tmp/petermolnar.net.generator.lock"
- def __init__(self):
- if os.path.isfile(self.lockfile):
- raise ValueError("Lockfile %s is present; generator won't run.")
- else:
- with open(self.lockfile, "w") as lock:
- lock.write(arrow.utcnow().format())
- lock.close()
- atexit.register(self.removelock)
- atexit.register(self.removetmp)
- self._mkdirs()
- self.tags = {}
- self.category = {}
- self.allposts = None
- self.frontposts = None
- self.slugsdb = os.path.join(glob.CACHE, "slugs.json")
- if os.path.isfile(self.slugsdb):
- with open(self.slugsdb) as slugsdb:
- self.allslugs = json.loads(slugsdb.read())
- slugsdb.close()
- else:
- self.allslugs = []
- self.tmpwhoosh = tempfile.mkdtemp('whooshdb_', dir=tempfile.gettempdir())
- self.whoosh = index.create_in(self.tmpwhoosh, glob.schema)
- def removelock(self):
- os.unlink(self.lockfile)
- def removetmp(self):
- if os.path.isdir(self.tmpwhoosh):
- for root, dirs, files in os.walk(self.tmpwhoosh, topdown=False):
- for f in files:
- os.remove(os.path.join(root, f))
- for d in dirs:
- os.rmdir(os.path.join(root, d))
- def initbuilder(self):
- self._copy_and_compile()
- def cleanup(self):
- with open(os.path.join(glob.CACHE, "slugs.json"), "w") as db:
- logging.info("updating slugs database")
- db.write(json.dumps(self.allslugs))
- db.close()
- tags = []
- for tslug, taxonomy in self.tags.items():
- tags.append(taxonomy.name)
- with open(os.path.join(glob.CACHE, "tags.json"), "w") as db:
- logging.info("updating tags database")
- db.write(json.dumps(tags))
- db.close()
- logging.info("deleting old searchdb")
- shutil.rmtree(glob.SEARCHDB)
- logging.info("moving new searchdb")
- shutil.move(self.tmpwhoosh, glob.SEARCHDB)
- def _mkdirs(self):
- for d in [glob.TARGET, glob.TFILES, glob.TTHEME, glob.CACHE]:
- if not os.path.isdir(d):
- os.mkdir(d)
- def _copy_and_compile(self):
- for f in os.listdir(glob.STHEME):
- p = os.path.join(glob.STHEME, f)
- if os.path.isdir(p):
- try:
- shutil.copytree(p, os.path.join(glob.TTHEME, f))
- except FileExistsError:
- pass
- else:
- path, fname = os.path.split(p)
- fname, ext = os.path.splitext(fname)
- logging.debug("copying %s", p)
- shutil.copy(p, os.path.join(glob.TTHEME, f))
- @staticmethod
- def postbycategory(fpath, catd=None, catn=None):
- if catd == 'photo':
- post = singular.PhotoHandler(fpath, category=catn)
- elif catd == 'page':
- post = singular.PageHandler(fpath)
- else:
- post = singular.ArticleHandler(fpath, category=catn)
- return post
- def collect(self):
- self.allposts = taxonomy.TaxonomyHandler()
- #self.gallery = taxonomy.TaxonomyHandler(taxonomy="photography", name="Photography")
- self.frontposts = taxonomy.TaxonomyHandler()
- for category in glob.conf['category'].items():
- catn, catd = category
- catp = os.path.abspath(os.path.join(glob.CONTENT, catn))
- if not os.path.exists(catp):
- continue
- logging.debug("getting posts for category %s from %s", catn, catp)
- cat = taxonomy.TaxonomyHandler(taxonomy='category', name=catn)
- self.category[catn] = cat
- for f in os.listdir(catp):
- fpath = os.path.join(catp, f)
- if not os.path.isfile(fpath):
- continue
- logging.debug("parsing %s", fpath)
- exclude = False
- if 'exclude' in catd:
- exclude = bool(catd['exclude'])
- ct = None
- if 'type' in catd:
- ct = catd['type']
- post = Engine.postbycategory(fpath, catd=ct, catn=catn)
- self.allposts.append(post)
- if post.dtime > arrow.utcnow().timestamp:
- logging.warning(
- "Post '%s' will be posted in the future; "
- "skipping it from Taxonomies for now", fpath
- )
- else:
- cat.append(post)
- if not exclude:
- self.frontposts.append(post)
- if hasattr(post, 'tags') and isinstance(post.tags, list):
- for tag in post.tags:
- tslug = slugify(tag, only_ascii=True, lower=True)
- if not tslug in self.tags.keys():
- t = taxonomy.TaxonomyHandler(taxonomy='tag', name=tag)
- self.tags[tslug] = t
- else:
- t = self.tags[tslug]
- t.append(post)
- elif not hasattr(post, 'tags'):
- logging.error("%s post does not have tags", post.fname)
- elif not isinstance(post.tags, list):
- logging.error(
- "%s tags are not a list, it's %s ",
- post.fname,
- type(post.tags)
- )
- for r in post.redirect.keys():
- self.allslugs.append(r)
- self.allslugs.append(post.fname)
- def renderposts(self):
- for p in self.allposts.posts.items():
- time, post = p
- post.write()
- post.redirects()
- post.pings()
- post.index(self.whoosh)
- def rendertaxonomies(self):
- for t in [self.tags, self.category]:
- for tname, tax in t.items():
- if glob.conf['category'].get(tname, False):
- if glob.conf['category'][tname].get('nocollection', False):
- logging.info("skipping taxonomy '%s' due to config nocollections", tname)
- continue
- tax.write_paginated()
- tax.index(self.whoosh)
- self.frontposts.write_paginated()
- #self.gallery.write_simple(template='gallery.html')
- self.allposts.writesitemap()
- def globredirects(self):
- redirects = os.path.join(glob.CONTENT,'redirects.yml')
- if not os.path.isfile(redirects):
- return
- ftime = os.stat(redirects)
- rdb = {}
- with open(redirects, 'r') as db:
- rdb = yaml.safe_load(db)
- db.close()
- for r_ in rdb.items():
- target, slugs = r_
- for slug in slugs:
- singular.SingularHandler.write_redirect(
- slug,
- "%s/%s" % (glob.conf['site']['url'], target),
- ftime.st_mtime
- )
- def recordlastrun(self):
- if os.path.exists(glob.lastrun):
- t = arrow.utcnow().timestamp
- os.utime(glob.lastrun, (t,t))
- else:
- open(glob.lastrun, 'a').close()
-if __name__ == '__main__':
- args = docopt(__doc__, version='generator.py 0.2')
- if args['--pandoc']:
- glob.CACHEENABLED = False
- if args['--force']:
- glob.FORCEWRITE = True
- if args['--regenerate']:
- glob.REGENERATE = True
- logform = '%(asctime)s - %(levelname)s - %(message)s'
- if args['--debug']:
- loglevel = 10
- else:
- loglevel = 40
- while len(logging.root.handlers) > 0:
- logging.root.removeHandler(logging.root.handlers[-1])
- logging.basicConfig(level=loglevel, format=logform)
- if args['--single']:
- logging.info("(re)generating a single item only")
- path = args['--single'].split('/')
- fpath = os.path.join(glob.CONTENT, path[0], path[1])
- post = Engine.postbycategory(fpath, catd=path[0])
- post.pings()
- post.write()
- sys.exit(0)
- else:
- eng = Engine()
- eng.initbuilder()
- eng.collect()
- eng.renderposts()
- eng.globredirects()
- eng.rendertaxonomies()
- eng.recordlastrun()
- eng.cleanup()
\ No newline at end of file
diff --git a/glob.py b/glob.py
deleted file mode 100644
index 1ab69fa..0000000
--- a/glob.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os
-import logging
-from ruamel import yaml
-from whoosh import fields
-from whoosh import analysis
-import jinja2
-from slugify import slugify
-import arrow
-schema = fields.Schema(
- url=fields.ID(
- stored=True,
- ),
- title=fields.TEXT(
- stored=True,
- analyzer=analysis.FancyAnalyzer(
- )
- ),
- date=fields.DATETIME(
- stored=True,
- sortable=True
- ),
- content=fields.TEXT(
- stored=True,
- analyzer=analysis.FancyAnalyzer(
- )
- ),
- tags=fields.TEXT(
- stored=True,
- analyzer=analysis.KeywordAnalyzer(
- lowercase=True,
- commas=True
- )
- ),
- weight=fields.NUMERIC(
- sortable=True
- ),
- img=fields.TEXT(
- stored=True
- )
-BASEDIR = os.path.dirname(os.path.abspath(__file__))
-CONFIG = os.path.abspath(os.path.join(BASEDIR, 'config.yml'))
-with open(CONFIG, 'r') as c:
- conf = yaml.safe_load(c)
- conf['site']['author'] = conf['author']
- c.close()
-secrets = os.path.abspath(os.path.join(BASEDIR, 'secret.yml'))
-if os.path.isfile(secrets):
- with open(secrets, 'r') as c:
- conf['secrets'] = yaml.safe_load(c)
- c.close()
-ISODATE = '%Y-%m-%dT%H:%M:%S%z'
-SOURCE = os.path.abspath(conf['dirs']['source']['root'])
-CONTENT = os.path.abspath(conf['dirs']['source']['content'])
-FONT = os.path.abspath(conf['dirs']['font'])
-STHEME = os.path.abspath(conf['dirs']['source']['theme'])
-SFILES = os.path.abspath(conf['dirs']['source']['files'])
-TEMPLATES = os.path.abspath(conf['dirs']['source']['templates'])
-COMMENTS = os.path.abspath(conf['dirs']['source']['comments'])
-TARGET = os.path.abspath(conf['dirs']['target']['root'])
-TTHEME = os.path.abspath(conf['dirs']['target']['theme'])
-TFILES = os.path.abspath(conf['dirs']['target']['files'])
-UFILES = conf['dirs']['target']['furl']
-CACHE = os.path.abspath(conf['dirs']['cache'])
-SEARCHDB = os.path.abspath(conf['dirs']['searchdb'])
-WEBMENTIONDB = os.path.abspath(conf['webmentiondb'])
-LOGDIR = os.path.abspath(conf['dirs']['log'])
-GPSDIR = os.path.abspath(conf['dirs']['gps'])
-TSDBDIR = os.path.abspath(conf['dirs']['tsdb'])
-LOCALCOPIES = os.path.abspath(conf['dirs']['localcopies'])
-lastrun = '/tmp/generator_last_run'
-os.environ.setdefault('PYPANDOC_PANDOC', '/usr/bin/pandoc')
-def jinja_filter_date(d, form='%Y-%m-%d %H:%m:%S'):
- if d == 'now':
- return arrow.now().strftime(form)
- if form == 'c':
- form = '%Y-%m-%dT%H:%M:%S%z'
- return d.strftime(form)
-def jinja_filter_slugify(s):
- return slugify(s, only_ascii=True, lower=True)
-def jinja_filter_search(s, r):
- if r in s:
- return True
- return False
-jinjaldr = jinja2.FileSystemLoader(searchpath=TEMPLATES)
-jinja2env = jinja2.Environment(loader=jinjaldr)
-jinja2env.filters['date'] = jinja_filter_date
-jinja2env.filters['search'] = jinja_filter_search
-jinja2env.filters['slugify'] = jinja_filter_slugify
\ No newline at end of file
diff --git a/img.py b/img.py
deleted file mode 100644
index 3156f78..0000000
--- a/img.py
+++ /dev/null
@@ -1,370 +0,0 @@
-import os
-import re
-import sys
-import json
-import shutil
-import collections
-import logging
-import imghdr
-from ctypes import c_void_p, c_size_t
-import glob
-import pyexifinfo
-from similar_text import similar_text
-from cache import Cached
-import wand.api
-import wand.image
-import wand.drawing
-import wand.color
-from PIL import Image
-#from subprocess import call
-# https://stackoverflow.com/questions/34617422/how-to-optimize-image-size-using-wand-in-python
-wand.api.library.MagickSetCompressionQuality.argtypes = [c_void_p, c_size_t]
-class ImageHandler(object):
- def __init__(self, fpath, alttext='', title='', imgcl='', linkto=False):
- self.fpath = os.path.abspath(fpath)
- path, fname = os.path.split(self.fpath)
- fname, ext = os.path.splitext(fname)
- self.fname = fname
- self.fext = ext
- self.ftime = os.stat(self.fpath)
- self.linkto = linkto
- self.alttext = alttext
- self.title = title
- self.imgcl = imgcl
- self.c = os.path.join(glob.TFILES, self.fname)
- self.u = "%s/%s/%s" % (glob.conf['site']['url'],glob.UFILES, self.fname)
- self.what = imghdr.what(self.fpath)
- self.meta = {}
- self.exif = {}
- if self.what == 'jpeg':
- self._setexif()
- self.watermark = ''
- wfile = os.path.join(glob.SOURCE, glob.conf['watermark'])
- if os.path.isfile(wfile):
- self.watermark = wfile
- sizes = {
- 90: {
- 'ext': 's',
- 'cropped': True,
- },
- 360: {
- 'ext': 'm',
- },
- #540: 'n',
- 720: {
- 'ext': 'z',
- },
- #980: 'c',
- 1280: {
- 'ext': 'b',
- }
- }
- self.sizes = collections.OrderedDict(sorted(sizes.items(), reverse=0))
- for size, meta in self.sizes.items():
- meta['path'] = "%s_%s%s" % (self.c, meta['ext'], self.fext)
- meta['url'] = "%s_%s%s" % (self.u, meta['ext'], self.fext)
- meta['mime'] = "image/%s" % (self.what)
- self._setmeta()
- self.fallbacksize = 720
- self.srcsetmin = 720
- self._is_photo()
- if self.is_photo:
- self.srcset = self.mksrcset(generate_caption=False, uphoto=False)
- def _setmeta(self):
- s = collections.OrderedDict(reversed(list(self.sizes.items())))
- for size, meta in s.items():
- if os.path.isfile(meta['path']):
- with Image.open(meta['path']) as im:
- meta['width'], meta['height'] = im.size
- meta['size'] = os.path.getsize(meta['path'])
- self.meta = meta
- break
- def downsize(self, liquidcrop=True, watermark=True):
- if not self._is_downsizeable():
- return self._copy()
- if not self._isneeded():
- logging.debug("downsizing not needed for %s", self.fpath)
- return
- logging.debug("downsizing %s", self.fpath)
- try:
- img = wand.image.Image(filename=self.fpath)
- img.auto_orient()
- except:
- print("Unexpected error:", sys.exc_info()[0])
- raise
- # watermark
- if self.is_photo and self.watermark and img.format == "JPEG" and watermark:
- img = self._watermark(img)
- elif self.linkto:
- img = self._sourceurlmark(img)
- # resize & cache
- for size, meta in self.sizes.items():
- self._intermediate(img, size, meta)
- self._setmeta()
- def _setexif(self):
- cached = Cached(text=self.fname, stime=self.ftime)
- cexif = cached.get()
- if cexif:
- self.exif = json.loads(cexif)
- else:
- exif = pyexifinfo.get_json(self.fpath)
- self.exif = exif.pop()
- cached.set(json.dumps(self.exif))
- def _is_photo(self):
- self.is_photo = False
- if 'cameras' in glob.conf:
- if 'EXIF:Model' in self.exif:
- if self.exif['EXIF:Model'] in glob.conf['cameras']:
- self.is_photo = True
- if 'copyright' in glob.conf:
- if 'IPTC:CopyrightNotice' in self.exif:
- for s in glob.conf['copyright']:
- pattern = re.compile(r'%s' % s)
- if pattern.search(self.exif['IPTC:CopyrightNotice']):
- self.is_photo = True
- if self.is_photo:
- #self.category = "photo"
- if not self.alttext:
- keywords = ['XMP:Description', 'IPTC:Caption-Abstract']
- for key in keywords:
- if key in self.exif and self.exif[key]:
- self.alttext = self.exif[key]
- break
- if not self.title:
- keywords = ['XMP:Title', 'XMP:Headline', 'IPTC:Headline']
- for key in keywords:
- if key in self.exif and self.exif[key]:
- self.title = self.exif[key]
- break
- def _is_downsizeable(self):
- if self.what != 'jpeg' and self.what != 'png':
- return False
- if self.imgcl:
- return False
- return True
- def _watermark(self, img):
- wmark = wand.image.Image(filename=self.watermark)
- if img.width > img.height:
- w = img.width * 0.16
- h = wmark.height * (w / wmark.width)
- x = img.width - w - (img.width * 0.01)
- y = img.height - h - (img.height * 0.01)
- else:
- w = img.height * 0.16
- h = wmark.height * (w / wmark.width)
- x = img.width - h - (img.width * 0.01)
- y = img.height - w - (img.height * 0.01)
- w = round(w)
- h = round(h)
- x = round(x)
- y = round(y)
- wmark.resize(w, h)
- if img.width < img.height:
- wmark.rotate(-90)
- img.composite(image=wmark, left=x, top=y)
- return img
- def _sourceurlmark(self, img):
- with wand.drawing.Drawing() as draw:
- draw.fill_color = wand.color.Color('#fff')
- draw.fill_opacity = 0.8
- draw.stroke_color = wand.color.Color('#fff')
- draw.stroke_opacity = 0.8
- r_h = round(img.height * 0.3)
- r_top = round((img.height/2) - (r_h/2))
- draw.rectangle(
- left=0,
- top=r_top,
- width=img.width,
- height=r_h
- )
- draw(img)
- with wand.drawing.Drawing() as draw:
- draw.font = os.path.join(glob.FONT)
- draw.font_size = round((img.width)/len(self.linkto)*1.5)
- draw.gravity = 'center'
- draw.text(
- 0,
- 0,
- self.linkto
- )
- draw(img)
- return img
- def _copy(self):
- p = self.c + self.fext
- if not os.path.isfile(p):
- logging.debug("copying %s" % self.fpath)
- shutil.copy(self.fpath, p)
- return
- def _isneeded(self):
- # skip existing
- needed = False
- if glob.REGENERATE:
- needed = True
- else:
- for size, meta in self.sizes.items():
- if not os.path.isfile(meta['path']):
- needed = True
- return needed
- def _intermediate_dimensions(self, img, size, meta):
- if (img.width > img.height and 'crop' not in meta) \
- or (img.width < img.height and 'crop' in meta):
- width = size
- height = int(float(size / img.width) * img.height)
- else:
- height = size
- width = int(float(size / img.height) * img.width)
- return (width, height)
- def _intermediate_symlink(self, meta):
- # create a symlink to the largest resize with the full filename;
- # this is to ensure backwards compatibility and avoid 404s
- altsrc = meta['path']
- altdst = self.c + self.fext
- if not os.path.islink(altdst):
- if os.path.isfile(altdst):
- os.unlink(altdst)
- os.symlink(altsrc, altdst)
- def _intermediate(self, img, size, meta):
- # skip existing unless regenerate needed
- if os.path.isfile(meta['path']) and not glob.REGENERATE:
- return
- # too small images: move on
- #if size > img.height and size > img.width:
- # return
- width, height = self._intermediate_dimensions(img, size, meta)
- try:
- thumb = img.clone()
- thumb.resize(width, height)
- #thumb.resize(width, height, filter='robidouxsharp')
- if 'crop' in meta and liquidcrop:
- thumb.liquid_rescale(size, size, 1, 1)
- elif 'crop' in meta:
- l = t = 0
- if width > size:
- l = int((width - size) / 2)
- if height > size:
- t = int((height - size) / 2)
- thumb.crop(left=l, top=t, width=size, height=size)
- if img.format == "PNG":
- library.MagickSetCompressionQuality(img.wand, 75)
- if img.format == "JPEG":
- thumb.compression_quality = 86
- thumb.unsharp_mask(radius=0, sigma=0.5, amount=1, threshold=0.03)
- thumb.format = 'pjpeg'
- # this is to make sure pjpeg happens
- with open(meta['path'], 'wb') as f:
- thumb.save(file=f)
- if size == list(self.sizes.keys())[-1]:
- self._intermediate_symlink(meta)
- #if img.format == "JPEG":
- ## this one strips the embedded little jpg
- #call(['/usr/bin/jhead', '-dt', '-q', cpath])
- except:
- print("Unexpected error:", sys.exc_info()[0])
- raise
- def mksrcset(self, generate_caption=True, uphoto=False):
- if not self._is_downsizeable():
- return False
- for size, meta in self.sizes.items():
- if 'crop' in meta:
- continue
- # increase fallback until max fallback reached
- if size <= self.fallbacksize:
- fallback = meta['url']
- # set target for the largest
- target = meta['url']
- if uphoto:
- uphotoclass=' u-photo'
- else:
- uphotoclass=''
- caption = ''
- if not self.imgcl:
- cl = ''
- else:
- cl = self.imgcl
- if self.alttext \
- and similar_text(self.alttext, self.fname) < 90 \
- and similar_text(self.alttext, self.fname + '.' + self.fext) < 90 \
- and generate_caption:
- caption = '%s' % (self.alttext)
- if self.linkto:
- target = self.linkto
- return '' % (uphotoclass, target, fallback, self.imgcl, self.alttext, caption)
\ No newline at end of file
diff --git a/nasg.py b/nasg.py
new file mode 100644
index 0000000..ebac014
--- /dev/null
+++ b/nasg.py
@@ -0,0 +1,203 @@
+import argparse
+import logging
+import os
+import re
+import arrow
+import atexit
+from concurrent.futures import ProcessPoolExecutor
+from multiprocessing import cpu_count
+from slugify import slugify
+import nasg.config as config
+import nasg.singular as singular
+import nasg.searchindex as searchindex
+import nasg.taxonomy as taxonomy
+from pprint import pprint
+# Command line switches; the parsed values are merged into config.options
+# by the __main__ block, keyed by each argument's `dest`.
+parser = argparse.ArgumentParser(description='Parameters for NASG')
+parser.add_argument(
+    '--regenerate', '-f',
+    dest='regenerate',
+    action='store_true',
+    default=False,
+    help='force regeneration of all HTML outputs'
+)
+parser.add_argument(
+    '--downsize', '-c',
+    action='store_true',
+    dest='downsize',
+    default=False,
+    help='force re-downsizing of all suitable images'
+)
+parser.add_argument(
+    '--debug', '-d',
+    action='store_true',
+    dest='debug',
+    default=False,
+    help='turn on debug log'
+)
+class Engine(object):
+    """Collect the content files of every configured category and render them.
+
+    Creating an instance makes sure the working directories exist, takes
+    the lock file and registers the removal of that lock at interpreter
+    exit.
+
+    Raises:
+        ValueError: from the constructor when the lock file already exists.
+    """
+
+    def __init__(self):
+        self._initdirs()
+        self._lock()
+        # remove the lock file again when the interpreter exits
+        atexit.register(self._lock, action='clear')
+        self.files = []
+        self.categories = {}
+        self.tags = {}
+        self.allposts = taxonomy.TaxonomyHandler('')
+        self.frontposts = taxonomy.TaxonomyHandler('')
+        self.allowedpattern = re.compile(config.accept_sourcefiles)
+        self.counter = {}
+
+    def _parse_results(self, futures):
+        """Wait for every future and log, rather than raise, its failure."""
+        for future in futures:
+            try:
+                future.result()
+            except Exception as e:
+                logging.error("processing failed: %s", e)
+
+    def collect(self):
+        """Set up the category taxonomies, then parse the files in them."""
+        self._setup_categories()
+        self._setup_singulars()
+
+    def render(self):
+        """Write the collected posts; taxonomy rendering is switched off."""
+        self._render_singulars()
+        #self._render_taxonomy()
+
+    def _render_singulars(self):
+        """Call write() on every collected post, one after the other."""
+        logging.warning("rendering singulars")
+        pprint(self.allposts)
+        for p in self.allposts:
+            p.write()
+
+    def _render_taxonomy(self):
+        """Call write() of every tag, category and the front page in a pool."""
+        futures = []
+        with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
+            for t in self.tags.values():
+                futures.append(executor.submit(t.write))
+            for c in self.categories.values():
+                futures.append(executor.submit(c.write))
+            futures.append(executor.submit(self.frontposts.write))
+        self._parse_results(futures)
+
+    def _setup_categories(self):
+        """Create a TaxonomyHandler for each configured category directory."""
+        for cat, meta in config.categories.items():
+            cpath = os.path.join(config.CONTENT, cat)
+            if not os.path.isdir(cpath):
+                logging.error("category %s not found at: %s", cat, cpath)
+                continue
+            self.categories[cat] = taxonomy.TaxonomyHandler(
+                meta.get('name', cat),
+                taxonomy=meta.get('type', 'category'),
+                slug=cat,
+                render=meta.get('render', True)
+            )
+
+    def _setup_singulars(self):
+        """Parse every accepted file of every category into a post handler.
+
+        The files are processed in this process: _posttype() records its
+        result by appending to self.allposts, self.frontposts, self.tags
+        and the category handler. Submitting it to a ProcessPoolExecutor
+        would apply those appends to a pickled copy of the engine inside
+        the worker and leave the collections here empty.
+        """
+        for slug in self.categories:
+            cpath = os.path.join(config.CONTENT, slug)
+            for f in os.listdir(cpath):
+                fpath = os.path.join(cpath, f)
+                if not self.allowedpattern.fullmatch(f):
+                    logging.warning("unexpected file at: %s" % fpath)
+                    continue
+                # one broken file must not stop the whole collection
+                try:
+                    self._posttype(fpath, slug)
+                except Exception as e:
+                    logging.error("processing failed: %s", e)
+
+    def _posttype(self, fpath, cat):
+        """Build the handler matching the file and register it everywhere.
+
+        Args:
+            fpath: path of the source file.
+            cat: slug of the category the file was found in; must be a
+                key of self.categories and of config.categories.
+        """
+        c = self.categories[cat]
+        if re.match(r'.*\.jpg', fpath):
+            p = singular.PhotoHandler(fpath)
+        elif 'page' == c.taxonomy:
+            p = singular.PageHandler(fpath)
+        else:
+            p = singular.ArticleHandler(fpath)
+        c.append(p)
+        self.allposts.append(p)
+        # categories are shown on the front page unless configured otherwise
+        front = config.categories[cat].get('front', True)
+        if front:
+            self.frontposts.append(p)
+        ptags = p.vars.get('tags', [])
+        for tag in ptags:
+            tslug = slugify(tag, only_ascii=True, lower=True)
+            if tslug not in self.tags:
+                self.tags[tslug] = taxonomy.TaxonomyHandler(
+                    tag,
+                    taxonomy='tag',
+                    slug=tslug
+                )
+            self.tags[tslug].append(p)
+
+    def _initdirs(self):
+        """Create each configured working directory that does not exist."""
+        for d in [
+            config.TARGET,
+            config.TTHEME,
+            config.TFILES,
+            config.VAR,
+            config.SEARCHDB,
+            config.TSDB,
+            config.LOGDIR
+        ]:
+            if not os.path.exists(d):
+                os.mkdir(d)
+
+    def _lock(self, action='set'):
+        """Set, clear or query the lock file at config.LOCKFILE.
+
+        Args:
+            action: 'set' writes the lock file, 'clear' removes it if
+                present; any other value only reports its existence.
+
+        Returns:
+            None for 'set' and 'clear'; otherwise whether the lock exists.
+
+        Raises:
+            ValueError: on 'set' when the lock file is already present.
+        """
+        if 'set' == action:
+            if os.path.exists(config.LOCKFILE):
+                raise ValueError("lockfile %s present" % config.LOCKFILE)
+            with open(config.LOCKFILE, "wt") as lockfile:
+                lockfile.write("%s" % arrow.utcnow())
+        elif 'clear' == action:
+            if os.path.exists(config.LOCKFILE):
+                os.unlink(config.LOCKFILE)
+        else:
+            return os.path.exists(config.LOCKFILE)
+if __name__ == '__main__':
+    # merge the parsed command line switches into the shared options
+    config.options.update(vars(parser.parse_args()))
+
+    # WARNING (30) unless the debug switch asks for DEBUG (10)
+    if config.options['debug']:
+        loglevel = 10
+    else:
+        loglevel = 30
+
+    # drop every handler already attached to the root logger before
+    # basicConfig() is called
+    root = logging.root
+    while root.handlers:
+        root.removeHandler(root.handlers[-1])
+
+    logging.basicConfig(
+        format='%(asctime)s - %(levelname)s - %(message)s',
+        level=loglevel
+    )
+
+    engine = Engine()
+    engine.collect()
+    engine.render()
\ No newline at end of file
diff --git a/nasg/__init__.py b/nasg/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/nasg/cmdline.py b/nasg/cmdline.py
new file mode 100644
index 0000000..595b713
--- /dev/null
+++ b/nasg/cmdline.py
@@ -0,0 +1,115 @@
+import subprocess
+import os
+import json
+import logging
+class CommandLine(object):
+    """Run an external program, feed it stdin and keep its stdout.
+
+    The command string is split on single spaces; the first item is the
+    program, which is resolved to an executable at construction time.
+
+    Raises:
+        ValueError: from the constructor when no executable was found.
+    """
+
+    def __init__(self, cmd, stdin=''):
+        self.cmd = cmd.split(' ')
+        self.stdin = stdin
+        self.stdout = ''
+        self.binary = None
+        self._which()
+        if self.binary:
+            return
+        raise ValueError('%s binary was not found in PATH' % self.cmd[0])
+
+    # based on: http://stackoverflow.com/a/377028/673576
+    def _which(self):
+        """Resolve the program, as given or through the PATH directories."""
+        program = self.cmd[0]
+        if self._is_exe(program):
+            self.binary = program
+            return
+        for directory in os.environ["PATH"].split(os.pathsep):
+            candidate = os.path.join(directory.strip('"'), program)
+            if not self._is_exe(candidate):
+                continue
+            # the resolved path also replaces the program in the command
+            self.cmd[0] = candidate
+            self.binary = candidate
+            return
+
+    def _is_exe(self, fpath):
+        """Tell whether fpath is a regular file the process may execute."""
+        if not os.path.isfile(fpath):
+            return False
+        return os.access(fpath, os.X_OK)
+
+    def run(self):
+        """Execute the command and store its stripped, decoded stdout.
+
+        Returns:
+            The instance itself, so calls can be chained.
+        """
+        process = subprocess.Popen(
+            self.cmd,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            env=os.environ.copy()
+        )
+        # stderr is collected but not used
+        out, _err = process.communicate(self.stdin.encode('utf-8'))
+        self.stdout = out.decode('utf-8').strip()
+        return self
+class Exiftool(CommandLine):
+    """Read the metadata of a file through the exiftool binary."""
+
+    def __init__(self, fpath = ''):
+        self.fpath = fpath
+        cmd = "/usr/local/bin/exiftool -json -sort -groupNames"
+        super(Exiftool, self).__init__(cmd)
+        # The parent splits the command string on spaces, so the path is
+        # added as its own argument afterwards; this keeps paths that
+        # contain spaces in one piece.
+        self.cmd.append(fpath)
+
+    def get(self):
+        """Run exiftool and return the first JSON object it printed.
+
+        Returns:
+            The decoded dict, or an empty dict when the output was not
+            valid JSON or was an empty list; the problem is logged.
+        """
+        self.run()
+        try:
+            return json.loads(self.stdout)[0]
+        except (json.JSONDecodeError, IndexError) as e:
+            logging.error(
+                "Error when decoding JSON returned from exiftool: %s", e
+            )
+            return {}
+class Pandoc(CommandLine):
+    """Convert text by piping it through pandoc.
+
+    Usage: Pandoc(text).[formatter]().get()
+    Formatters:
+      - md2html: markdown with the listed extensions to html5
+      - html2md: to markdown with raw_html, native_divs and
+        native_spans disabled
+    Without a formatter the conversion is plain markdown to html5.
+    """
+
+    def __init__(self, text):
+        # The parent constructor is not called here; get() invokes it once
+        # the input and output formats are known.
+        self.stdout = ''
+        self.format_out = 'html5'
+        self.format_in = 'markdown'
+        self.stdin = text
+
+    def md2html(self):
+        """Select markdown plus extensions as input; returns self."""
+        extensions = (
+            'backtick_code_blocks',
+            'auto_identifiers',
+            'fenced_code_attributes',
+            'definition_lists',
+            'grid_tables',
+            'pipe_tables',
+            'strikeout',
+            'superscript',
+            'subscript',
+            'markdown_in_html_blocks',
+            'shortcut_reference_links',
+            'autolink_bare_uris',
+            'raw_html',
+            'link_attributes',
+            'header_attributes',
+            'footnotes',
+        )
+        self.format_in = "+".join(("markdown",) + extensions)
+        return self
+
+    def html2md(self):
+        """Select markdown minus some extensions as output; returns self."""
+        disabled = (
+            'raw_html',
+            'native_divs',
+            'native_spans',
+        )
+        self.format_out = "-".join(("markdown",) + disabled)
+        return self
+
+    def get(self):
+        """Run pandoc with the chosen formats and return its output."""
+        formats = (self.format_in, self.format_out)
+        cmd = "/usr/bin/pandoc -o- --from=%s --to=%s" % formats
+        super(Pandoc, self).__init__(cmd, stdin=self.stdin)
+        self.run()
+        return self.stdout
\ No newline at end of file
diff --git a/nasg/func.py b/nasg/func.py
new file mode 100644
index 0000000..f0f5009
--- /dev/null
+++ b/nasg/func.py
@@ -0,0 +1,21 @@
+import re
+def gps2dec(exifgps, ref=None):
+    """Convert a degree/minute/second GPS string to decimal degrees.
+
+    Args:
+        exifgps: text such as ``47 deg 30' 0.00" N``; the trailing
+            direction letter is optional.
+        ref: optional reference; 'West' or 'South' makes the result
+            negative, just like a trailing W or S in exifgps.
+
+    Returns:
+        The coordinate as a float rounded to 6 decimals.
+
+    Raises:
+        AttributeError: when exifgps does not match the expected format.
+    """
+    # the group names are the keys looked up in the match dict below
+    pattern = re.compile(
+        r"(?P<deg>[0-9.]+)\s+deg\s+"
+        r"(?P<min>[0-9.]+)'\s+"
+        r"(?P<sec>[0-9.]+)\""
+        r"(?:\s+(?P<dir>[NEWS]))?"
+    )
+    v = pattern.match(exifgps).groupdict()
+    dd = float(v['deg']) + (((float(v['min']) * 60) + (float(v['sec']))) / 3600)
+    if ref == 'West' or ref == 'South' or v['dir'] == "S" or v['dir'] == "W":
+        dd = dd * -1
+    return round(dd, 6)
+def baseN(num, b=36, numerals="0123456789abcdefghijklmnopqrstuvwxyz"):
+    """ Create a short, lowercase slug for the number (an epoch) passed """
+    value = int(num)
+    zero = numerals[0]
+    if value == 0:
+        return zero
+    quotient, remainder = divmod(value, b)
+    # the recursion bottoms out in the zero numeral, which is stripped
+    # from the front before the next digit is appended
+    leading = baseN(quotient, b, numerals).lstrip(zero)
+    return leading + numerals[remainder]
\ No newline at end of file
diff --git a/nasg/img.py b/nasg/img.py
new file mode 100644
index 0000000..5e5ce8e
--- /dev/null
+++ b/nasg/img.py
@@ -0,0 +1,297 @@
+import os
+import re
+import shutil
+import logging
+import imghdr
+from similar_text import similar_text
+import wand.api
+import wand.image
+import wand.drawing
+import wand.color
+import nasg.config as config
+from nasg.cmdline import Exiftool
+class ImageHandler(object):
+    """One source image: EXIF lookup, marking, downsizing and markup.
+
+    The constructor only inspects the file; downsize() writes the resized
+    copies (or a plain copy) and srcset() returns the HTML for the image.
+    """
+
+    # target size -> options; 'ext' is the file name suffix of the variant,
+    # 'crop' asks for a square crop and 'fallback' marks the variant whose
+    # URL is used as the <img> source
+    sizes = {
+        90: {
+            'ext': 's',
+            'crop': True,
+        },
+        360: {
+            'ext': 'm',
+        },
+        720: {
+            'ext': 'z',
+            'fallback': True
+        },
+        1280: {
+            'ext': 'b',
+        }
+    }
+
+    def __init__(self, fpath, alttext='', title='', imgcl='', linkto=False):
+        """Inspect fpath and precompute name, path and URL of every variant.
+
+        alttext, title and imgcl are kept for the generated markup; a
+        non-empty imgcl disables downsizing. linkto, when set, is drawn on
+        non-photo images by downsize().
+        """
+        logging.info("parsing image: %s", fpath)
+        self.fpath = os.path.abspath(fpath)
+        self.fname, self.ext = os.path.splitext(os.path.basename(fpath))
+        self.linkto = linkto
+        self.alttext = alttext
+        self.title = title
+        self.imgcl = imgcl
+        self.what = imghdr.what(self.fpath)
+        self.mime = "image/%s" % (self.what)
+        self.exif = {}
+        self.is_photo = False
+        if self.what == 'jpeg':
+            self._setexif()
+            self._is_photo()
+        self.is_downsizeable = not self.imgcl and self.what in ('jpeg', 'png')
+        self.fallback = ''
+        self.targeturl = ''
+        # copy every meta dict: the class level table is shared, writing
+        # file names into it would leak paths from one image to another
+        self.sizes = [
+            (size, dict(meta))
+            for size, meta in sorted(type(self).sizes.items())
+        ]
+        for size, meta in self.sizes:
+            meta['fname'] = "%s_%s%s" % (
+                self.fname,
+                meta['ext'],
+                self.ext
+            )
+            meta['fpath'] = os.path.join(
+                config.TFILES,
+                meta['fname']
+            )
+            meta['url'] = "%s/%s/%s" % (
+                config.site['url'],
+                config.UFILES,
+                meta['fname']
+            )
+            if 'fallback' in meta:
+                self.fallback = meta['url']
+            # ends up pointing at the largest variant
+            self.targeturl = meta['url']
+
+    def featured(self):
+        """Return mime, url and byte size of the largest variant."""
+        # sizes elements are tuples: size, meta
+        largest = self.sizes[-1][1]
+        return {
+            'mime': self.mime,
+            'url': largest['url'],
+            'bytes': os.path.getsize(largest['fpath'])
+        }
+
+    def _setexif(self):
+        """Load the EXIF data of the source file through Exiftool."""
+        self.exif = Exiftool(self.fpath).get()
+
+    def _is_photo(self):
+        """Flag the image as own photo by camera model or copyright notice."""
+        model = self.exif.get('EXIF:Model', None)
+        if hasattr(config, 'cameras') and model in config.cameras:
+            self.is_photo = True
+            return
+        cprght = self.exif.get('IPTC:CopyrightNotice', '')
+        if hasattr(config, 'copyr'):
+            for s in config.copyr:
+                pattern = re.compile(r'%s' % s)
+                if pattern.match(cprght):
+                    self.is_photo = True
+                    return
+
+    def _watermark(self, img):
+        """Composite the configured watermark onto img and return img.
+
+        img is returned untouched when no watermark is configured or the
+        watermark file is missing.
+        """
+        if 'watermark' not in config.options:
+            return img
+        if not os.path.isfile(config.options['watermark']):
+            return img
+        wmark = wand.image.Image(filename=config.options['watermark'])
+        if img.width > img.height:
+            w = img.width * 0.16
+            h = wmark.height * (w / wmark.width)
+            x = img.width - w - (img.width * 0.01)
+            y = img.height - h - (img.height * 0.01)
+        else:
+            # the mark gets rotated below, so w and h swap roles for the offset
+            w = img.height * 0.16
+            h = wmark.height * (w / wmark.width)
+            x = img.width - h - (img.width * 0.01)
+            y = img.height - w - (img.height * 0.01)
+        w = round(w)
+        h = round(h)
+        x = round(x)
+        y = round(y)
+        wmark.resize(w, h)
+        if img.width < img.height:
+            wmark.rotate(-90)
+        img.composite(image=wmark, left=x, top=y)
+        return img
+
+    def _sourceurlmark(self, img):
+        """Draw a translucent band with the linkto text across img."""
+        with wand.drawing.Drawing() as draw:
+            draw.fill_color = wand.color.Color('#fff')
+            draw.fill_opacity = 0.8
+            draw.stroke_color = wand.color.Color('#fff')
+            draw.stroke_opacity = 0.8
+            r_h = round(img.height * 0.3)
+            r_top = round((img.height/2) - (r_h/2))
+            draw.rectangle(
+                left=0,
+                top=r_top,
+                width=img.width,
+                height=r_h
+            )
+            draw(img)
+        with wand.drawing.Drawing() as draw:
+            draw.font = config.FONT
+            draw.font_size = round((img.width)/len(self.linkto)*1.5)
+            draw.gravity = 'center'
+            draw.text(
+                0,
+                0,
+                self.linkto
+            )
+            draw(img)
+        return img
+
+    def downsize(self):
+        """Write every size variant, or copy the file when not downsizeable."""
+        if not self.is_downsizeable:
+            return self._copy()
+        if not self._isneeded():
+            logging.debug("downsizing not needed for %s", self.fpath)
+            return
+        logging.debug("downsizing %s", self.fpath)
+        try:
+            img = wand.image.Image(filename=self.fpath)
+            img.auto_orient()
+        except ValueError as e:
+            logging.error("opening %s with wand failed: %s", self.fpath, e)
+            return
+        if self.is_photo:
+            img = self._watermark(img)
+        elif self.linkto:
+            img = self._sourceurlmark(img)
+        for size, meta in self.sizes:
+            self._intermediate(img, size, meta)
+        #self._setmeta()
+
+    def _copy(self):
+        """Copy the source unchanged into config.TFILES."""
+        target = os.path.join(
+            config.TFILES,
+            "%s%s" % (self.fname, self.ext)
+        )
+        if os.path.isfile(target) and not config.options['downsize']:
+            return
+        logging.debug("copying %s to %s", self.fpath, target)
+        shutil.copy(self.fpath, target)
+
+    def _isneeded(self):
+        """Return True when forced by options or when any variant is missing."""
+        if config.options['downsize']:
+            return True
+        for size, meta in self.sizes:
+            if not os.path.isfile(meta['fpath']):
+                return True
+        return False
+
+    def _intermediate_dimensions(self, img, size, meta):
+        """Return (width, height) for a variant, keeping the aspect ratio.
+
+        Without 'crop' the longer edge becomes size; with 'crop' the
+        shorter edge becomes size so a size x size square can be cut out.
+        """
+        if (img.width > img.height and 'crop' not in meta) \
+                or (img.width < img.height and 'crop' in meta):
+            width = size
+            height = int(float(size / img.width) * img.height)
+        else:
+            height = size
+            width = int(float(size / img.height) * img.width)
+        return (width, height)
+
+    def _intermediate(self, img, size, meta):
+        """Resize (and optionally crop) a clone of img into meta['fpath']."""
+        if os.path.isfile(meta['fpath']) and not config.options['downsize']:
+            return
+        try:
+            thumb = img.clone()
+            width, height = self._intermediate_dimensions(img, size, meta)
+            thumb.resize(width, height)
+            if 'crop' in meta:
+                if 'liquidcrop' in config.options and \
+                        config.options['liquidcrop']:
+                    thumb.liquid_rescale(size, size, 1, 1)
+                else:
+                    # cut the square from the middle
+                    l = t = 0
+                    if width > size:
+                        l = int((width - size) / 2)
+                    if height > size:
+                        t = int((height - size) / 2)
+                    thumb.crop(left=l, top=t, width=size, height=size)
+            if img.format == "JPEG":
+                thumb.compression_quality = 86
+                thumb.unsharp_mask(
+                    radius=0,
+                    sigma=0.5,
+                    amount=1,
+                    threshold=0.03
+                )
+                thumb.format = 'pjpeg'
+            # this is to make sure pjpeg happens
+            with open(meta['fpath'], 'wb') as f:
+                thumb.save(file=f)
+        except ValueError as e:
+            logging.error("error while downsizing %s: %s", self.fpath, e)
+            return
+
+    def srcset(self, generate_caption=True, uphoto=False):
+        """Return the HTML for the image, or False when not downsizeable.
+
+        A caption is added only when generate_caption is set and the alt
+        text is not just (nearly) the file name. uphoto adds the u-photo
+        class to the link.
+        """
+        if not self.is_downsizeable:
+            return False
+        uphotoclass = ''
+        if uphoto:
+            uphotoclass = ' u-photo'
+        caption = ''
+        # self.ext already starts with the dot
+        if self.alttext \
+                and similar_text(self.alttext, self.fname) < 90 \
+                and similar_text(self.alttext, self.fname + self.ext) < 90 \
+                and generate_caption:
+            # NOTE(review): the markup of this literal was lost in the
+            # reviewed copy; reconstructed - confirm against the stylesheet
+            caption = '<figcaption class="caption">%s</figcaption>' % (
+                self.alttext
+            )
+        # NOTE(review): the original assigned self.linkto to an unused local
+        # here; the link target stays the largest variant - confirm intent
+        # don't put linebreaks in this: Pandoc tends to evaluate them
+        # NOTE(review): markup reconstructed around the six placeholders,
+        # the original literal was lost - confirm class names
+        return '<figure class="photo"><a target="_blank" class="adaptive%s" href="%s"><img src="%s" class="adaptimg %s" alt="%s" /></a>%s</figure>' % (
+            uphotoclass,
+            self.targeturl,
+            self.fallback,
+            self.imgcl,
+            self.alttext,
+            caption
+        )
\ No newline at end of file
diff --git a/nasg/img_test.py b/nasg/img_test.py
new file mode 100644
index 0000000..e69de29
diff --git a/nasg/jinjaenv.py b/nasg/jinjaenv.py
new file mode 100644
index 0000000..53880dc
--- /dev/null
+++ b/nasg/jinjaenv.py
@@ -0,0 +1,29 @@
+import arrow
+import jinja2
+from slugify import slugify
+import nasg.config as config
+# shared Jinja2 environment, templates are read from config.TEMPLATES
+JINJA2ENV = jinja2.Environment(
+    loader=jinja2.FileSystemLoader(
+        searchpath=config.TEMPLATES
+    )
+)
+def jinja_filter_date(d, form='%Y-%m-%d %H:%M:%S'):
+    """Jinja2 filter: format a date with strftime.
+
+    d is an object with strftime(), or the string 'now' for the current
+    time. form is a strftime format; the shortcut 'c' stands for
+    '%Y-%m-%dT%H:%M:%S%z'.
+    """
+    # resolve the shortcut first so it also applies to 'now'
+    if form == 'c':
+        form = '%Y-%m-%dT%H:%M:%S%z'
+    if d == 'now':
+        d = arrow.now().datetime
+    return d.strftime(form)
+def jinja_filter_slugify(s):
+    """Jinja2 filter: slugify s with the only_ascii and lower options on."""
+    options = {'only_ascii': True, 'lower': True}
+    return slugify(s, **options)
+def jinja_filter_search(s, r):
+    """Jinja2 filter: tell whether r occurs in s (True / False)."""
+    found = r in s
+    return found
+# register the filter functions under the names used in the templates
+JINJA2ENV.filters.update({
+    'date': jinja_filter_date,
+    'search': jinja_filter_search,
+    'slugify': jinja_filter_slugify,
+})
\ No newline at end of file
diff --git a/nasg/searchindex.py b/nasg/searchindex.py
new file mode 100644
index 0000000..82cd7ed
--- /dev/null
+++ b/nasg/searchindex.py
@@ -0,0 +1,76 @@
+import atexit
+import logging
+import os
+import shutil
+import tempfile
+
+from whoosh import analysis
+from whoosh import fields
+from whoosh import index
+
+import nasg.config as config
+class SearchIndex(object):
+    """Whoosh index built in a temporary directory.
+
+    Documents are added with add(); save() replaces config.SEARCHDB with
+    the freshly built index. A leftover temporary directory is removed at
+    interpreter exit.
+    """
+
+    schema = fields.Schema(
+        url=fields.ID(
+            stored=True,
+        ),
+        title=fields.TEXT(
+            stored=True,
+            analyzer=analysis.FancyAnalyzer(
+            )
+        ),
+        date=fields.DATETIME(
+            stored=True,
+            sortable=True
+        ),
+        content=fields.TEXT(
+            stored=True,
+            analyzer=analysis.FancyAnalyzer(
+            )
+        ),
+        tags=fields.TEXT(
+            stored=True,
+            analyzer=analysis.KeywordAnalyzer(
+                lowercase=True,
+                commas=True
+            )
+        ),
+        weight=fields.NUMERIC(
+            sortable=True
+        ),
+        img=fields.TEXT(
+            stored=True
+        )
+    )
+
+    def __init__(self):
+        """Create the temporary index directory and an empty index in it."""
+        # 'whooshdb_' is meant as the directory name prefix; passed
+        # positionally it would be taken as the suffix
+        self.tmp = tempfile.mkdtemp(
+            prefix='whooshdb_',
+            dir=tempfile.gettempdir()
+        )
+        self.ix = index.create_in(self.tmp, self.schema)
+        atexit.register(self.cleanup)
+
+    def add(self, vars):
+        """Add one document and commit it.
+
+        vars needs the keys title, url, content, tags, img and the
+        publish time under 'date' (as produced by
+        SingularHandler.indexvars()) or, as fallback, under 'published'.
+        """
+        published = vars['date'] if 'date' in vars else vars['published']
+        writer = self.ix.writer()
+        writer.add_document(
+            title=vars['title'],
+            url=vars['url'],
+            content=vars['content'],
+            date=published,
+            tags=vars['tags'],
+            weight=1,
+            img=vars['img']
+        )
+        writer.commit()
+
+    def cleanup(self):
+        """Remove the temporary directory if it is still around."""
+        if not os.path.exists(self.tmp):
+            return
+        logging.warning("cleaning up tmp whoosh")
+        shutil.rmtree(self.tmp)
+
+    def save(self):
+        """Replace config.SEARCHDB with the temporary index directory."""
+        # on the very first run there is nothing to delete
+        if os.path.isdir(config.SEARCHDB):
+            logging.info("deleting old searchdb")
+            shutil.rmtree(config.SEARCHDB)
+        logging.info("moving new searchdb")
+        shutil.move(self.tmp, config.SEARCHDB)
\ No newline at end of file
diff --git a/nasg/singular.py b/nasg/singular.py
new file mode 100644
index 0000000..ac0b283
--- /dev/null
+++ b/nasg/singular.py
@@ -0,0 +1,580 @@
+import os
+import re
+import logging
+import arrow
+import frontmatter
+import langdetect
+from slugify import slugify
+import nasg.config as config
+import nasg.func as func
+import nasg.cmdline as cmdline
+from nasg.img import ImageHandler
+import nasg.jinjaenv as jinjaenv
+class SingularHandler(object):
+    """Base of a single post or page: template variables and HTML output.
+
+    Subclasses fill self.vars from their source file and finish with
+    _postsetup(); write() renders the template into
+    config.TARGET/<file name>/index.html.
+    """
+
+    def __init__(self, fpath):
+        """Derive target path, slug, category and default vars from fpath."""
+        logging.info("setting up singular from %s", fpath)
+        self.fpath = os.path.abspath(fpath)
+        self.fname, self.ext = os.path.splitext(os.path.basename(self.fpath))
+        self.target = os.path.join(
+            config.TARGET, "%s" % (self.fname), "index.html"
+        )
+        slug = slugify(self.fname, only_ascii=True, lower=True)
+        self.modtime = int(os.path.getmtime(self.fpath))
+        # the directory below config.CONTENT names the category
+        self.category = os.path.dirname(self.fpath).replace(
+            config.CONTENT, ''
+        ).strip('/')
+        self.vars = {
+            'category': self.category,
+            'tags': [],
+            'published': arrow.get(self.modtime),
+            'updated': arrow.get(0),
+            'author': config.author,
+            'title': '',
+            'raw_summary': '',
+            'raw_content': '',
+            'content': '',
+            'summary': '',
+            'reactions': {},
+            'exif': {},
+            'lang': config.site['lang'],
+            #'syndicate': [],
+            'slug': slug,
+            'shortslug': slug,
+            'srcset': '',
+            'url': "%s/%s/" % (config.site['url'], slug),
+        }
+        self.redirects = {}
+        self.pings = {}
+        self.template = 'singular.html'
+        self.img = None
+        self.rendered = ''
+
+    def __repr__(self):
+        return "Post '%s' (%s @ %s)" % (
+            self.vars['title'],
+            self.fname,
+            self.fpath
+        )
+
+    def _modtime(self):
+        """ Set file mtime in case it doesn't match the in-file publish or updated time """
+        use = 'published'
+        if self.vars['updated'].timestamp > self.vars['published'].timestamp:
+            use = 'updated'
+        self.modtime = int(self.vars[use].timestamp)
+        stattime = int(os.path.getmtime(self.fpath))
+        if stattime != self.modtime:
+            os.utime(self.fpath, (self.modtime, self.modtime))
+
+    def _detect_lang(self):
+        """Guess the language from title and raw content; keep the default on failure."""
+        try:
+            self.vars['lang'] = langdetect.detect(
+                "%s %s" % (
+                    self.vars['title'],
+                    self.vars['raw_content']
+                )
+            )
+        except Exception:
+            # best effort only: the configured site language stays in place
+            logging.debug("language detection failed for %s", self.fpath)
+
+    def _redirects(self):
+        """Register the short slug as redirect unless the category is 'nocollection'."""
+        if self.category in config.categories and \
+                'nocollection' in config.categories[self.category] and \
+                config.categories[self.category]['nocollection']:
+            return
+        self.redirects[self.vars['shortslug']] = 1
+
+    def _shortslug(self):
+        """Set the short slug to the base 36 form of the publish timestamp."""
+        shortslug = func.baseN(self.vars['published'].timestamp)
+        self.vars['shortslug'] = shortslug
+
+    def _prerender(self):
+        """Convert content and summary from markdown to HTML with Pandoc."""
+        for s in ['content', 'summary']:
+            self.vars[s] = cmdline.Pandoc(self.vars[s]).md2html().get()
+
+    def _postsetup(self):
+        """Fill empty content/summary from the raw values and derive the rest."""
+        for s in ['content', 'summary']:
+            if not self.vars[s]:
+                self.vars[s] = self.vars['raw_%s' % s]
+        self._modtime()
+        self._shortslug()
+        self._detect_lang()
+        self._redirects()
+        self._pings()
+
+    def _render(self):
+        """Render self.template with the post and site variables into self.rendered."""
+        self._prerender()
+        tmpl = jinjaenv.JINJA2ENV.get_template(self.template)
+        logging.info("rendering %s", self.fname)
+        tmplvars = {
+            'post': self.vars,
+            'site': config.site,
+            'taxonomy': {},
+        }
+        self.rendered = tmpl.render(tmplvars)
+
+    def _exists(self):
+        """ check if target exists and up to date """
+        if config.options['regenerate']:
+            logging.debug('REGENERATE active')
+            return False
+        if not os.path.isfile(self.target):
+            logging.debug('%s missing', self.target)
+            return False
+        ttime = os.stat(self.target)
+        if self.modtime == ttime.st_mtime:
+            logging.debug('%s exist and up to date', self.target)
+            return True
+        return False
+
+    def write(self):
+        """ Write HTML file """
+        if self._exists():
+            logging.info("skipping existing %s", self.target)
+            return
+        self._render()
+        d = os.path.dirname(self.target)
+        if not os.path.isdir(d):
+            os.makedirs(d)
+        with open(self.target, "wt") as html:
+            logging.info("writing %s", self.target)
+            html.write(self.rendered)
+        # _exists() compares this mtime on the next run
+        os.utime(self.target, (self.modtime, self.modtime))
+
+    def indexvars(self):
+        """ Return values formatter for search index """
+        c = "%s %s %s %s %s" % (
+            self.vars['slug'],
+            self.vars['raw_summary'],
+            self.vars['raw_content'],
+            self.vars['reactions'],
+            self.vars['exif']
+        )
+        #c = "%s %s" % (c, self._localcopy_include())
+        imgstr = ''
+        if self.img:
+            # ImageHandler names this method srcset()
+            imgstr = self.img.srcset(generate_caption=False)
+        ivars = {
+            'title': self.vars['title'],
+            'url': self.vars['url'],
+            'content': c,
+            'date': self.vars['published'].datetime,
+            'tags': ",".join(self.vars['tags']),
+            'img': imgstr
+        }
+        return ivars
+
+    def _pings(self):
+        """ Extract all URLs that needs pinging """
+        urlregex = re.compile(
+            r'\s+https?\:\/\/?[a-zA-Z0-9\.\/\?\:@\-_=#]+'
+            r'\.[a-zA-Z0-9\.\/\?\:@\-_=#]*'
+        )
+        # the pattern starts with whitespace, drop it from the matches
+        urls = [
+            url.strip()
+            for url in urlregex.findall(self.vars['raw_content'])
+        ]
+        for reactiontype, reactions in self.vars['reactions'].items():
+            if isinstance(reactions, str):
+                urls.append(reactions)
+            elif isinstance(reactions, list):
+                urls = [*reactions, *urls]
+        #for s in self.syndicate.keys():
+            #matches.append('https://brid.gy/publish/%s' % (s))
+        urlredux = {}
+        for url in urls:
+            # exclude local matches
+            if config.site['domain'] in url:
+                continue
+            urlredux[url] = 1
+        self.pings = urlredux
+
+    def _c_adaptify_altfpath(self, fname):
+        """Look for fname in every category directory; return the path or None."""
+        for c, cmeta in config.categories.items():
+            tpath = os.path.join(config.CONTENT, c, fname)
+            if os.path.isfile(tpath):
+                return tpath
+        return None
+
+    def _c_adaptify(self):
+        """ Generate srcset for all suitable images """
+        linkto = False
+        isrepost = None
+        if len(self.vars['reactions'].keys()):
+            isrepost = list(self.vars['reactions'].keys())[0]
+            if isrepost and \
+                    len(self.vars['reactions'][isrepost]) == 1:
+                # a post reacting to exactly one URL marks its images with it
+                linkto = self.vars['reactions'][isrepost][0]
+        # groups: whole shortcode, alt text, file name, title, class
+        p = re.compile(
+            r'(!\[(.*)\]\((?:\/(?:files|cache)'
+            r'(?:\/[0-9]{4}\/[0-9]{2})?\/(.*\.(?:jpe?g|png|gif)))'
+            r'(?:\s+[\'\"]?(.*?)[\'\"]?)?\)(?:\{(.*?)\})?)'
+        )
+        m = p.findall(self.vars['content'])
+        if not m:
+            return
+        for shortcode, alt, fname, title, cl in m:
+            fpath = os.path.join(config.SFILES, fname)
+            if not os.path.isfile(fpath):
+                fpath = self._c_adaptify_altfpath(fname)
+            if not fpath:
+                logging.error("missing image in %s: %s", self.fpath, fname)
+                continue
+            im = ImageHandler(
+                fpath,
+                alttext=alt,
+                title=title,
+                imgcl=cl,
+                linkto=linkto
+            )
+            im.downsize()
+            srcset = im.srcset()
+            if srcset:
+                self.vars['content'] = self.vars['content'].replace(
+                    shortcode, srcset
+                )
+
+    def _c_video(self):
+        """ [video] shortcode extractor """
+        p = re.compile(
+            r'(\[video mp4=\"(?:/(?:files|cache)\/(?P<vidf>.*?))\"\]'
+            r'(?:\[/video\])?)'
+        )
+        videos = p.findall(self.vars['content'])
+        if not videos:
+            return
+        for shortcode, vidf in videos:
+            # NOTE(review): the markup of this literal was lost in the
+            # reviewed copy; reconstructed around the two placeholders -
+            # confirm against the templates
+            video = '<video controls><source src="%s/%s" type="video/mp4">Your browser does not support the video tag.</video>' % (
+                config.site['url'],
+                vidf
+            )
+            self.vars['content'] = self.vars['content'].replace(shortcode, video)
+
+    def _c_snippets(self):
+        """ Replaces [git:(repo)/(file.ext)] with corresponding code snippet """
+        p = re.compile(r'(\[git:([^\/]+)\/([^\]]+\.([^\]]+))\])')
+        snippets = p.findall(self.vars['content'])
+        if not snippets:
+            return
+        for shortcode, d, f, ext in snippets:
+            fpath = os.path.join(config.SOURCE, d, f)
+            if not os.path.isfile(fpath):
+                logging.error("missing blogsnippet: %s", self.fpath)
+                continue
+            # extensions starting with 'conf' are highlighted as apache
+            if re.compile(r'conf', re.IGNORECASE).match(ext):
+                lang = 'apache'
+            else:
+                lang = ext
+            with open(fpath, "rt") as snip:
+                c = snip.read()
+            c = "\n\n```%s\n%s\n```\n" % (lang, c)
+            logging.debug("replacing blogsnippet %s", self.fpath)
+            self.vars['content'] = self.vars['content'].replace(
+                shortcode, c
+            )
+ #def _c_files(self):
+ #""" Copy misc files referenced """
+ #match = re.compile(
+ #r'\s(?:%s)?/(?:files|cache)'
+ #r'/.*\.(?:(?!jpe?g|png|gif).*)\s' % (glob.conf['site']['domain'])
+ #)
+ #split = re.compile(
+ #r'\s(?:%s)?/((?:files|cache)'
+ #r'/(.*\.(?:(?!jpe?g|png|gif).*)))\s' % (glob.conf['site']['domain'])
+ #)
+ ##files = re.findall(match, self.content)
+ ##print(files)
+class ArticleHandler(SingularHandler):
+    """Post read from a markdown file with a frontmatter header."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._setup()
+
+    def _setup(self):
+        """Copy body and known header fields into self.vars, then post-process."""
+        post = frontmatter.load(self.fpath)
+        meta = post.metadata
+        body = "%s" % post.content
+        self.vars['raw_content'] = body
+        self.vars['content'] = body
+        # values taken over as they are
+        for key in ('tags', 'title'):
+            if key in meta:
+                self.vars[key] = meta[key]
+        # values parsed into arrow dates
+        for key in ('published', 'updated'):
+            if key in meta:
+                self.vars[key] = arrow.get(meta[key])
+        if 'summary' in meta:
+            self.vars['raw_summary'] = meta['summary']
+            self.vars['summary'] = "%s" % meta['summary']
+        if 'redirect' in meta and isinstance(meta['redirect'], list):
+            for r in meta['redirect']:
+                self.redirects[r.strip().strip('/')] = 1
+        #if 'syndicate' in post.metadata:
+            #z = post.metadata['syndicate']
+            #if isinstance(z, str):
+                #self.syndicate[z] = ''
+            #elif isinstance(z, dict):
+                #for s, c in z.items():
+                    #self.syndicate[s] = c
+            #elif isinstance(z, list):
+                #for s in z:
+                    #self.syndicate[s] = ''
+        # getting rid of '-' to avoid css trouble and similar
+        rmap = {
+            'bookmark-of': 'bookmark',
+            'repost-of': 'repost',
+            'in-reply-to': 'reply',
+        }
+        reactions = {}
+        for key, replace in rmap.items():
+            if key not in meta:
+                continue
+            value = meta[key]
+            # a single URL is wrapped so every reaction is a list
+            if isinstance(value, str):
+                reactions[replace] = [value]
+            elif isinstance(value, list):
+                reactions[replace] = value
+        self.vars['reactions'] = reactions
+        self._c_adaptify()
+        self._c_snippets()
+        self._c_video()
+        #self._files()
+        super()._postsetup()
+class PhotoHandler(SingularHandler):
+    """Post made of a single image; everything is read from its EXIF data."""
+
+    def __init__(self, *args, **kwargs):
+        super(PhotoHandler, self).__init__(*args, **kwargs)
+        self.img = ImageHandler(self.fpath)
+        self._setup()
+
+    def _setvars(self):
+        """Fill vars['exif'] with camera details and decimal GPS coordinates."""
+        # for every entry the first EXIF key with a value wins
+        mapping = {
+            'camera': [
+                'EXIF:Model'
+            ],
+            'aperture': [
+                'EXIF:FNumber',
+                'Composite:Aperture'
+            ],
+            'shutter_speed': [
+                'EXIF:ExposureTime'
+            ],
+            'focallength': [
+                'EXIF:FocalLength',
+                'Composite:FocalLength35efl',
+            ],
+            # NOTE(review): the candidate list was empty, so 'iso' could
+            # never be set; EXIF:ISO assumed - confirm the exiftool key
+            'iso': [
+                'EXIF:ISO'
+            ],
+            'lens': [
+                'Composite:LensID',
+                'MakerNotes:Lens',
+                'Composite:LensSpec'
+            ]
+        }
+        for ekey, candidates in mapping.items():
+            for candidate in candidates:
+                val = self.img.exif.get(candidate, None)
+                if val:
+                    self.vars['exif'][ekey] = val
+                    break
+        gps = ['Latitude', 'Longitude']
+        for g in gps:
+            gk = 'EXIF:GPS%s' % (g)
+            if gk not in self.img.exif:
+                continue
+            r = 'EXIF:GPS%sRef' % (g)
+            ref = None
+            if r in self.img.exif:
+                ref = self.img.exif[r]
+            self.vars['exif']['geo_%s' % (g.lower())] = func.gps2dec(
+                self.img.exif[gk],
+                ref
+            )
+
+    def _setfromexif_str(self, varkey, exifkeys):
+        """Set vars[varkey] to the first non-empty value among exifkeys."""
+        for key in exifkeys:
+            val = self.img.exif.get(key, None)
+            if not val:
+                continue
+            self.vars[varkey] = val.strip()
+            return
+
+    def _setfromexif_lst(self, varkey, exifkeys):
+        """Set vars[varkey] to the de-duplicated values found under exifkeys.
+
+        Comma separated strings are split; values are de-duplicated by
+        their slug.
+        """
+        collected = {}
+        for key in exifkeys:
+            val = self.img.exif.get(key, None)
+            if not val:
+                continue
+            if isinstance(val, str):
+                # convert in place and locally, the check below needs a list
+                val = val.split(",")
+                self.img.exif[key] = val
+            # not elif: the previous one converts all string to list
+            # we rely on that
+            if isinstance(val, list):
+                for v in val:
+                    collected[slugify(str(v).strip())] = str(v).strip()
+        self.vars[varkey] = list(collected.values())
+        return
+
+    def _setfromexif_date(self, varkey, exifkeys):
+        """Set vars[varkey] from the first parsable date among exifkeys.
+
+        Values are expected as 'YYYY:MM:DD HH:MM:SS' with an optional
+        trailing Z; anything else is skipped.
+        """
+        pattern = re.compile(
+            r"(?P<Y>[0-9]{4}):(?P<M>[0-9]{2}):(?P<D>[0-9]{2})\s+"
+            r"(?P<T>[0-9]{2}:[0-9]{2}:[0-9]{2})Z?"
+        )
+        for key in exifkeys:
+            raw = self.img.exif.get(key, None)
+            if not raw:
+                continue
+            match = pattern.match(raw)
+            if not match:
+                continue
+            v = match.groupdict()
+            try:
+                date = arrow.get('%s-%s-%s %s' % (v['Y'], v['M'], v['D'], v['T']))
+            except Exception:
+                # unparsable value, try the next key
+                continue
+            if not date:
+                continue
+            self.vars[varkey] = date
+            logging.debug("'%s' set to %s from key %s", varkey, date, key)
+            return
+
+    def _setup(self):
+        """Read title, text, tags and date from EXIF and build the content."""
+        self._setfromexif_str('title', [
+            'XMP:Title',
+            'XMP:Headline',
+            'IPTC:Headline'
+        ])
+        self._setfromexif_str('raw_content', [
+            'XMP:Description',
+            'IPTC:Caption-Abstract'
+        ])
+        self._setfromexif_lst('tags', [
+            'XMP:Keywords',
+            'IPTC:Keywords'
+        ])
+        self._setfromexif_date('published', [
+            'XMP:DateTimeDigitized',
+            'XMP:CreateDate',
+            'EXIF:CreateDate',
+            'EXIF:ModifyDate'
+        ])
+        self._setvars()
+        self.img.title = self.vars['title']
+        self.img.alttext = self.vars['title']
+        # srcset() returns False for images that cannot be downsized
+        srcset = self.img.srcset(generate_caption=False, uphoto=True) or ''
+        self.vars['content'] = "%s\n\n%s" % (
+            self.vars['raw_content'],
+            srcset
+        )
+        self.img.downsize()
+        self.vars['img'] = self.img.featured()
+        super(PhotoHandler, self)._postsetup()
+class PageHandler(SingularHandler):
+    """Plain page: the whole file is the content, rendered with page.html."""
+
+    def __init__(self, *args, **kwargs):
+        super(PageHandler, self).__init__(*args, **kwargs)
+        self.template = 'page.html'
+        self._setup()
+
+    def _setup(self):
+        """Read the file, adapt its images and run the common post setup."""
+        with open(self.fpath) as c:
+            self.vars['raw_content'] = c.read()
+        # _c_adaptify() works on vars['content'], so it has to be filled
+        # before the call, otherwise no image is ever processed
+        self.vars['content'] = self.vars['raw_content']
+        self._c_adaptify()
+        super(PageHandler, self)._postsetup()
\ No newline at end of file
diff --git a/nasg/taxonomy.py b/nasg/taxonomy.py
new file mode 100644
index 0000000..5db2506
--- /dev/null
+++ b/nasg/taxonomy.py
@@ -0,0 +1,319 @@
+import math
+import logging
+import os
+import collections
+from slugify import slugify
+import nasg.config as config
+import nasg.jinjaenv as jinjaenv
+import arrow
+class TaxonomyHandler(object):
+    """Collection of posts (category, tag, front page) with paged archives.
+
+    Posts are keyed by publish timestamp and kept newest first; write()
+    renders the archive pages and the feed below basedir.
+    """
+
+    def __init__(self, name, taxonomy='category', slug='', description='', render=True):
+        """Set name, slug, base directory and base URL of the collection.
+
+        An empty taxonomy or name places the collection at the site root.
+        With render False, write() does nothing.
+        """
+        logging.info("setting up taxonomy: %s", name)
+        self.name = name
+        self.taxonomy = taxonomy
+        self.description = description
+        self.render = render
+        if slug:
+            self.slug = slug
+        else:
+            self.slug = slugify(self.name, only_ascii=True, lower=True)
+        self.posts = collections.OrderedDict()
+        self._iterator = None
+        #self.basedir = os.path.join(config.TARGET, self.taxonomy, self.slug)
+        if len(self.taxonomy) and len(self.name):
+            self.basedir = os.path.join(config.TARGET, self.taxonomy, self.slug)
+            self.baseurl = "/%s/%s/" % (self.taxonomy, self.slug)
+        else:
+            self.baseurl = '/'
+            self.basedir = os.path.join(config.TARGET)
+        self.modtime = 0
+
+    def __getitem__(self, key):
+        return self.posts[key]
+
+    def __repr__(self):
+        return 'Taxonomy %s (name: %s, slug: %s) with %i posts' % (
+            self.taxonomy,
+            self.name,
+            self.slug,
+            len(self.posts)
+        )
+
+    def __next__(self):
+        """Return the next post, newest first; StopIteration at the end."""
+        # the iterator is created on demand and dropped once exhausted, so
+        # a later next() starts over
+        if getattr(self, '_iterator', None) is None:
+            self._iterator = iter(list(self.posts.values()))
+        try:
+            return next(self._iterator)
+        except StopIteration:
+            self._iterator = None
+            raise
+
+    def __iter__(self):
+        """Yield the posts, newest first."""
+        for post in self.posts.values():
+            yield post
+
+    def append(self, post):
+        """Add post under its publish timestamp and keep newest-first order.
+
+        A colliding timestamp is logged and the post moves to the next
+        free second.
+        """
+        k = int(post.vars['published'].timestamp)
+        if k in self.posts:
+            logging.error(
+                "colliding post timestamps: %s vs %s",
+                self.posts[k].fpath,
+                post.fpath
+            )
+            while k in self.posts:
+                k = int(k+1)
+        self.posts[k] = post
+        self.posts = collections.OrderedDict(
+            sorted(self.posts.items(), reverse=True)
+        )
+
+    def write(self):
+        """Write archive pages and feed unless rendering is switched off."""
+        if not self.render:
+            return
+        # output files get the time of the newest post, or now when empty
+        if self.posts:
+            self.modtime = max(self.posts)
+        else:
+            self.modtime = arrow.utcnow().timestamp
+        self._write_pages()
+        self._write_rss()
+
+    def _page_vars(self, page, pages, start, end):
+        """Return the template variables for the posts in slice start:end."""
+        newest_first = sorted(self.posts, reverse=True)
+        return {
+            'taxonomy': {
+                'url': self.baseurl,
+                'name': self.name,
+                'taxonomy': self.taxonomy,
+                'description': self.description,
+                'paged': page,
+                'total': pages,
+                'perpage': int(config.site['pagination']),
+            },
+            'site': config.site,
+            'posts': [self.posts[k].vars for k in newest_first[start:end]],
+        }
+
+    def _write_file(self, fpath, template, tvars):
+        """Render template with tvars into fpath and stamp it with modtime."""
+        tmpl = jinjaenv.JINJA2ENV.get_template(template)
+        logging.info("writing %s", fpath)
+        with open(fpath, "wt") as f:
+            r = tmpl.render(tvars)
+            f.write(r)
+        os.utime(fpath, (self.modtime, self.modtime))
+
+    def _write_rss(self):
+        """Write feed/index.xml with the newest 'rsspagination' posts."""
+        rssdir = os.path.join(self.basedir, 'feed')
+        if not os.path.isdir(rssdir):
+            os.makedirs(rssdir)
+        fpath = os.path.join(rssdir, 'index.xml')
+        tvars = self._page_vars(1, 1, 0, int(config.site['rsspagination']))
+        self._write_file(fpath, 'rss.html', tvars)
+
+    def _write_page(self, page, pages, start, end):
+        """Write one archive page; page 1 goes to basedir, others to page/<n>."""
+        if 1 == page:
+            pagedir = self.basedir
+        else:
+            pagedir = os.path.join(self.basedir, 'page', "%i" % page)
+        if not os.path.isdir(pagedir):
+            os.makedirs(pagedir)
+        fpath = os.path.join(pagedir, 'index.html')
+        tvars = self._page_vars(page, pages, start, end)
+        self._write_file(fpath, 'archive.html', tvars)
+
+    def _write_pages(self):
+        """Write all archive pages, 'pagination' posts per page."""
+        perpage = int(config.site['pagination'])
+        pages = math.ceil(len(self.posts)/perpage)
+        for page in range(1, pages + 1):
+            start = int((page-1) * perpage)
+            end = int(start+perpage)
+            self._write_page(page, pages, start, end)
+ #def _test_freshness(self):
+ #t, lp = list(self.posts.items())[0]
+ #self.lptime = lp.ftime.st_mtime
+ #if os.path.isfile(self.indexpath):
+ #p = self.indexpath
+ #elif os.path.isfile(self.simplepath):
+ #p = self.simplepath
+ #else:
+ #return False
+ #itime = os.stat(p)
+ #if itime.st_mtime == self.lptime and not glob.FORCEWRITE:
+ #logging.debug(
+ #'Taxonomy tree is fresh for %s' % (self.name)
+ #)
+ #return True
+ #return False
+ #def _test_dirs(self):
+ #if not os.path.isdir(self.taxp):
+ #os.mkdir(self.taxp)
+ #if not os.path.isdir(self.basep):
+ #os.mkdir(self.basep)
+ #def write_paginated(self):
+ #if self._test_freshness():
+ #return
+ #self._test_dirs()
+ #taxp = os.path.join(glob.TARGET, self.taxonomy)
+ #basep = os.path.join(glob.TARGET, self.taxonomy, self.slug)
+ #if not os.path.isdir(taxp):
+ #os.mkdir(taxp)
+ #if not os.path.isdir(basep):
+ #os.mkdir(basep)
+ #pages = math.ceil(len(self.posts) / glob.conf['perpage'])
+ #page = 1
+ #if len(self.taxonomy) and len(self.slug):
+ #base_url = "/%s/%s/" % (self.taxonomy, self.slug)
+ #else:
+ #base_url = '/'
+ #while page <= pages:
+ #start = int((page-1) * int(glob.conf['perpage']))
+ #end = int(start + int(glob.conf['perpage']))
+ #dorss = False
+ #posttmpls = [self.posts[k].tmpl() for k in list(sorted(
+ #self.posts.keys(), reverse=True))[start:end]]
+ #if page == 1:
+ #tpath = self.indexpath
+ #do_rss = True
+ ## RSS
+ #else:
+ #do_rss = False
+ #if not os.path.isdir(self.pagedp):
+ #os.mkdir(self.pagedp)
+ #tdir = os.path.join(self.pagedp, "%d" % page)
+ #if not os.path.isdir(tdir):
+ #os.mkdir(tdir)
+ #tpath = os.path.join(tdir, "index.html")
+ #tvars = {
+ #'taxonomy': {
+ #'url': base_url,
+ #'name': self.name,
+ #'taxonomy': self.taxonomy,
+ #'description': self.description,
+ #'paged': page,
+ #'total': pages,
+ #'perpage': glob.conf['perpage'],
+ #},
+ #'site': glob.conf['site'],
+ #'posts': posttmpls,
+ #}
+ #tmpl = glob.jinja2env.get_template('archive.html')
+ #logging.info("rendering %s" % (tpath))
+ #with open(tpath, "w") as html:
+ #r = tmpl.render(tvars)
+ #soup = BeautifulSoup(r, "html5lib")
+ #r = soup.prettify()
+ #logging.info("writing %s" % (tpath))
+ #html.write(r)
+ #html.close()
+ #os.utime(tpath, (self.lptime, self.lptime))
+ #if do_rss:
+ #feeddir = os.path.join(self.basep, 'feed')
+ #if not os.path.isdir(feeddir):
+ #os.mkdir(feeddir)
+ #feedpath = os.path.join(feeddir, "index.xml")
+ #tmpl = glob.jinja2env.get_template('rss.html')
+ #logging.info("rendering %s" % (feedpath))
+ #with open(feedpath, "w") as html:
+ #r = tmpl.render(tvars)
+ #logging.info("writing %s" % (feedpath))
+ #html.write(r)
+ #html.close()
+ #os.utime(feedpath, (self.lptime, self.lptime))
+ #page = page+1
+ #def write_simple(self, template='archive.html'):
+ #if self._test_freshness():
+ #return
+ #self._test_dirs()
+ #base_url = "/%s/" % (self.slug)
+ #posttmpls = [self.posts[k].tmpl() for k in list(sorted(
+ #self.posts.keys(), reverse=True))]
+ #tvars = {
+ #'taxonomy': {
+ #'url': base_url,
+ #'name': self.name,
+ #'taxonomy': self.taxonomy,
+ #'description': self.description,
+ #'paged': 0,
+ #'total': 0,
+ #'perpage': glob.conf['perpage'],
+ #},
+ #'site': glob.conf['site'],
+ #'posts': posttmpls,
+ #}
+ #with open(os.path.join(self.simplepath), "w") as html:
+ #html.write(json.dumps(tvars, indent=4, sort_keys=True, default=str))
+ #html.close()
+ ##tmpl = glob.jinja2env.get_template('gallery.html')
+ ##logging.info("rendering %s" % (indexpath))
+ ##with open(indexpath, "w") as html:
+ ##r = tmpl.render(tvars)
+ ##soup = BeautifulSoup(r, "html5lib")
+ ##r = soup.prettify()
+ ##logging.info("writing %s" % (indexpath))
+ ##html.write(r)
+ ##html.close()
+ ##os.utime(indexpath, (lptime, lptime))
+ #def writesitemap(self):
+ #sitemap = "%s/sitemap.txt" % (glob.TARGET)
+ #urls = []
+ #for p in self.posts.items():
+ #t, data = p
+ #urls.append( "%s/%s" % ( glob.conf['site']['url'], data.slug ) )
+ #with open(sitemap, "w") as f:
+ #logging.info("writing %s" % (sitemap))
+ #f.write("\n".join(urls))
+ #f.close()
\ No newline at end of file
diff --git a/nasg/tests/cmdline.py b/nasg/tests/cmdline.py
new file mode 100644
index 0000000..bcee844
