diff --git a/nasg.py b/nasg.py
index fbd72b1..4683fa8 100755
--- a/nasg.py
+++ b/nasg.py
@@ -25,17 +25,15 @@ import frontmatter
 from slugify import slugify
 import langdetect
 import requests
-#from breadability.readable import Article
-from newspaper import Article as newspaper3k
 from whoosh import index
 from whoosh import qparser
 import jinja2
 import urllib.parse
-import shared
 from webmentiontools.send import WebmentionSend
 from bleach import clean
 from emoji import UNICODE_EMOJI
 from bs4 import BeautifulSoup
+import shared
 
 def splitpath(path):
     parts = []
@@ -46,23 +44,82 @@ def splitpath(path):
     return parts
 
 
+class BaseIter(object):
+    """ Dict-backed base of the Images, Taxonomy and Content containers """
+
+    def __init__(self):
+        self.data = {}
+
+    def append(self, key, value):
+        """ Store value under key; the first value stored for a key is
+        kept and later duplicates are only logged """
+        if key in self.data:
+            logging.warning("duplicate key: %s, using existing instead", key)
+            existing = self.data.get(key)
+            if hasattr(value, 'fname') and hasattr(existing, 'fname'):
+                logging.warning(
+                    "%s collides with existing %s",
+                    value.fname,
+                    existing.fname
+                )
+            return
+        self.data[key] = value
+
+
+    def __getitem__(self, key):
+        """ Unknown keys give an empty dict instead of a KeyError """
+        return self.data.get(key, {})
+
+
+    def __repr__(self):
+        return json.dumps(list(self.data.values()))
+
+
+    def __next__(self):
+        """ Hand out the stored (key, value) pairs one call at a time """
+        # a dict has no .next() method to call, so walk a real iterator
+        # over its items and let only StopIteration end the walk
+        if not hasattr(self, '_cursor'):
+            self._cursor = iter(self.data.items())
+        try:
+            return next(self._cursor)
+        except StopIteration:
+            # forget the spent cursor so a later pass starts over
+            del self._cursor
+            raise
+
+
+    def __iter__(self):
+        """ Yield the stored (key, value) pairs """
+        for k, v in self.data.items():
+            yield (k, v)
+        return
+
+
 class BaseRenderable(object):
+    """ Base for objects that write their rendered output to self.target """
     def __init__(self):
         return
 
-    def writerendered(self, target, content, mtime):
-        d = os.path.dirname(target)
-        if not os.path.isdir(d):
-            os.mkdir(d)
-
-        with open(target, "w") as html:
-            logging.debug('writing %s', target)
-            html.write(content)
-            html.close()
-        os.utime(target, (mtime, mtime))
+
+    def writerendered(self, content):
+        """ Write content to self.target, then set the atime and mtime of
+        the file to self.mtime """
+        # NOTE(review): self.target has to be a filesystem path here; the
+        # Comment.target property further down looks like a URL path
+        # instead - confirm before Comment.render relies on this
+        d = os.path.dirname(self.target)
+        if d:
+            # makedirs also creates missing parents and tolerates the
+            # directory already being there
+            os.makedirs(d, exist_ok=True)
+        with open(self.target, "w") as html:
+            logging.debug('writing %s', self.target)
+            html.write(content)
+        os.utime(self.target, (self.mtime, self.mtime))
+
 
 class Indexer(object):
-
     def __init__(self):
         self.target = os.path.abspath(os.path.join(
             shared.config.get('target', 'builddir'),
@@ -153,6 +192,7 @@ class Indexer(object): mtime=singular.mtime ) + def finish(self): self.writer.commit() @@ -187,6 +227,7 @@ class OfflineCopy(object): with open(self.target, 'wt') as f: f.write(frontmatter.dumps(self.fm)) + @property def archiveorgurl(self): a = self.fetch( @@ -208,6 +249,7 @@ class OfflineCopy(object): logging.error("archive.org parsing failed: %s", e) return None + def fetch(self, url): try: r = requests.get( @@ -222,7 +264,6 @@ class OfflineCopy(object): return None - def run(self): if os.path.isfile(self.target): with open(self.target) as f: @@ -257,6 +298,7 @@ class Renderer(object): self.j2.filters['search'] = Renderer.jinja_filter_search self.j2.filters['slugify'] = Renderer.jinja_filter_slugify + @staticmethod def jinja_filter_date(d, form='%Y-%m-%d %H:%m:%S'): if d == 'now': @@ -265,10 +307,12 @@ class Renderer(object): form = '%Y-%m-%dT%H:%M:%S%z' return d.strftime(form) + @staticmethod def jinja_filter_slugify(s): return slugify(s, only_ascii=True, lower=True) + @staticmethod def jinja_filter_search(s, r): if r in s: @@ -276,50 +320,17 @@ class Renderer(object): return False -class BaseIter(object): - def __init__(self): - self.data = {} - - def append(self, key, value): - if key in self.data: - logging.warning("duplicate key: %s, using existing instead", key) - existing = self.data.get(key) - if hasattr(value, 'fname') and hasattr(existing, 'fname'): - logging.warning( - "%s collides with existing %s", - value.fname, - existing.fname - ) - return - self.data[key] = value - - def __getitem__(self, key): - return self.data.get(key, {}) - - def __repr__(self): - return json.dumps(list(self.data.values())) - - def __next__(self): - try: - r = self.data.next() - except: - raise StopIteration() - return r - - def __iter__(self): - for k, v in self.data.items(): - yield (k, v) - return - # based on http://stackoverflow.com/a/10075210 class ExifTool(shared.CMDLine): """ Handles calling external binary `exiftool` in an efficient way """ 
sentinel = "{ready}\n" + def __init__(self): super().__init__('exiftool') - def get_metadata(self, *filenames): + + def run(self, *filenames): return json.loads(self.execute( '-sort', '-json', @@ -358,9 +369,11 @@ class Comment(BaseRenderable): self.tmplfile = 'comment.html' self.__parse() + def __repr__(self): return "%s" % (self.path) + def __parse(self): with open(self.path, mode='rt') as f: self.meta, self.content = frontmatter.parse(f.read()) @@ -388,6 +401,7 @@ class Comment(BaseRenderable): return self._reacji + @property def html(self): if hasattr(self, '_html'): @@ -396,6 +410,7 @@ class Comment(BaseRenderable): self._html = shared.Pandoc().convert(self.content) return self._html + @property def tmplvars(self): if hasattr(self, '_tmplvars'): @@ -414,6 +429,7 @@ class Comment(BaseRenderable): } return self._tmplvars + @property def published(self): if hasattr(self, '_published'): @@ -421,10 +437,12 @@ class Comment(BaseRenderable): self._published = arrow.get(self.meta.get('date', self.mtime)) return self._published + @property def pubtime(self): return int(self.published.timestamp) + @property def source(self): if hasattr(self, '_source'): @@ -437,6 +455,7 @@ class Comment(BaseRenderable): self._source = '' return self._source + @property def target(self): if hasattr(self, '_target'): @@ -445,6 +464,7 @@ class Comment(BaseRenderable): self._target = '{p.path}'.format(p=urllib.parse.urlparse(t)).strip('/') return self._target + async def render(self, renderer): logging.info("rendering and saving comment %s", self.fname) targetdir = os.path.abspath(os.path.join( @@ -470,12 +490,7 @@ class Comment(BaseRenderable): 'taxonomy': {}, } r = renderer.j2.get_template(self.tmplfile).render(tmplvars) - self.writerendered(target, r, self.mtime) - #with open(target, "w") as html: - #logging.debug('writing %s', target) - #html.write(r) - #html.close() - #os.utime(target, (self.mtime, self.mtime)) + self.writerendered(r) class Comments(object): @@ -486,9 +501,11 @@ class 
Comments(object): )) self.bytarget = {} + def __getitem__(self, key): return self.bytarget.get(key, BaseIter()) + def populate(self): for fpath in self.files: item = Comment(fpath) @@ -519,7 +536,7 @@ class Images(BaseIter): def populate(self): with ExifTool() as e: - _meta = e.get_metadata(*self.files) + _meta = e.run(*self.files) # parsing the returned meta into a dict of [filename]={meta} for e in _meta: if 'FileName' not in e: @@ -537,6 +554,7 @@ class Images(BaseIter): self.data[fname] = WebImage(fname, e) + def exifdate(self, value): """ converts and EXIF date string to ISO 8601 format @@ -557,6 +575,7 @@ class Images(BaseIter): match.group('time') ) + class WebImage(object): def __init__(self, fname, meta): logging.info( @@ -604,6 +623,7 @@ class WebImage(object): "%s%s" % (self.fname, self.ext) ) + def __str__(self): if self.is_downsizeable: if self.singleimage and not self.cl: @@ -630,6 +650,7 @@ class WebImage(object): self.cl ) + @property def exif(self): if not self.is_photo: @@ -687,36 +708,6 @@ class WebImage(object): self._exif = exif return self._exif - #def __str__(self): - #if self.is_downsizeable and not self.cl: - #uphoto = '' - #if self.singleimage: - #uphoto = ' u-photo' - #return '\n
%s
%s%s
\n' % ( - #uphoto, - #self.target, - #self.fallback, - #self.alttext, - #self.fname, - #self.ext - #) - #elif self.cl: - #self.cl = self.cl.replace('.', ' ') - #return '%s' % ( - #self.fallback, - #self.cl, - #self.alttext, - #self.fname, - #self.ext - #) - - #else: - #return '%s' % ( - #self.fallback, - #self.alttext, - #self.fname, - #self.ext - #) @property def rssenclosure(self): @@ -732,6 +723,7 @@ class WebImage(object): } return self._rssenclosure + @property def is_photo(self): if hasattr(self, '_is_photo'): @@ -753,6 +745,7 @@ class WebImage(object): return self._is_photo + @property def is_downsizeable(self): if hasattr(self, '_is_downsizeable'): @@ -773,6 +766,7 @@ class WebImage(object): return self._is_downsizeable + def _copy(self): target = os.path.join( shared.config.get('target', 'filesdir'), @@ -782,6 +776,7 @@ class WebImage(object): logging.debug("can't downsize %s, copying instead" % self.fname) shutil.copy(self.fpath, target) + def _watermark(self, img): """ Composite image by adding watermark file over it """ wmarkfile = os.path.join( @@ -890,6 +885,7 @@ class WebImage(object): for (size, meta) in self.sizes: self._intermediate(img, size, meta, existing) + class Taxonomy(BaseIter): def __init__(self, name = None, taxonomy = None, slug = None): super(Taxonomy, self).__init__() @@ -900,6 +896,7 @@ class Taxonomy(BaseIter): self.slug = slug self.taxonomy = taxonomy + @property def pages(self): if hasattr(self, '_pages'): @@ -910,6 +907,7 @@ class Taxonomy(BaseIter): def __repr__(self): return "taxonomy %s with %d items" % (self.taxonomy, len(self.data)) + @property def basep(self): p = shared.config.get('target', 'builddir') @@ -917,6 +915,7 @@ class Taxonomy(BaseIter): p = os.path.join(p, self.taxonomy) return p + @property def myp(self): p = self.basep @@ -924,14 +923,17 @@ class Taxonomy(BaseIter): return os.path.join(p,self.slug) return p + @property def feedp(self): return os.path.join(self.myp, 'feed') + @property def pagep(self): return 
os.path.join(self.myp, 'page') + @property def baseurl(self): if self.taxonomy and self.slug: @@ -939,6 +941,7 @@ class Taxonomy(BaseIter): else: return '/' + @property def mtime(self): if hasattr(self, '_mtime'): @@ -946,6 +949,7 @@ class Taxonomy(BaseIter): self._mtime = int(list(sorted(self.data.keys(), reverse=True))[0]) return self._mtime + def __mkdirs(self): check = [self.basep, self.myp, self.feedp] @@ -963,12 +967,14 @@ class Taxonomy(BaseIter): logging.debug("creating dir %s", p) os.mkdir(p) + def tpath(self, page): if page == 1: return "%s/index.html" % (self.myp) else: return "%s/%d/index.html" % (self.pagep, page) + async def render(self, renderer): if not self.slug or self.slug is 'None': return @@ -994,6 +1000,7 @@ class Taxonomy(BaseIter): self.renderpage(renderer, page) page = page+1 + def renderpage(self, renderer, page): pagination = int(shared.config.get('common', 'pagination')) start = int((page-1) * pagination) @@ -1074,6 +1081,7 @@ class Taxonomy(BaseIter): os.utime(target, (self.mtime, self.mtime)) # --- + class Content(BaseIter): def __init__(self, images, comments, extensions=['md']): super(Content, self).__init__() @@ -1088,6 +1096,7 @@ class Content(BaseIter): self.front = Taxonomy() self.shortslugmap = {} + def populate(self): now = arrow.utcnow().timestamp for fpath in self.files: @@ -1114,6 +1123,7 @@ class Content(BaseIter): self.tags[tslug].append(item.pubtime, item) self.symlinktag(tslug, item.path) + def symlinktag(self, tslug, fpath): fdir, fname = os.path.split(fpath) tagpath = os.path.join(shared.config.get('source', 'tagsdir'), tslug) @@ -1125,6 +1135,7 @@ class Content(BaseIter): if not os.path.islink(dst): os.symlink(src, dst) + def sitemap(self): target = os.path.join( shared.config.get('target', 'builddir'), @@ -1141,6 +1152,7 @@ class Content(BaseIter): logging.info("writing sitemap to %s" % (target)) f.write("\n".join(urls)) + def magicphp(self, renderer): redirects = [] gones = [] @@ -1183,6 +1195,7 @@ class 
Content(BaseIter): html.write(r) html.close() + class Singular(BaseRenderable): def __init__(self, path, images, comments): logging.debug("initiating singular object from %s", path) @@ -1199,9 +1212,11 @@ class Singular(BaseRenderable): self.photo.singleimage = True self.__parse() + def __repr__(self): return "%s (lastmod: %s)" % (self.fname, self.published) + def __parse(self): with open(self.path, mode='rt') as f: self.meta, self.content = frontmatter.parse(f.read()) @@ -1215,6 +1230,7 @@ class Singular(BaseRenderable): # REMOVE THIS trigger = self.offlinecopies + def __filter_favs(self): url = self.meta.get('favorite-of', self.meta.get('like-of', @@ -1241,6 +1257,7 @@ class Singular(BaseRenderable): self.content = c + def __filter_images(self): linkto = False isrepost = None @@ -1275,6 +1292,7 @@ class Singular(BaseRenderable): "%s" % image ) + @property def comments(self): if hasattr(self, '_comments'): @@ -1289,6 +1307,7 @@ class Singular(BaseRenderable): self._comments = [c[k] for k in list(sorted(c.keys(), reverse=True))] return self._comments + @property def replies(self): if hasattr(self, '_replies'): @@ -1296,6 +1315,7 @@ class Singular(BaseRenderable): self._replies = [c.tmplvars for c in self.comments if not len(c.reacji)] return self._replies + @property def reacjis(self): if hasattr(self, '_reacjis'): @@ -1341,6 +1361,7 @@ class Singular(BaseRenderable): self._reactions = reactions return self._reactions + @property def urls(self): if hasattr(self, '_urls'): @@ -1363,6 +1384,7 @@ class Singular(BaseRenderable): self._urls = r return self._urls + @property def lang(self): if hasattr(self, '_lang'): @@ -1379,10 +1401,12 @@ class Singular(BaseRenderable): self._lang = lang return self._lang + @property def tags(self): return list(self.meta.get('tags', [])) + @property def published(self): if hasattr(self, '_published'): @@ -1392,6 +1416,7 @@ class Singular(BaseRenderable): ) return self._published + @property def updated(self): if hasattr(self, 
'_updated'): @@ -1403,29 +1428,35 @@ class Singular(BaseRenderable): ) return self._updated + @property def pubtime(self): return int(self.published.timestamp) + @property def isphoto(self): if not self.photo: return False return self.photo.is_photo + @property def isbookmark(self): return self.meta.get('bookmark-of', False) + @property def isreply(self): return self.meta.get('in-reply-to', False) + # TODO #@property #def isrvsp(self): # r'([^<]+)' + @property def isfav(self): r = False @@ -1436,12 +1467,14 @@ class Singular(BaseRenderable): break return r + @property def ispage(self): if not self.meta: return True return False + @property def isonfront(self): if self.ispage: @@ -1452,16 +1485,19 @@ class Singular(BaseRenderable): return False return True + @property def iscategorised(self): if self.ispage: return False return True + @property def summary(self): return self.meta.get('summary', '') + @property def title(self): if hasattr(self, '_title'): @@ -1475,10 +1511,12 @@ class Singular(BaseRenderable): break return self._title + @property def url(self): return "%s/%s/" % (shared.config.get('site', 'url'), self.fname) + @property def tmplfile(self): if self.ispage: @@ -1486,6 +1524,7 @@ class Singular(BaseRenderable): else: return 'singular.html' + @property def html(self): if hasattr(self, '_html'): @@ -1493,6 +1532,7 @@ class Singular(BaseRenderable): self._html = shared.Pandoc().convert(self.content) return self._html + @property def sumhtml(self): if hasattr(self, '_sumhtml'): @@ -1502,6 +1542,7 @@ class Singular(BaseRenderable): self._sumhtml = shared.Pandoc().convert(self.summary) return self._sumhtml + @property def offlinecopies(self): # stupidly simple property caching @@ -1522,19 +1563,21 @@ class Singular(BaseRenderable): self.copies = copies return copies + @property def exif(self): if not self.isphoto: return {} - return self.photo.exif + @property def rssenclosure(self): if not self.isphoto: return {} return self.photo.rssenclosure + @property 
def tmplvars(self): if hasattr(self, '_tmplvars'): @@ -1565,6 +1608,7 @@ class Singular(BaseRenderable): } return self._tmplvars + @property def shortslug(self): if hasattr(self, '_shortslug'): @@ -1572,10 +1616,12 @@ class Singular(BaseRenderable): self._shortslug = shared.baseN(self.pubtime) return self._shortslug + async def rendercomments(self, renderer): for comment in self.comments: await comment.render(renderer) + async def render(self, renderer): # this is only when I want salmentions and I want to include all of the comments as well # otherwise it affects both webmentions sending and search indexing @@ -1639,6 +1685,7 @@ class Singular(BaseRenderable): pinger.db[h] = record + class Webmentioner(object): def __init__(self): self.dbpath = os.path.abspath(os.path.join( @@ -1652,6 +1699,7 @@ class Webmentioner(object): else: self.db = {} + def finish(self): with open(self.dbpath, 'wt') as f: f.write(json.dumps(self.db, sort_keys=True, indent=4)) @@ -1746,6 +1794,7 @@ class NASG(object): for (pubtime, singular) in content: await singular.ping(pinger) + def run(self): if os.path.isfile(self.lockfile): raise ValueError( diff --git a/pesos.py b/pesos.py index 86402df..2a5fe0e 100644 --- a/pesos.py +++ b/pesos.py @@ -92,8 +92,7 @@ class Fav(object): @property def exists(self): - return False - #return os.path.isfile(self.target) + return os.path.isfile(self.target) @property def imgname(self): @@ -220,8 +219,6 @@ class Favs(object): @property def lastpulled(self): - return 0 - mtime = 0 d = os.path.join( shared.config.get('source', 'contentdir'), diff --git a/shared.py b/shared.py index 3beffd4..ab8be86 100644 --- a/shared.py +++ b/shared.py @@ -7,6 +7,7 @@ import subprocess from whoosh import fields from whoosh import analysis + def __expandconfig(config): """ add the dirs to the config automatically """ basepath = os.path.expanduser(config.get('common','base')) @@ -25,8 +26,9 @@ def __expandconfig(config): )) return config + def baseN(num, b=36, 
numerals="0123456789abcdefghijklmnopqrstuvwxyz"):
-    """ Used to create short, lowecase slug for a number (an epoch) passed """
+    """ Used to create short, lowercase slug for a number (an epoch) passed """
     num = int(num)
     return ((num == 0) and numerals[0]) or (
         baseN(
@@ -101,6 +103,7 @@ config = configparser.ConfigParser(
 config.read('config.ini')
 config = __expandconfig(config)
 
+
 class CMDLine(object):
     def __init__(self, executable):
         self.executable = self._which(executable)
@@ -108,6 +111,7 @@ class CMDLine(object):
             raise OSError('No %s found in PATH!' % executable)
         return
 
+
     @staticmethod
     def _which(name):
         for d in os.environ['PATH'].split(':'):
@@ -116,6 +120,7 @@ class CMDLine(object):
                 return which.pop()
         return None
 
+
     def __enter__(self):
         self.process = subprocess.Popen(
             [self.executable, "-stay_open", "True", "-@", "-"],
@@ -126,10 +131,12 @@ class CMDLine(object):
         )
         return self
 
+
     def __exit__(self, exc_type, exc_value, traceback):
         self.process.stdin.write("-stay_open\nFalse\n")
         self.process.stdin.flush()
 
+
     def execute(self, *args):
         args = args + ("-execute\n",)
         self.process.stdin.write(str.join("\n", args))
@@ -140,8 +147,10 @@ class CMDLine(object):
             output += os.read(fd, 4096).decode('utf-8', errors='ignore')
         return output[:-len(self.sentinel)]
 
+
 class Pandoc(CMDLine):
     """ Pandoc command line call with piped in- and output """
+
     def __init__(self, md2html=True):
         super().__init__('pandoc')
         if md2html:
@@ -172,6 +181,7 @@ class Pandoc(CMDLine):
             ])
             self.i = 'html'
 
+
     def convert(self, text):
         cmd = (
             self.executable,
diff --git a/tagmyloc.py b/tagmyloc.py
new file mode 100644
index 0000000..476d78f
--- /dev/null
+++ b/tagmyloc.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+
+import asyncio
+import uvloop
+import os
+from collections import Counter
+
+from sanic import Sanic
+import sanic.response
+from sanic.log import log as logging
+#import jinja2
+import requests
+import shared
+import json
+
+
+# seconds to wait on a remote API before giving up on it
+API_TIMEOUT = 10
+
+
+def _fetchjson(service, url, params):
+    """ GET url with params and decode the JSON body
+
+    Returns (results, status): results is None when the request or the
+    decoding failed, status is the HTTP code to pass on to the client.
+    """
+    try:
+        r = requests.get(url, params=params, timeout=API_TIMEOUT)
+    except requests.RequestException as e:
+        logging.error('%s request failed: %s', service, e)
+        return None, 502
+
+    try:
+        return json.loads(r.text), 200
+    except ValueError as e:
+        logging.error('failed to load results for %s request: %s', service, e)
+        logging.error('request was: %s', r.url)
+        # an undecodable body is a failure even if the API answered 200
+        return None, (502 if r.status_code == 200 else r.status_code)
+
+
+def _toptags(taglists, num):
+    """ Return the num most frequent tags of taglists, most frequent first """
+    counts = Counter(t for tags in taglists for t in tags if t)
+    return sorted(counts, key=counts.get, reverse=True)[:num]
+
+
+def locationtags_500px(lat, lon, radius=0.5, num=10):
+    """ Most common tags of the 500px photos found around lat, lon
+
+    Returns (tags, status): at most num tags, most frequent first, and the
+    HTTP status to pass on to the client.
+    """
+    if not lat or not lon:
+        # callers unpack two values, a bare list would break them
+        return [], 400
+
+    logging.info("requesting locationtags from 500px for '%s, %s'", lat, lon)
+    params = {
+        'rpp': 100,
+        'geo': "%s,%s,%skm" % (lat, lon, radius),
+        'consumer_key': shared.config.get('500px', 'api_key'),
+        'tags': 1,
+    }
+
+    results, status = _fetchjson(
+        '500px',
+        'https://api.500px.com/v1/photos/search',
+        params
+    )
+    if results is None:
+        return [], status
+
+    photos = results.get('photos', [])
+    return _toptags((p.get('tags') or [] for p in photos), num), 200
+
+
+def locationtags_flickr(lat, lon, radius=0.5, num=10):
+    """ Most common tags of the Flickr photos found around lat, lon
+
+    Returns (tags, status): at most num tags, most frequent first, and the
+    HTTP status to pass on to the client.
+    """
+    if not lat or not lon:
+        # callers unpack two values, a bare list would break them
+        return [], 400
+
+    logging.info("requesting locationtags from Flickr for '%s, %s'", lat, lon)
+    params = {
+        'method': 'flickr.photos.search',
+        'api_key': shared.config.get('flickr', 'api_key'),
+        'has_geo': 1,
+        'lat': lat,
+        'lon': lon,
+        'radius': radius,
+        'extras': ','.join(['tags','machine_tags']),
+        'per_page': 500,
+        'format': 'json',
+        'nojsoncallback': 1
+    }
+
+    results, status = _fetchjson(
+        'Flickr',
+        'https://api.flickr.com/services/rest/',
+        params
+    )
+    if results is None:
+        return [], status
+
+    photos = results.get('photos', {}).get('photo', [])
+    # the tags of a Flickr photo are handled as one space separated string
+    return _toptags((p.get('tags', '').split(' ') for p in photos), num), 200
+
+
+def RequestHandler(lat, lon, rad, num=20):
+    """ Build the JSON response holding the tags of both services """
+    # rad is None when the query string carried no rad argument
+    rad = rad or 0.5
+    ftags, fstatus = locationtags_flickr(lat, lon, rad, num)
+    fivehtags, fivehstatus = locationtags_500px(lat, lon, rad, num)
+
+    # report the first failure, so that a 500px success can not hide a
+    # Flickr error
+    status = fivehstatus if fstatus == 200 else fstatus
+    return sanic.response.json({
+        'flickr': ftags,
+        '500px': fivehtags,
+    }, status=status)
+
+
+if __name__ == '__main__':
+    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
+    app = Sanic()
+
+    # the accepted methods belong to the route, not to the handler signature
+    @app.route("/tagmyloc", methods=["GET"])
+    async def search(request):
+        lat = request.args.get('lat')
+        lon = request.args.get('lon')
+        rad = request.args.get('rad')
+        # NOTE(review): RequestHandler does blocking HTTP calls, which
+        # stalls the event loop for the duration of the lookups
+        return RequestHandler(lat, lon, rad)
+
+    app.run(host="127.0.0.1", port=8003, debug=True)