import glob
import asyncio
import uvloop
import os
from sanic import Sanic
import sanic.response
from sanic.log import log as logging
from whoosh import index, qparser
import pynmea2
import datetime
import pytz
import re
import validators
import requests
import pypandoc
import hashlib
import time
from webmentiontools import urlinfo
import json
import calendar
import mimetypes
import singular
import urllib.parse
from ruamel import yaml
from slugify import slugify
import smtplib
import iso8601
import csv
import shutil
import collections
from git import Repo, Actor
import frontmatter
#import gzip
import arrow
class ToEmail(object):
    """Render a parsed webmention as an HTML e-mail and send it over SMTP.

    The constructor only prepares the HTML body (``self.html``) and the
    header dict (``self.headers``); nothing is sent until ``send()`` is
    called.
    """

    def __init__(self, webmention):
        """Prepare the message for *webmention*.

        :param webmention: dict with the keys ``type``, ``author`` (a dict),
            ``source``, ``target``, ``date`` and ``content``.
        """
        self.webmention = webmention
        self.set_html()
        self.set_headers()

    def set_html(self):
        """Build the HTML body and store it in ``self.html``.

        Missing ``email``/``name``/``url`` author fields are filled in with
        empty strings on the webmention dict itself.
        """
        # stdlib; imported locally so this class does not depend on a
        # file-level import being added
        from html import escape

        author = self.webmention['author']
        for authormeta in ('email', 'name', 'url'):
            if authormeta not in author:
                author[authormeta] = ''

        def esc(value):
            # everything interpolated here comes from a remote page, so it
            # must not be able to break out of the markup
            return escape(str(value), quote=True)

        # NOTE(review): the template previously had six placeholders for
        # ten arguments, which makes the %-formatting raise TypeError. The
        # markup below is a reconstruction that gives every argument a slot
        # (link target + link text pairs) - confirm against the intended
        # layout.
        template = """
<html><head></head><body>
<h1>New %s</h1>
<dl>
<dt>From</dt>
<dd>
<a href="%s">%s</a><br />
<a href="mailto:%s">%s</a>
</dd>
<dt>Source</dt>
<dd><a href="%s">%s</a></dd>
<dt>Target</dt>
<dd><a href="%s">%s</a></dd>
</dl>
%s
</body></html>
"""
        pandoc_format = "markdown+" + "+".join([
            'backtick_code_blocks',
            'auto_identifiers',
            'fenced_code_attributes',
            'definition_lists',
            'grid_tables',
            'pipe_tables',
            'strikeout',
            'superscript',
            'subscript',
            'markdown_in_html_blocks',
            'shortcut_reference_links',
            'autolink_bare_uris',
            'raw_html',
            'link_attributes',
            'header_attributes',
            'footnotes',
        ])
        content_html = pypandoc.convert_text(
            self.webmention['content'],
            to='html5',
            format=pandoc_format
        )
        self.html = template % (
            esc(self.webmention['type']),
            esc(author['url']),
            esc(author['name']),
            esc(author['email']),
            esc(author['email']),
            esc(self.webmention['source']),
            esc(self.webmention['source']),
            esc(self.webmention['target']),
            esc(self.webmention['target']),
            content_html,
        )

    def set_headers(self):
        """Build the e-mail header dict and store it in ``self.headers``."""
        source = self.webmention['source']
        target = self.webmention['target']
        self.headers = {
            'Content-Type': 'text/html; charset=utf-8',
            'Content-Disposition': 'inline',
            'Content-Transfer-Encoding': '8bit',
            'Date': self.webmention['date'].strftime('%a, %d %b %Y %H:%M:%S %Z'),
            'X-WEBMENTION-SOURCE': source,
            'X-WEBMENTION-TARGET': target,
            'From': glob.conf['from']['address'],
            'To': glob.conf['to']['address'],
            'Subject': "[webmention] from %s to %s" % (source, target),
        }

    def send(self):
        """Send the prepared message through the configured SMTP server.

        Any error is reported on stdout and then re-raised to the caller.
        """
        lines = []
        for key, value in self.headers.items():
            # a header value must stay on one line, otherwise a crafted URL
            # could inject additional headers
            value = str(value).replace('\r', ' ').replace('\n', ' ')
            lines.append("%s: %s\n" % (key, value))
        msg = "".join(lines) + "\n%s\n" % self.html

        smtp_conf = glob.conf['smtp']
        try:
            # the context manager issues QUIT even when sending fails
            with smtplib.SMTP(smtp_conf['host'], smtp_conf['port']) as s:
                if smtp_conf['tls']:
                    s.ehlo()
                    s.starttls()
                    s.ehlo()
                if smtp_conf['username'] and smtp_conf['password']:
                    s.login(smtp_conf['username'], smtp_conf['password'])
                s.sendmail(
                    self.headers['From'],
                    [self.headers['To']],
                    msg.encode("utf8")
                )
        except Exception as err:
            print("Unexpected error:", type(err))
            raise
class MicropubHandler(object):
    """Process one Micropub request: verify the token, build and store a post.

    All work happens in the constructor; the caller reads ``self.response``
    afterwards.
    """

    def __init__(self, request):
        """
        :param request: the incoming request object; ``args``, ``form``,
            ``files``, ``body``, ``headers`` and ``query_string`` are read.
        """
        self.request = request
        self.response = sanic.response.text("Unhandled error", status=500)
        self.slug = ''
        self.content = ''
        self.category = 'note'
        self.meta = {}
        # take the current instant in UTC; stamping the local wall clock
        # with a UTC tzinfo would shift the published time on any server
        # that is not running in UTC
        self.dt = datetime.datetime.now(pytz.utc)
        logging.debug("incoming micropub request:")
        logging.debug(self.request.body)
        logging.debug("** args:")
        logging.debug(self.request.args)
        logging.debug("** query string:")
        logging.debug(self.request.query_string)
        logging.debug("** headers:")
        logging.debug(self.request.headers)
        with open(os.path.join(glob.CACHE, "tags.json"), "r") as db:
            self.existing_tags = json.loads(db.read())
        self._parse()

    def _form_value(self, name):
        """Return the form field *name*, or '' when it is missing or empty."""
        value = self.request.form.get(name)
        return value if value else ''

    def _verify(self):
        """Answer ``q=`` queries and validate the access token.

        :return: True when the request may create a post; False otherwise,
            in which case ``self.response`` already holds the reply.
        """
        if 'q' in self.request.args:
            query = self.request.args['q']
            if 'config' in query:
                self.response = sanic.response.json({
                    'tags': self.existing_tags
                }, status=200)
                return False
            if 'syndicate-to' in query:
                self.response = sanic.response.json({
                    'syndicate-to': []
                }, status=200)
                return False

        if 'access_token' not in self.request.form:
            self.response = sanic.response.text("Mising access token", status=401)
            return False

        token = self.request.form.get('access_token')
        verify = requests.get(
            'https://tokens.indieauth.com/token',
            allow_redirects=False,
            timeout=10,
            headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'Authorization': 'Bearer %s' % (token)
            })
        if verify.status_code != requests.codes.ok:
            self.response = sanic.response.text("Could not verify access token", status=500)
            return False

        response = urllib.parse.parse_qs(verify.text)
        logging.debug(response)
        if 'scope' not in response or 'me' not in response:
            self.response = sanic.response.text("Could not verify access token", status=401)
            return False

        # strip a trailing slash so exactly one is appended
        site = glob.conf['site']['url'].strip().rstrip('/')
        if '%s/' % (site) not in response['me']:
            self.response = sanic.response.text("You can't post to this domain.", status=401)
            return False

        # parse_qs yields a list of raw values and a token may carry several
        # space separated scopes in one value ("post create"), so split
        # before testing membership
        scopes = set()
        for value in response['scope']:
            scopes.update(value.split())
        if 'post' not in scopes and 'create' not in scopes:
            self.response = sanic.response.text("Invalid scope", status=401)
            return False
        return True

    def _parse(self):
        """Collect metadata, content, category and slug from the form, then write."""
        if not self._verify():
            return

        if len(self.request.files):
            # TODO: file uploads are not handled yet
            self.response = sanic.response.text("File handling is not yet done", status=501)
            return

        self.meta['published'] = self.dt.strftime('%Y-%m-%dT%H:%M:%S%z')

        content = self._form_value('content')
        if content:
            self.content = content

        summary = self._form_value('summary')
        if summary:
            self.meta['summary'] = summary

        # an explicit slug wins; otherwise the first of title, reply target,
        # repost target, bookmark target that is present supplies it
        slug = self._form_value('slug')

        title = self._form_value('name')
        if title:
            self.meta['title'] = title
            slug = slug or title

        reply_to = self._form_value('in-reply-to')
        if reply_to:
            self.meta['in-reply-to'] = reply_to
            slug = slug or 're: %s' % (reply_to)

        repost_of = self._form_value('repost-of')
        if repost_of:
            self.meta['repost-of'] = repost_of
            # NOTE(review): a local ``category = 'bookmark'`` used to be
            # assigned here and never read, so reposts keep the default
            # category; confirm whether ``self.category`` was intended.
            slug = slug or '%s' % (repost_of)

        bookmark_of = self._form_value('bookmark-of')
        if bookmark_of:
            self.meta['bookmark-of'] = bookmark_of
            self.category = 'bookmark'
            slug = slug or '%s' % (bookmark_of)

        if 'category[]' in self.request.form:
            tags = list(self.request.form['category[]'])
            self.meta['tags'] = tags
            # a summary together with certain tags promotes the post
            if 'summary' in self.meta:
                if 'IT' in tags or 'it' in tags:
                    self.category = 'article'
                elif 'journal' in tags:
                    self.category = 'journal'

        if not slug:
            # nothing usable in the request: derive the slug from the timestamp
            slug = singular.SingularHandler.baseN(calendar.timegm(self.dt.timetuple()))

        self.slug = slugify(slug, only_ascii=True, lower=True)
        self._write()

    def _write(self):
        """Write the post as front matter + markdown, commit it and render it."""
        fpath = os.path.join(glob.CONTENT, self.category, '%s.md' % (self.slug))
        if os.path.isfile(fpath):
            self.response = sanic.response.text("Update handling is not yet done", status=501)
            return

        logfile = os.path.join(glob.LOGDIR, "micropub-%s.log" % (self.dt.strftime("%Y-%m")))
        with open(logfile, 'a') as micropublog:
            logging.debug("logging micropub request")
            micropublog.write("%s %s\n" % (self.dt.strftime('%Y-%m-%dT%H:%M:%S%z'), fpath))

        with open(fpath, 'w') as mpf:
            logging.info("writing file to: %s", fpath)
            out = "---\n" + yaml.dump(
                self.meta,
                Dumper=yaml.RoundTripDumper,
                allow_unicode=True,
                indent=4
            ) + "---\n\n" + self.content
            mpf.write(out)

        self._git(fpath)

        logging.info("trying to open and parse the received post")
        post = singular.ArticleHandler(fpath, category=self.category)
        post.write()
        post.pings()

        self.response = sanic.response.text(
            "Post created",
            status=201,
            headers={
                'Location': "%s/%s/" % (glob.conf['site']['url'], self.slug)
            }
        )

    def _git(self, fpath):
        """Add *fpath* to the content repository and commit it."""
        logging.info("committing to git")
        repo = Repo(glob.CONTENT)
        author = Actor(glob.conf['author']['name'], glob.conf['author']['email'])
        repo_index = repo.index
        # the index wants the path relative to the repository root
        newfile = fpath.replace(glob.CONTENT, '').lstrip('/')
        repo_index.add([newfile])
        message = 'new content via micropub: %s' % (newfile)
        repo_index.commit(message, author=author, committer=author)
class SearchHandler(object):
    """Run a full-text search and render the hits as an HTML response.

    The result is available as ``self.response``; an empty query yields a
    400 text response.
    """

    def __init__(self, query):
        """
        :param query: the raw search string, or None/'' when none was given.
        """
        self.query = query
        self.response = sanic.response.text("You seem to have forgot to enter what you want to search for. Please try again.", status=400)
        if not query:
            return
        self._tmpl = glob.jinja2env.get_template('searchresults.html')
        self._ix = index.open_dir(glob.SEARCHDB)
        self._parse()

    def _parse(self):
        """Translate the query, search the index and render the template."""
        # map the '+' / ' -' shorthand to the parser's boolean keywords
        self.query = self.query.replace('+', ' AND ')
        self.query = self.query.replace(' -', ' NOT ')

        qp = qparser.MultifieldParser(
            ["title", "content", "tags"],
            schema=glob.schema
        )
        q = qp.parse(self.query)

        results = []
        # the searcher holds open index files; the context manager closes
        # it once the hits have been copied into plain dicts
        with self._ix.searcher() as searcher:
            r = searcher.search(q, sortedby="weight", limit=100)
            logging.info("results for '%s': %i", self.query, len(r))
            for result in r:
                res = {
                    'title': result['title'],
                    'url': result['url'],
                    'highlight': result.highlights("content"),
                }
                if 'img' in result:
                    res['img'] = result['img']
                results.append(res)

        tvars = {
            'term': self.query,
            'site': glob.conf['site'],
            'posts': results,
            'taxonomy': {}
        }
        logging.info("collected %i results to render", len(results))
        html = self._tmpl.render(tvars)
        self.response = sanic.response.html(html, status=200)
class WebmentionHandler(object):
    """Validate, fetch, archive and e-mail one incoming webmention.

    All work happens in the constructor; the caller reads ``self.response``
    afterwards. A mention that passes validation but is dropped while being
    fetched still answers with the initial "accepted" response.
    """

    def __init__(self, source, target):
        """
        :param source: URL of the page that mentions the target.
        :param target: URL on this site that is being mentioned.
        """
        self.source = source
        self.target = target
        self.time = arrow.utcnow().timestamp
        # stays None unless _parse() manages to fetch and parse the source
        self.webmention = None
        logging.debug("validating: from: %s; to: %s" % (self.source, self.target))
        self.response = sanic.response.json({
            'status': 'ok', 'msg': 'accepted',
        }, 200)
        self._validate()
        self._parse()
        self._archive()
        self._send()

    def _reject(self, msg):
        """Replace the response with a 400 JSON error carrying *msg*."""
        self.response = sanic.response.json({
            'status': 'error', 'msg': msg,
        }, 400)

    def _validate(self):
        """Check both URLs and the domain rules; set an error response on failure."""
        # a missing form field arrives as None, so test for that first
        if not self.source or not validators.url(self.source):
            self._reject('"source" parameter is an invalid URL')
            return

        if not self.target or not validators.url(self.target):
            self._reject('"target" parameter is an invalid URL')
            return

        _target_domain = urllib.parse.urlparse(self.target).netloc
        if _target_domain not in glob.conf['accept_domains']:
            self._reject("'%s' is not in the list of allowed domains" % (
                _target_domain
            ))
            return

        _source_domain = urllib.parse.urlparse(self.source).netloc
        if _source_domain == _target_domain and not glob.conf['allow_selfmention']:
            self._reject("selfpings are disabled")
            return

    def _parse(self):
        """Fetch source and target and assemble ``self.webmention``.

        Leaves ``self.webmention`` as None when the mention is dropped.
        """
        if self.response.status != 200:
            return

        self._log()
        self._source = urlinfo.UrlInfo(self.source)
        if self._source.error:
            logging.warning("couldn't fetch %s; dropping webmention" % (self.source))
            return

        self.source = self._source.realurl
        if not self._source.linksTo(self.target):
            logging.warning("%s is not linking to %s; dropping webmention" % (self.source, self.target))
            return

        self._target = urlinfo.UrlInfo(self.target)
        if self._target.error:
            logging.warning("couldn't fetch %s; dropping webmention" % (self.target))
            return

        self.target = self._target.realurl
        self.webmention = {
            'author': self._source.author(),
            'type': self._source.relationType(),
            'target': self.target,
            'source': self.source,
            'date': arrow.get(self._source.pubDate()),
            'content': pypandoc.convert_text(
                self._source.content(),
                to="markdown-" + "-".join([
                    'raw_html',
                    'native_divs',
                    'native_spans',
                ]),
                format='html'
            )
        }

    def _send(self):
        """E-mail the parsed webmention, if there is one."""
        # without a parsed mention there is nothing to send
        if self.response.status != 200 or self.webmention is None:
            return
        m = ToEmail(self.webmention)
        m.send()

    def _archive(self):
        """Store the parsed webmention as a front matter + markdown file."""
        # without a parsed mention there is nothing to store
        if self.response.status != 200 or self.webmention is None:
            return

        fbase = self.webmention['date'].format('YYYY-MM-DD-HH-mm-ss')
        fpath = self._archive_name(fbase)

        # copy, so the live webmention keeps its date object and content
        archive = dict(self.webmention)
        archive['date'] = archive['date'].format('YYYY-MM-DDTHH.mm.ssZ')
        content = archive.pop('content')

        with open(fpath, 'w') as f:
            logging.info("writing file to: %s", fpath)
            out = "---\n" + yaml.dump(
                archive,
                Dumper=yaml.RoundTripDumper,
                allow_unicode=True,
                indent=4
            ) + "---\n\n" + content
            f.write(out)

    def _verify_archive(self, p):
        """Return True when the archive file *p* holds this very webmention.

        A match means the existing file may be overwritten.
        """
        archive = frontmatter.load(p)

        for key in ('target', 'source', 'date'):
            if key not in archive.metadata:
                logging.warning('missing %s' % (key))
                return False

        if archive.metadata['target'] != self.webmention['target']:
            logging.warning('target different')
            return False

        if archive.metadata['source'] != self.webmention['source']:
            logging.warning('source different')
            return False

        d = arrow.get(archive.metadata['date'])
        if d.timestamp != self.webmention['date'].timestamp:
            logging.warning('date different')
            return False

        # same mention: overwrite
        return True

    def _archive_name(self, archive, ext='.md'):
        """Return the path the webmention should be written to.

        :param archive: base file name derived from the mention date.
        :param ext: file extension including the leading dot.
        :return: the plain path when it is free or already holds the same
            mention, otherwise a path with the next free numeric suffix.
        """
        p = os.path.join(glob.COMMENTS, "%s%s" % (archive, ext))

        if not os.path.exists(p):
            logging.debug("%s doesn't exits yet" % p)
            return p

        logging.debug("%s exists, checking for update" % p)
        if self._verify_archive(p):
            return p

        # another comment within the very same second: number it
        names = [x for x in os.listdir(glob.COMMENTS) if x.startswith(archive)]
        suffixes = [x.replace(archive, '').replace(ext, '').replace('.', '') for x in names]
        indexes = [int(x) for x in suffixes if x and set(x) <= set('0123456789')]
        idx = 1
        if indexes:
            idx += max(indexes)

        return os.path.join(glob.COMMENTS, "%s.%d%s" % (archive, idx, ext))

    def _log(self):
        """Append a JSON line describing the request to the monthly log file."""
        if not os.path.isdir(glob.LOGDIR):
            os.mkdir(glob.LOGDIR)

        logfile = os.path.join(glob.LOGDIR, datetime.datetime.now().strftime("%Y-%m"))
        s = json.dumps({
            'time': self.time,
            'source': self.source,
            'target': self.target
        })

        with open(logfile, "a") as log:
            logging.debug("writing logfile %s with %s" % (logfile, s))
            log.write("%s\n" % (s))
class TimeSeriesHandler(object):
    """Append-only time series stored as one text file per tag.

    Every line of the file is ``<timestamp> <data>``, the timestamp being
    formatted with ``glob.ISODATE``.
    """

    def __init__(self, tag):
        """
        :param tag: name of the series; the data file is ``<tag>.csv``
            inside ``glob.TSDBDIR``, which is created when missing.
        """
        if not os.path.isdir(glob.TSDBDIR):
            os.mkdir(glob.TSDBDIR)

        self.tag = tag
        self.p = os.path.join(glob.TSDBDIR, '%s.csv' % (self.tag))
        self.db = {}

    @staticmethod
    def _common_date_base(d1, d2):
        """Return the longest common prefix of the two formatted dates."""
        d1 = d1.replace(tzinfo=pytz.utc).strftime(glob.ISODATE)
        d2 = d2.replace(tzinfo=pytz.utc).strftime(glob.ISODATE)
        # character-wise prefix; also copes with strings of unequal length
        return os.path.commonprefix([d1, d2])

    def search(self, when, tolerance=1800):
        """Return the stored entry whose timestamp is closest to *when*.

        Only lines whose timestamp starts with the prefix shared by
        ``when - tolerance/2`` and ``when + tolerance/2`` are considered;
        the hit itself is not checked against the window.

        :param when: datetime to look for; its tzinfo is replaced with UTC.
        :param tolerance: width of the window in seconds.
        :return: ``(datetime, data)`` of the closest line, or None when
            nothing matches or the series has no data file yet.
        """
        if not os.path.isfile(self.p):
            return None

        when = when.replace(tzinfo=pytz.utc)
        tolerance = int(tolerance/2)
        minwhen = when - datetime.timedelta(seconds=tolerance)
        maxwhen = when + datetime.timedelta(seconds=tolerance)

        closest = None
        mindiff = float('inf')
        common = TimeSeriesHandler._common_date_base(minwhen, maxwhen)
        # the prefix is literal text, so keep it from being read as regex
        pattern = re.compile(r'^(%s[0-9-\+:T]+)\s+(.*)$' % (re.escape(common)))

        with open(self.p, 'r') as searchfile:
            for line in searchfile:
                matched = pattern.match(line)
                if not matched:
                    continue

                d = iso8601.parse_date(matched.group(1))
                diff = abs((d - when).total_seconds())
                if diff >= mindiff:
                    continue

                mindiff = diff
                closest = (d, matched.group(2))

        return closest

    def append(self, data, dt=None):
        """Append *data* stamped with *dt* to the series.

        The write is refused when *dt* is older than the data file's
        modification time.

        :param data: value to store; written with ``%s`` formatting.
        :param dt: timestamp of the value; defaults to the time of the call.
        """
        if dt is None:
            # resolved per call: a default in the signature would be
            # evaluated once at import time and then go stale
            dt = datetime.datetime.now().replace(tzinfo=pytz.utc)

        if os.path.isfile(self.p):
            # NOTE(review): '%s' is a platform specific strftime extension
            # (epoch seconds) - kept as is, confirm the target platform
            epoch = int(dt.strftime('%s'))
            stat = os.stat(self.p)
            if epoch < stat.st_mtime:
                logging.warning('Refusing to append %s with old data' % self.p)
                return

        with open(self.p, 'a') as db:
            db.write("%s %s\n" % (
                dt.strftime(glob.ISODATE),
                data
            ))
class DataHandler(object):
    """Common base for device data endpoints: checks the device id.

    After construction ``self.response`` is a 200 "accepted" reply when the
    ``id`` query argument names a configured device; in that case
    ``self.id`` holds the value configured for that device. In every other
    case ``self.response`` is an error reply and ``self.id`` is not set.
    """

    def __init__(self, request):
        """
        :param request: the incoming request; only ``args`` is read here.
        """
        self.request = request
        self.dt = datetime.datetime.now().replace(tzinfo=pytz.utc)
        self.response = sanic.response.text('accepted', status=200)

        # no device list configured at all
        if not ('secrets' in glob.conf and 'devices' in glob.conf['secrets']):
            self.response = sanic.response.text(
                'server configuration error',
                status=501
            )
            return

        # request did not identify itself
        if 'id' not in self.request.args:
            self.response = sanic.response.text(
                'device id not found in request',
                status=401
            )
            return

        # request named a device that is not configured
        device = self.request.args.get('id')
        known_devices = glob.conf['secrets']['devices']
        if device not in known_devices.keys():
            self.response = sanic.response.text(
                'device id rejected',
                status=401
            )
            return

        self.id = known_devices[device]
class OpenGTSHandler(DataHandler):
    """Store a location fix sent as query arguments.

    Coordinates come either from ``latitude``/``longitude`` or from a
    ``gprmc`` sentence; the fix is appended to the 'location' time series.
    """

    def __init__(self, *args, **kwargs):
        super(OpenGTSHandler, self).__init__(*args, **kwargs)
        self.lat = 0
        self.lon = 0
        self.alt = 0
        # the base class signals a rejected request through the response
        # only; do not store anything for those
        if self.response.status == 200:
            self._parse()
        self.l = '%s 0' % (self.dt.strftime(glob.ISODATE))

    def _parse(self):
        """Read position, time and altitude from the request and store them."""
        logging.debug('--- incoming location request ---')
        logging.debug(self.request.args)
        args = self.request.args

        if 'latitude' in args and 'longitude' in args:
            try:
                self.lat = float(args.get('latitude'))
                self.lon = float(args.get('longitude'))
            except (TypeError, ValueError):
                self.response = sanic.response.text(
                    "could not process coordinates",
                    status=422
                )
                return
        elif 'gprmc' in args:
            try:
                # parsing is inside the try as well: a malformed sentence
                # must produce the 422 below, not an unhandled error
                gprmc = pynmea2.parse(args.get('gprmc'))
                self.lat = float(gprmc.latitude)
                self.lon = float(gprmc.longitude)
            except Exception:
                self.response = sanic.response.text(
                    "could not process gprmc string",
                    status=422
                )
                return
        else:
            self.response = sanic.response.text(
                "no location information found in query",
                status=401
            )
            return

        if 'exclude_coordinates' in glob.conf['secrets']:
            excluded = glob.conf['secrets']['exclude_coordinates']
            excl = {}
            for t in ['lat', 'lon']:
                excl[t] = []
                if t in excluded:
                    for c in excluded[t]:
                        excl[t].append(float(c))

            # excluded coordinates are compared at two decimals
            if round(self.lat, 2) in excl['lat'] and round(self.lon, 2) in excl['lon']:
                self.response = sanic.response.text(
                    "this location is on the excluded list",
                    status=200
                )
                return

        if 'loc_timestamp' in args and 'offset' in args:
            # this is a bit ugly: first convert the epoch to datetime
            # then append it with the offset as string
            # and convert the string back to datetime from the iso8601 string
            try:
                dt = datetime.datetime.utcfromtimestamp(int(args.get('loc_timestamp')))
                dt = dt.strftime('%Y-%m-%dT%H:%M:%S')
                dt = "%s%s" % (dt, args.get('offset'))
                self.dt = iso8601.parse_date(dt).replace(tzinfo=pytz.utc)
            except (TypeError, ValueError, OverflowError, OSError, iso8601.ParseError):
                # best effort: keep the time the request was received
                logging.warning('could not parse the timestamp of the request')

        if 'altitude' in args:
            try:
                self.alt = float(args.get('altitude'))
            except (TypeError, ValueError):
                self.response = sanic.response.text(
                    "could not process altitude",
                    status=422
                )
                return
        else:
            try:
                self.alt = OpenGTSHandler.altitude_from_bing(self.lat, self.lon)
            except Exception:
                # best effort: the lookup is optional, altitude stays 0
                logging.warning('altitude lookup failed')

        self.lat = "{:4.6f}".format(float(self.lat))
        self.lon = "{:4.6f}".format(float(self.lon))
        self.alt = "{:4.6f}".format(float(self.alt))
        l = '%s %s %s' % (self.lat, self.lon, self.alt)

        gpsfile = TimeSeriesHandler('location')
        gpsfile.append(l, dt=self.dt)

    @staticmethod
    def altitude_from_bing(lat, lon):
        """Look up the elevation of a coordinate with the Bing elevation API.

        :return: the elevation as float, or 0 when no API key is configured
            or the answer holds no elevation.
        :raises: whatever ``requests`` or the JSON decoder raise on failure.
        """
        if 'bing_key' not in glob.conf['secrets']:
            return 0
        if not glob.conf['secrets']['bing_key']:
            return 0

        # https, because the API key travels in the query string
        url = "https://dev.virtualearth.net/REST/v1/Elevation/List?points=%s,%s&key=%s" % (
            lat,
            lon,
            glob.conf['secrets']['bing_key']
        )

        # bounded wait: this runs while a request is being answered
        bing = requests.get(url, timeout=10)
        bing = json.loads(bing.text)
        try:
            elevations = bing['resourceSets'][0]['resources'][0]['elevations']
        except (KeyError, IndexError, TypeError):
            return 0
        if not elevations:
            return 0

        return float(elevations[0])
class SensorHandler(DataHandler):
    """Store every query argument except ``id`` as a sensor reading.

    Each argument name becomes its own time series named
    ``<device>-<argument>``.
    """

    def __init__(self, *args, **kwargs):
        super(SensorHandler, self).__init__(*args, **kwargs)
        self.data = 0
        self.tag = ''
        # the base class sets self.id only for accepted requests; parsing a
        # rejected one would fail on the missing attribute
        if self.response.status == 200:
            self._parse()

    def _parse(self):
        """Append the value of every non-id argument to its time series."""
        logging.debug('--- incoming sensor request ---')
        logging.debug(self.request.args)

        for tag in self.request.args:
            if tag == 'id':
                continue

            datafile = TimeSeriesHandler('%s-%s' % (self.id, tag))
            datafile.append(self.request.args.get(tag), dt=self.dt)
# Install uvloop's event loop policy for asyncio before the app is created.
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
# The Sanic application object the route decorators below register on.
app = Sanic()
# ``methods`` belongs to the route decorator; as a default argument of the
# handler it is never seen by the router, which then only registers GET.
@app.route("/webmention", methods=["POST"])
async def wm(request):
    """Receive a webmention: form fields ``source`` and ``target``."""
    source = request.form.get('source')
    target = request.form.get('target')
    r = WebmentionHandler(source, target)
    return r.response
# ``methods`` belongs to the route decorator, not to the handler signature.
@app.route("/search", methods=["GET"])
async def search(request):
    """Search the site; the query is read from the ``s`` argument."""
    query = request.args.get('s')
    r = SearchHandler(query)
    return r.response
# ``methods`` belongs to the route decorator; as a default argument of the
# handler it is never seen by the router, which then only registers GET.
@app.route("/micropub", methods=["POST", "GET"])
async def mpub(request):
    """Micropub endpoint: configuration queries and post creation."""
    r = MicropubHandler(request)
    return r.response
# ``methods`` belongs to the route decorator, not to the handler signature.
@app.route("/opengts", methods=["GET"])
async def opengts(request):
    """Receive a location fix sent as query arguments."""
    r = OpenGTSHandler(request)
    return r.response
# ``methods`` belongs to the route decorator, not to the handler signature.
@app.route("/sensor", methods=["GET"])
async def sensor(request):
    """Receive sensor readings sent as query arguments."""
    r = SensorHandler(request)
    return r.response
if __name__ == "__main__":
    # started directly: serve on the loopback interface in debug mode
    app.run(
        host="127.0.0.1",
        port=8000,
        debug=True
    )