working search and webmentions receiver

2017-05-26 10:14:24 +01:00 · 2017-05-26 10:14:24 +01:00 · 558195288d
commit 558195288d
parent 1b7b354a88
7 changed files with 752 additions and 123 deletions
--- a/envelope.py
+++ b/envelope.py
@ -0,0 +1,193 @@
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText
+from email.mime.image import MIMEImage
+from email.header import Header
+import email.charset
+from email.generator import Generator
+from io import StringIO
+import mimetypes
+from email.mime.base import MIMEBase
+from email.encoders import encode_base64
+import email.utils
+
+import time
+import getpass
+import socket
+import shutil
+import requests
+import tempfile
+import atexit
+import os
+import re
+import smtplib
+import logging
+from shared import Pandoc
+
+class Letter(object):
+    """A multipart e-mail built from Markdown text.
+
+    The message carries a plain-text part and an HTML part (the text run
+    through Pandoc); images referenced in the Markdown can be downloaded
+    and attached inline. `make()` builds the message into `self.ready`,
+    `send()` hands it to the SMTP server on 127.0.0.1:25.
+    """
+
+    def __init__(self, sender=None, recipient=None, subject='', text=''):
+        """Store the message parts and create a temporary working directory.
+
+        sender and recipient are (name, address) pairs as accepted by
+        email.utils.formataddr. sender defaults to (current user, hostname),
+        recipient defaults to the sender.
+        """
+        self.sender = sender or (getpass.getuser(), socket.gethostname())
+        self.recipient = recipient or self.sender
+
+        # the first positional argument of mkdtemp is the *suffix*, so the
+        # marker has to be passed by keyword to end up in front of the name
+        self.tmp = tempfile.mkdtemp(
+            prefix='envelope_',
+            dir=tempfile.gettempdir()
+        )
+        # downloaded images live here; remove the directory on exit
+        atexit.register(
+            shutil.rmtree,
+            os.path.abspath(self.tmp)
+        )
+        self.text = text
+        self.subject = subject
+        self.images = []
+        self.ready = None
+        self.time = time.time()
+        self.headers = {}
+
+    @property
+    def _html(self):
+        """The current text converted by Pandoc."""
+        return Pandoc().convert(self.text)
+
+    @property
+    def _tmpl(self):
+        """The converted text wrapped in a minimal HTML document."""
+        return "<html><head></head><body>%s</body></html>" % (self._html)
+
+    def __pull_image(self, img):
+        """Download one image into the temporary directory.
+
+        The image is added to self.images only when the server answers
+        with HTTP 200 and the file was written.
+        """
+        fname = os.path.basename(img)
+        i = {
+            'url': img,
+            'name': fname,
+            'tmp': os.path.join(self.tmp, fname),
+        }
+
+        logging.debug("pulling image %s", i['url'])
+        r = requests.get(i['url'], stream=True)
+        try:
+            if r.status_code != 200:
+                logging.warning(
+                    "pulling image %s failed with status %s",
+                    i['url'],
+                    r.status_code
+                )
+                return
+            with open(i['tmp'], 'wb') as f:
+                logging.debug("writing image %s", i['tmp'])
+                r.raw.decode_content = True
+                shutil.copyfileobj(r.raw, f)
+        finally:
+            # a streamed response keeps its connection until it is closed
+            r.close()
+
+        if not isinstance(self.images, list):
+            self.images = []
+        self.images.append(i)
+
+    def __pull_images(self):
+        """Download every image referenced by Markdown image syntax."""
+        # NOTE(review): the capture group also includes an optional title
+        # after the URL, so `![a](http://x/y.png "t")` yields a "URL" that
+        # contains the title - confirm whether titled images are expected.
+        mdmatch = re.compile(
+            r'!\[.*\]\((.*?\.(?:jpe?g|png|gif)(?:\s+[\'\"]?.*?[\'\"]?)?)\)'
+            r'(?:\{.*?\})?'
+        )
+        for img in mdmatch.findall(self.text):
+            self.__pull_image(img)
+
+    def __attach_images(self):
+        """Download the images and point the text at their cid: references."""
+        self.__pull_images()
+        for i in self.images:
+            cid = 'cid:%s' % (i['name'])
+            logging.debug("replacing %s with %s", i['url'], cid)
+            self.text = self.text.replace(i['url'], cid)
+
+    def make(self, inline_images=True):
+        """Build the message and store it, utf-8 encoded, in self.ready.
+
+        With inline_images set, images referenced in the text are
+        downloaded first and attached next to the HTML part.
+        """
+        if inline_images:
+            self.__attach_images()
+
+        # Python, by default, encodes utf-8 in base64, which makes plain text
+        # mail painful; this overrides and forces Quoted Printable.
+        # Quoted Printable is still awful, but better, and we're going to
+        # force the mail to be 8bit encoded.
+        # Note: enforcing 8bit breaks compatibility with ancient mail clients.
+        email.charset.add_charset('utf-8', email.charset.QP, email.charset.QP, 'utf-8')
+
+        mail = MIMEMultipart('alternative')
+
+        # --- setting headers ---
+        self.headers = {
+            'Subject': Header(re.sub(r"\r?\n?$", "", self.subject, 1), 'utf-8').encode(),
+            'To': email.utils.formataddr(self.recipient),
+            'From': email.utils.formataddr(self.sender),
+            'Date': email.utils.formatdate(self.time, localtime=True)
+        }
+
+        for k, v in self.headers.items():
+            mail.add_header(k, "%s" % v)
+        logging.debug("headers: %s", self.headers)
+
+        # --- adding plain text ---
+        text = self.text
+        _text = MIMEText(text, 'text', _charset='utf-8')
+        # ---
+        # this is the part where we overwrite the way Python thinks:
+        # force the text to be the actual, unencoded, utf-8.
+        # Note: these steps break compatibility with ancient mail clients.
+        _text.replace_header('Content-Transfer-Encoding', '8bit')
+        _text.replace_header('Content-Type', 'text/plain; charset=utf-8')
+        _text.set_payload(self.text)
+        # ---
+        logging.debug("text: %s", _text)
+        mail.attach(_text)
+
+        # --- HTML bit ---
+        # this is where it gets tricky: the HTML part should be a 'related'
+        # wrapper, in which the text and all the related images are sitting
+        _envelope = MIMEMultipart('related')
+
+        html = self._tmpl
+        _html = MIMEText(html, 'html', _charset='utf-8')
+        # ---
+        # see above under 'adding plain text'
+        _html.replace_header('Content-Transfer-Encoding', '8bit')
+        _html.replace_header('Content-Type', 'text/html; charset=utf-8')
+        _html.set_payload(html)
+        # ---
+        logging.debug("HTML: %s", _html)
+        _envelope.attach(_html)
+
+        for i in self.images:
+            mimetype, encoding = mimetypes.guess_type(i['tmp'])
+            mimetype = mimetype or 'application/octet-stream'
+            mimetype = mimetype.split('/', 1)
+            attachment = MIMEBase(mimetype[0], mimetype[1])
+            with open(i['tmp'], 'rb') as img:
+                attachment.set_payload(img.read())
+            # the payload is in memory now, the downloaded copy can go
+            os.unlink(i['tmp'])
+
+            encode_base64(attachment)
+            attachment.add_header(
+                'Content-Disposition',
+                'inline',
+                filename=i['name']
+            )
+            # must match the cid: reference written by __attach_images
+            attachment.add_header(
+                'Content-ID',
+                '<%s>' % (i['name'])
+            )
+
+            _envelope.attach(attachment)
+
+        # add the whole html + image pack to the mail
+        mail.attach(_envelope)
+
+        str_io = StringIO()
+        g = Generator(str_io, False)
+        g.flatten(mail)
+
+        self.ready = str_io.getvalue().encode('utf-8')
+
+    def send(self):
+        """Send the built message through the SMTP server on 127.0.0.1:25.
+
+        Logs an error and returns when make() has not been called yet;
+        any failure while sending is logged, not raised.
+        """
+        if not self.ready:
+            logging.error('this mail is not ready')
+            return
+
+        try:
+            # the context manager closes the connection even when
+            # sendmail raises
+            with smtplib.SMTP('127.0.0.1', 25) as s:
+                # unless you do the encode, you'll get:
+                #   File "/usr/local/lib/python3.5/smtplib.py", line 850, in sendmail
+                #   msg = _fix_eols(msg).encode('ascii')
+                #   UnicodeEncodeError: 'ascii' codec can't encode character '\xa0' in position 1073: ordinal not in range(128)
+                s.sendmail(self.headers['From'], self.headers['To'], self.ready)
+        except Exception as e:
+            logging.error('sending mail failed with error: %s', e)
--- a/nasg.py
+++ b/nasg.py
@ -8,15 +8,15 @@ import shutil
 import logging
 import json
 import glob
-import subprocess
 import tempfile
 import atexit
 import re
 import hashlib
 import math
 import asyncio
-import magic
+import csv

+import magic
 import arrow
 import wand.image
 import similar_text
@ -27,7 +27,7 @@ import requests
 from breadability.readable import Article
 from whoosh import index
 import jinja2
-
+import urllib.parse
 import shared

 def splitpath(path):
@ -70,13 +70,19 @@ class Indexer(object):
        for url, offlinecopy in singular.offlinecopies.items():
            content_remote.append("%s" % offlinecopy)

+        weight = 1
+        if singular.isbookmark:
+            weight = 10
+        if singular.ispage:
+            weight = 100
+
        self.writer.add_document(
            title=singular.title,
            url=singular.url,
            content=" ".join(list(map(str,[*content_real, *content_remote]))),
            date=singular.published.datetime,
            tags=",".join(list(map(str, singular.tags))),
-            weight=1,
+            weight=weight,
            img="%s" % singular.photo
        )

@ -190,35 +196,6 @@ class Renderer(object):
            return True
        return False

-    #def rendersingular(self, singular):
-        #logging.debug("rendering and saving %s", singular.fname)
-        #targetdir = os.path.abspath(os.path.join(
-            #shared.config.get('target', 'builddir'),
-            #singular.fname
-        #))
-        #target = os.path.join(targetdir, 'index.html')
-
-        #if not shared.config.get('params', 'force') and os.path.isfile(target):
-            #ttime = int(os.path.getmtime(target))
-            #if ttime == singular.mtime:
-                #logging.debug('%s exists and up-to-date (lastmod: %d)', target, ttime)
-                #return
-
-        #if not os.path.isdir(targetdir):
-            #os.mkdir(targetdir)
-
-        #tmpl = self.j2.get_template(singular.tmplfile)
-        #tmplvars = {
-            #'post': singular.tmplvars,
-            #'site': self.sitevars,
-            #'taxonomy': {},
-        #}
-        #r = tmpl.render(tmplvars)
-        #with open(target, "w") as html:
-            #html.write(r)
-            #html.close()
-            #os.utime(target, (singular.mtime, singular.mtime))
-

 class BaseIter(object):
    def __init__(self):
@ -248,97 +225,97 @@ class BaseIter(object):
            yield (k, v)
        return

-class CMDLine(object):
-    def __init__(self, executable):
-        self.executable = self._which(executable)
-        if self.executable is None:
-            raise OSError('No %s found in PATH!' % executable)
-            return
+#class CMDLine(object):
+    #def __init__(self, executable):
+        #self.executable = self._which(executable)
+        #if self.executable is None:
+            #raise OSError('No %s found in PATH!' % executable)
+            #return

-    @staticmethod
-    def _which(name):
-        for d in os.environ['PATH'].split(':'):
-            which = glob.glob(os.path.join(d, name), recursive=True)
-            if which:
-                return which.pop()
-        return None
+    #@staticmethod
+    #def _which(name):
+        #for d in os.environ['PATH'].split(':'):
+            #which = glob.glob(os.path.join(d, name), recursive=True)
+            #if which:
+                #return which.pop()
+        #return None

-    def __enter__(self):
-        self.process = subprocess.Popen(
-            [self.executable, "-stay_open", "True",  "-@", "-"],
-            universal_newlines=True,
-            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
-        return self
+    #def __enter__(self):
+        #self.process = subprocess.Popen(
+            #[self.executable, "-stay_open", "True",  "-@", "-"],
+            #universal_newlines=True,
+            #stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+        #return self

-    def  __exit__(self, exc_type, exc_value, traceback):
-        self.process.stdin.write("-stay_open\nFalse\n")
-        self.process.stdin.flush()
+    #def  __exit__(self, exc_type, exc_value, traceback):
+        #self.process.stdin.write("-stay_open\nFalse\n")
+        #self.process.stdin.flush()

-    def execute(self, *args):
-        args = args + ("-execute\n",)
-        self.process.stdin.write(str.join("\n", args))
-        self.process.stdin.flush()
-        output = ""
-        fd = self.process.stdout.fileno()
-        while not output.endswith(self.sentinel):
-            output += os.read(fd, 4096).decode('utf-8', errors='ignore')
-        return output[:-len(self.sentinel)]
+    #def execute(self, *args):
+        #args = args + ("-execute\n",)
+        #self.process.stdin.write(str.join("\n", args))
+        #self.process.stdin.flush()
+        #output = ""
+        #fd = self.process.stdout.fileno()
+        #while not output.endswith(self.sentinel):
+            #output += os.read(fd, 4096).decode('utf-8', errors='ignore')
+        #return output[:-len(self.sentinel)]


-class Pandoc(CMDLine):
-    """ Handles calling external binary `exiftool` in an efficient way """
-    def __init__(self, md2html=True):
-        super().__init__('pandoc')
-        if md2html:
-            self.i = "markdown+" + "+".join([
-                'backtick_code_blocks',
-                'auto_identifiers',
-                'fenced_code_attributes',
-                'definition_lists',
-                'grid_tables',
-                'pipe_tables',
-                'strikeout',
-                'superscript',
-                'subscript',
-                'markdown_in_html_blocks',
-                'shortcut_reference_links',
-                'autolink_bare_uris',
-                'raw_html',
-                'link_attributes',
-                'header_attributes',
-                'footnotes',
-            ])
-            self.o = 'html5'
-        else:
-            self.o = "markdown-" + "-".join([
-                'raw_html',
-                'native_divs',
-                'native_spans',
-            ])
-            self.i = 'html'
+#class Pandoc(CMDLine):
+    #""" Handles calling external binary `exiftool` in an efficient way """
+    #def __init__(self, md2html=True):
+        #super().__init__('pandoc')
+        #if md2html:
+            #self.i = "markdown+" + "+".join([
+                #'backtick_code_blocks',
+                #'auto_identifiers',
+                #'fenced_code_attributes',
+                #'definition_lists',
+                #'grid_tables',
+                #'pipe_tables',
+                #'strikeout',
+                #'superscript',
+                #'subscript',
+                #'markdown_in_html_blocks',
+                #'shortcut_reference_links',
+                #'autolink_bare_uris',
+                #'raw_html',
+                #'link_attributes',
+                #'header_attributes',
+                #'footnotes',
+            #])
+            #self.o = 'html5'
+        #else:
+            #self.o = "markdown-" + "-".join([
+                #'raw_html',
+                #'native_divs',
+                #'native_spans',
+            #])
+            #self.i = 'html'

-    def convert(self, text):
-        cmd = (
-            self.executable,
-            '-o-',
-            '--from=%s' % self.i,
-            '--to=%s' % self.o
-        )
-        logging.debug('converting content with Pandoc')
-        p = subprocess.Popen(
-            cmd,
-            stdin=subprocess.PIPE,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-        )
+    #def convert(self, text):
+        #cmd = (
+            #self.executable,
+            #'-o-',
+            #'--from=%s' % self.i,
+            #'--to=%s' % self.o
+        #)
+        #logging.debug('converting content with Pandoc')
+        #p = subprocess.Popen(
+            #cmd,
+            #stdin=subprocess.PIPE,
+            #stdout=subprocess.PIPE,
+            #stderr=subprocess.PIPE,
+        #)

-        stdout, stderr = p.communicate(input=text.encode())
-        if stderr:
-            logging.error("Error during pandoc covert:\n\t%s\n\t%s", cmd, stderr)
-        return stdout.decode('utf-8').strip()
+        #stdout, stderr = p.communicate(input=text.encode())
+        #if stderr:
+            #logging.error("Error during pandoc covert:\n\t%s\n\t%s", cmd, stderr)
+        #return stdout.decode('utf-8').strip()

 # based on http://stackoverflow.com/a/10075210
-class ExifTool(CMDLine):
+class ExifTool(shared.CMDLine):
    """ Handles calling external binary `exiftool` in an efficient way """
    sentinel = "{ready}\n"

@ -419,6 +396,7 @@ class WebImage(object):
        self.alttext = ''
        self.sizes = []
        self.fallbacksize = int(shared.config.get('common','fallbackimg', fallback='720'))
+        self.cl = None

        for size in shared.config.options('downsize'):
            sizeext = shared.config.get('downsize', size)
@ -453,7 +431,7 @@ class WebImage(object):
            )

    def __str__(self):
-        if self.is_downsizeable:
+        if self.is_downsizeable and not self.cl:
            return '\n<figure class="photo"><a target="_blank" class="adaptive" href="%s"><img src="%s" class="adaptimg" alt="%s" /></a><figcaption class=\"caption\">%s%s</figcaption></figure>\n' % (
                self.target,
                self.fallback,
@ -461,8 +439,18 @@ class WebImage(object):
                self.fname,
                self.ext
            )
+        elif self.cl:
+            self.cl = self.cl.replace('.', ' ')
+            return '<img src="%s" class="%s" alt="%s" title="%s%s" />' % (
+                self.fallback,
+                self.cl,
+                self.alttext,
+                self.fname,
+                self.ext
+            )
+
        else:
-            return '\n<figure class="picture"><img src="%s" class="aligncenter" alt="%s" /><figcaption class=\"caption\">%s%s</figcaption></figure>\n' % (
+            return '<img src="%s" class="aligncenter" alt="%s" title="%s%s" />' % (
                self.fallback,
                self.alttext,
                self.fname,
@ -768,10 +756,15 @@ class Content(BaseIter):
        self.front = Taxonomy()

    def populate(self):
+        now = arrow.utcnow().timestamp
        for fpath in self.files:
            item = Singular(fpath, self.images)
            self.append(item.pubtime, item)

+            if item.pubtime > now:
+                logging.warning("skipping future post %s", item.fname)
+                continue
+
            if item.isonfront:
                self.front.append(item.pubtime, item)

@ -804,7 +797,7 @@ class Content(BaseIter):
            'sitemap.txt'
        )
        urls = []
-        for t, item in self.data.items():
+        for item in self.data.values():
            urls.append( "%s/%s/" % (
                shared.config.get('site', 'url'),
                item.fname
@ -814,6 +807,47 @@ class Content(BaseIter):
            logging.info("writing sitemap to %s" % (target))
            f.write("\n".join(urls))

+    def magicphp(self, renderer):
+        """Render the `magic.php` template into the build directory.
+
+        The template receives `redirects`, a list of (from, to) pairs read
+        from the configured redirects file plus one (shortslug, fname) pair
+        per content item, and `gones`, the first column of the configured
+        gone file. Both files are space-delimited and optional.
+
+        renderer must expose the Jinja2 environment as `renderer.j2`.
+        """
+        redirects = []
+        gones = []
+        rfile = os.path.join(
+            shared.config.get('common', 'basedir'),
+            shared.config.get('common', 'redirects')
+        )
+        if os.path.isfile(rfile):
+            with open(rfile, newline='') as csvfile:
+                r = csv.reader(csvfile, delimiter=' ')
+                for row in r:
+                    # blank or single-column lines have no target to
+                    # redirect to; indexing them would raise IndexError
+                    if len(row) < 2:
+                        continue
+                    redirects.append((row[0], row[1]))
+        for item in self.data.values():
+            redirects.append((item.shortslug, item.fname))
+
+        rfile = os.path.join(
+            shared.config.get('common', 'basedir'),
+            shared.config.get('common', 'gone')
+        )
+        if os.path.isfile(rfile):
+            with open(rfile, newline='') as csvfile:
+                r = csv.reader(csvfile, delimiter=' ')
+                for row in r:
+                    # csv yields an empty list for a blank line
+                    if not row:
+                        continue
+                    gones.append(row[0])
+
+        tmplvars = {
+            'redirects': redirects,
+            'gones': gones
+        }
+
+        r = renderer.j2.get_template("magic.php").render(tmplvars)
+        target = os.path.abspath(os.path.join(
+            shared.config.get('target', 'builddir'),
+            'magic.php'
+        ))
+
+        with open(target, "w") as html:
+            logging.debug('writing %s', target)
+            html.write(r)
+
 class Singular(object):
    def __init__(self, path, images):
        logging.debug("initiating singular object from %s", path)
@ -874,6 +908,9 @@ class Singular(object):
                logging.debug("%s not found in images", fname)
                continue

+            if cl:
+                image.cl = cl
+
            logging.debug(
                "replacing %s in content with %s",
                shortcode,
@ -904,6 +941,24 @@ class Singular(object):

        return reactions

+    @property
+    def urls(self):
+        """List the external URLs found in the content and the reactions.
+
+        Links whose network location appears in the configured site
+        domains are left out; every remaining link is listed once, in
+        order of first appearance.
+        """
+        urls = shared.URLREGEX.findall(self.content)
+
+        for reactionurls in self.reactions.values():
+            urls = [*urls, *reactionurls]
+
+        # NOTE(review): this is a substring test against the raw config
+        # string, kept as it was; webmention.py splits the same option on
+        # spaces before comparing - confirm which behaviour is intended.
+        mydomains = shared.config.get('site', 'domains')
+
+        r = []
+        for link in urls:
+            domain = '{uri.netloc}'.format(uri=urllib.parse.urlparse(link))
+            if domain in mydomains:
+                continue
+            # r is a list: membership test, lists have no .get()
+            if link in r:
+                continue
+            r.append(link)
+
+        return r
+
    @property
    def lang(self):
        lang = 'en'
@ -976,7 +1031,7 @@ class Singular(object):
            maybe = self.meta.get(maybe, False)
            if maybe:
                return maybe
-        return self.fname
+        return ''

    @property
    def url(self):
@ -1091,6 +1146,7 @@ class Singular(object):
            'slug': self.fname,
            'shortslug': self.shortslug,
            'rssenclosure': self.rssenclosure,
+            'copies': self.offlinecopies,
        }

    @property
@ -1143,6 +1199,12 @@ class NASG(object):
    def __init__(self):
        # --- set params
        parser = argparse.ArgumentParser(description='Parameters for NASG')
+        parser.add_argument(
+            '--clear',
+            action='store_true',
+            default=False,
+            help='clear build directory in advance'
+        )
        parser.add_argument(
            '--regenerate',
            action='store_true',
@ -1217,6 +1279,13 @@ class NASG(object):
            await searchdb.append(singular)

    def run(self):
+
+        if shared.config.getboolean('params', 'clear'):
+            input('about to clear build directory, press enter to continue')
+            shutil.rmtree(os.path.abspath(
+                shared.config.get('target', 'builddir')
+            ))
+
        loop = asyncio.get_event_loop()

        for d in shared.config.options('target'):
@ -1235,8 +1304,8 @@ class NASG(object):
        content = Content(images)
        content.populate()

+        renderer = Renderer()
        if not shared.config.getboolean('params', 'norender'):
-            renderer = Renderer()
            logging.info("rendering content")
            loop.run_until_complete(self.__acrender(content, renderer))

@ -1249,6 +1318,9 @@ class NASG(object):
            logging.info("rendering sitemap")
            content.sitemap()

+        logging.info("render magic.php")
+        content.magicphp(renderer)
+
        logging.info("copy the static bits")
        src = shared.config.get('source', 'staticdir')
        for item in os.listdir(src):
@ -1264,7 +1336,6 @@ class NASG(object):

        loop.close()

-
 if __name__ == '__main__':
    worker = NASG()
    worker.run()
--- a/new.py
+++ b/new.py
@ -36,7 +36,7 @@ if __name__ == '__main__':
        now = arrow.utcnow()
        parser = argparse.ArgumentParser(description='create doc and print it to stdout')
        parser.add_argument('--tags', '-t', help='; separated, quoted list of tags')
-        parser.add_argument('--date', '-d', help=' YYYY-mm-ddTHH:MM:SS+TZTZ formatted date, if not now')
+        parser.add_argument('--date', '-d', help=' YYYY-mm-ddTHH:MM:SS+TZ formatted date, if not now')
        parser.add_argument('--slug', '-s', help='slug (normally autogenerated from title or pubdate)')
        parser.add_argument('--title', '-l', help='title of new entry')
        parser.add_argument('--bookmark', '-b', help='URL to bookmark')
@ -48,7 +48,7 @@ if __name__ == '__main__':
        args = vars(parser.parse_args())

        if not args['date']:
-            d = now.format("YYYY-MM-DDTHH:mm:ssZ")
+            d = now.format(shared.ARROWISO)
            args['date'] = input('Date [%s]: ' % (d)) or d

        if not args['title']:
--- a/requirements.txt
+++ b/requirements.txt
@ -3,6 +3,7 @@ appdirs==1.4.3
 arrow==0.10.0
 breadability==0.1.20
 chardet==3.0.3
+decorator==4.0.11
 docopt==0.6.2
 httptools==0.0.9
 Jinja2==2.9.6
@ -23,6 +24,7 @@ ujson==1.35
 unicode-slugify==0.1.3
 Unidecode==0.4.20
 uvloop==0.8.0
+validators==0.11.3
 Wand==0.4.4
 websockets==3.3
 Whoosh==2.7.4
--- a/search.py
+++ b/search.py
@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+
+import asyncio
+import uvloop
+import os
+
+from sanic import Sanic
+import sanic.response
+from sanic.log import log as logging
+from whoosh import index
+from whoosh import qparser
+from whoosh import fields
+from whoosh import analysis
+import jinja2
+import shared
+
+def SearchHandler(query, tmpl):
+    """Run a search against the Whoosh index and render the results.
+
+    query is the raw search string; a `+` becomes ` AND ` and a ` -`
+    becomes ` NOT ` before it is parsed against the title, content and
+    tags fields. tmpl is the template the results are rendered with.
+
+    Returns a sanic response: HTTP 400 with a plain-text message when
+    query is empty, otherwise HTTP 200 with the rendered HTML.
+    """
+    response = sanic.response.text(
+        "You seem to have forgot to enter what you want to search for. Please try again.",
+        status=400
+    )
+
+    if not query:
+        return response
+
+    query = query.replace('+', ' AND ').replace(' -', ' NOT ')
+    ix = index.open_dir(os.path.abspath(os.path.join(
+            shared.config.get('target', 'builddir'),
+            shared.config.get('var', 'searchdb')
+    )))
+
+    qp = qparser.MultifieldParser(
+        ["title", "content", "tags"],
+        schema = shared.schema
+    )
+
+    q = qp.parse(query)
+    results = []
+    # close the searcher when done instead of leaving one open per request;
+    # the hits are only valid while it is open, so they are copied inside
+    with ix.searcher() as searcher:
+        r = searcher.search(q, sortedby="weight", limit=100)
+        logging.info("results for '%s': %i", query, len(r))
+        for result in r:
+            res = {
+                'title': result['title'],
+                'url': result['url'],
+                'highlight': result.highlights("content"),
+            }
+            if 'img' in result:
+                res['img'] = result['img']
+            results.append(res)
+
+    tvars = {
+        'term': query,
+        'posts': results,
+    }
+
+    logging.info("collected %i results to render", len(results))
+    response = sanic.response.html(tmpl.render(tvars), status=200)
+    return response
+
+if __name__ == '__main__':
+    # serve /search on 127.0.0.1:8001 with uvloop as the asyncio event loop
+    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
+    app = Sanic()
+
+    # the result template is loaded once and shared by every request
+    jldr = jinja2.FileSystemLoader(
+        searchpath=shared.config.get('source', 'templatesdir')
+    )
+    jenv = jinja2.Environment(loader=jldr)
+    tmpl = jenv.get_template('searchresults.html')
+
+    # the allowed HTTP methods belong to the route, not to the handler's
+    # signature, where they were only an unused default argument
+    @app.route("/search", methods=["GET"])
+    async def search(request):
+        query = request.args.get('s')
+        r = SearchHandler(query, tmpl)
+        return r
+
+    app.run(host="127.0.0.1", port=8001, debug=True)
--- a/shared.py
+++ b/shared.py
@ -1,8 +1,11 @@
 import configparser
 import os
+import re
+import glob
+import logging
+import subprocess
 from whoosh import fields
 from whoosh import analysis
-import re

 def __expandconfig(config):
    """ add the dirs to the config automatically """
@ -18,6 +21,8 @@ def __expandconfig(config):
    ))
    return config

+ARROWISO = 'YYYY-MM-DDTHH:mm:ssZ'
+
 URLREGEX = re.compile(
    r'\s+https?\:\/\/?[a-zA-Z0-9\.\/\?\:@\-_=#]+'
    r'\.[a-zA-Z0-9\.\/\?\:@\-_=#]*'
@ -74,3 +79,91 @@ config = configparser.ConfigParser(
 )
 config.read('config.ini')
 config = __expandconfig(config)
+
+class CMDLine(object):
+    """Base class for wrappers around an external command-line binary.
+
+    The constructor resolves the binary from PATH. The context-manager
+    methods start it with `-stay_open True -@ -` so several commands can
+    be sent over stdin; `execute()` reads the output up to `self.sentinel`,
+    which this class does not define - a subclass that uses `execute()`
+    has to provide it.
+    """
+
+    def __init__(self, executable):
+        """Locate `executable` in PATH; raise OSError when it is missing."""
+        self.executable = self._which(executable)
+        if self.executable is None:
+            raise OSError('No %s found in PATH!' % executable)
+
+    @staticmethod
+    def _which(name):
+        """Return a path matching `name` in a PATH directory, or None."""
+        # os.pathsep instead of a literal ':' keeps this platform independent
+        for d in os.environ['PATH'].split(os.pathsep):
+            which = glob.glob(os.path.join(d, name), recursive=True)
+            if which:
+                return which.pop()
+        return None
+
+    def __enter__(self):
+        """Start the binary as a long-running child process."""
+        self.process = subprocess.Popen(
+            [self.executable, "-stay_open", "True",  "-@", "-"],
+            universal_newlines=True,
+            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+        return self
+
+    def  __exit__(self, exc_type, exc_value, traceback):
+        """Tell the child process to stop waiting for commands."""
+        self.process.stdin.write("-stay_open\nFalse\n")
+        self.process.stdin.flush()
+
+    def execute(self, *args):
+        """Send `args` to the child process and return its output.
+
+        The output is read up to `self.sentinel`, which is stripped. If
+        the child closes its stdout before the sentinel shows up, whatever
+        was read so far is returned instead of waiting forever.
+        """
+        args = args + ("-execute\n",)
+        self.process.stdin.write(str.join("\n", args))
+        self.process.stdin.flush()
+        fd = self.process.stdout.fileno()
+        # collect raw bytes and decode once at the end: decoding each chunk
+        # on its own would drop a multibyte character split across reads
+        sentinel = self.sentinel.encode('utf-8')
+        raw = b""
+        while not raw.endswith(sentinel):
+            chunk = os.read(fd, 4096)
+            if not chunk:
+                # EOF: the child is gone, the sentinel will never arrive
+                break
+            raw += chunk
+        if raw.endswith(sentinel):
+            raw = raw[:-len(sentinel)]
+        return raw.decode('utf-8', errors='ignore')
+
+class Pandoc(CMDLine):
+    """ Converts text between Markdown and HTML with the external `pandoc` binary """
+    def __init__(self, md2html=True):
+        # md2html=True converts Markdown (with the extensions listed below)
+        # to HTML5; md2html=False converts HTML back to Markdown.
+        # Raises OSError (from CMDLine) when pandoc is not found in PATH.
+        super().__init__('pandoc')
+        if md2html:
+            # input format: markdown with these extensions switched on
+            self.i = "markdown+" + "+".join([
+                'backtick_code_blocks',
+                'auto_identifiers',
+                'fenced_code_attributes',
+                'definition_lists',
+                'grid_tables',
+                'pipe_tables',
+                'strikeout',
+                'superscript',
+                'subscript',
+                'markdown_in_html_blocks',
+                'shortcut_reference_links',
+                'autolink_bare_uris',
+                'raw_html',
+                'link_attributes',
+                'header_attributes',
+                'footnotes',
+            ])
+            self.o = 'html5'
+        else:
+            # output format: markdown with these extensions switched off
+            self.o = "markdown-" + "-".join([
+                'raw_html',
+                'native_divs',
+                'native_spans',
+            ])
+            self.i = 'html'
+
+    def convert(self, text):
+        # Pipes `text` through a one-shot pandoc process and returns the
+        # stripped, utf-8 decoded stdout. Anything pandoc writes to stderr
+        # is logged as an error; the stdout is returned regardless.
+        cmd = (
+            self.executable,
+            '-o-',
+            '--from=%s' % self.i,
+            '--to=%s' % self.o
+        )
+        logging.debug('converting content with Pandoc')
+        p = subprocess.Popen(
+            cmd,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+
+        stdout, stderr = p.communicate(input=text.encode())
+        if stderr:
+            logging.error("Error during pandoc covert:\n\t%s\n\t%s", cmd, stderr)
+        return stdout.decode('utf-8').strip()
--- a/webmention.py
+++ b/webmention.py
@ -0,0 +1,193 @@
+import asyncio
+import uvloop
+import os
+import hashlib
+import json
+import urllib.parse
+import frontmatter
+from sanic import Sanic
+import sanic.response
+from sanic.log import log as logging
+import validators
+import arrow
+from webmentiontools import urlinfo
+import shared
+import envelope
+
+
+class WebmentionHandler(object):
+    """Validate, fetch, store and announce one incoming webmention.
+
+    Usage: build the handler with the posted `source` and `target`, call
+    `run()`, then send back `self.r`, which always holds the HTTP response
+    matching the outcome of the last step that reached a verdict.
+    """
+
+    def __init__(self, source, target):
+        """Remember the posted URLs and prepare a pessimistic default response.
+
+        :param source: value of the `source` form field (may be None).
+        :param target: value of the `target` form field (may be None).
+        """
+        self.source = source
+        self.target = target
+        self.now = arrow.utcnow().timestamp
+        logging.info("incoming webmention %s => %s", self.source, self.target)
+
+        # stays in place only if no later step replaces it
+        self.r = sanic.response.text(
+            "something went wrong on my side, could you please let me know at hello@petermolnar.eu ?",
+            status=500
+        )
+
+    def run(self):
+        """Process the webmention; stop at the first step that rejects it.
+
+        Saving and notifying happen only when both validation and parsing
+        succeed, so a rejection response set by those steps is never
+        overwritten by the "accepted" response of `_save()`.
+        """
+        if not self._validate():
+            return
+
+        if not self._parse():
+            return
+
+        self._save()
+        self._notify()
+
+    def _validate(self):
+        """Check the raw parameters without fetching anything.
+
+        Rejects (HTTP 400) when either parameter is missing or not a valid
+        URL, when the target host is not one of the configured site domains,
+        or when the source host is one of them (selfping).
+
+        :return: True when the request may be processed further.
+        """
+        # a sequence of pairs rather than a dict keyed by URL: identical
+        # source and target must not collapse into a single check
+        checks = (
+            (self.source, '"source" parameter is an invalid URL'),
+            (self.target, '"target" parameter is an invalid URL'),
+        )
+        for url, emsg in checks:
+            logging.debug("validating URL %s", url)
+            # `not url` covers a form field that was not sent at all
+            if not url or not validators.url(url):
+                self.r = sanic.response.text(
+                    emsg,
+                    status=400
+                )
+                return False
+
+        logging.debug("checking target domain")
+        _mydomains = shared.config.get('site', 'domains').split(" ")
+        _target_domain = urllib.parse.urlparse(self.target).netloc
+        if _target_domain not in _mydomains:
+            self.r = sanic.response.text(
+                "'target' is not in the list of allowed domains",
+                status=400
+            )
+            return False
+
+        logging.debug("checking selfpings")
+        _source_domain = urllib.parse.urlparse(self.source).netloc
+        if _source_domain in _mydomains:
+            self.r = sanic.response.text(
+                "selfpings are not allowed",
+                status=400
+            )
+            return False
+
+        return True
+
+    def _parse(self):
+        """Fetch source and target and verify that the source links to the target.
+
+        On success `self.source` / `self.target` are replaced with the
+        `realurl` reported by `urlinfo.UrlInfo` (presumably the URL after
+        redirects - confirm against webmentiontools).
+
+        :return: True when both pages were fetched and the link was found,
+            False otherwise (with `self.r` set to the rejection response).
+        """
+        logging.debug("fetching %s", self.source)
+        self._source = urlinfo.UrlInfo(self.source)
+        if self._source.error:
+            self.r = sanic.response.text(
+                "couldn't fetch 'source' from %s" % (self.source),
+                status=408
+            )
+            return False
+
+        self.source = self._source.realurl
+        if not self._source.linksTo(self.target):
+            self.r = sanic.response.text(
+                "'source' (%s) does not link to 'target' (%s)" % (
+                    self.source,
+                    self.target
+                ),
+                status=400
+            )
+            return False
+
+        logging.debug("fetching %s", self.target)
+        self._target = urlinfo.UrlInfo(self.target)
+        if self._target.error:
+            self.r = sanic.response.text(
+                "couldn't fetch 'target' from %s" % (self.target),
+                status=408
+            )
+            return False
+
+        self.target = self._target.realurl
+        return True
+
+    def _save(self):
+        """Write the webmention as a frontmatter document and mark it accepted.
+
+        The file lives in the configured comments directory and is named by
+        `mhash`, so a webmention with identical metadata overwrites its
+        earlier copy.
+        """
+        doc = frontmatter.loads('')
+        doc.metadata = self.meta
+        doc.content = self.content
+        target = os.path.join(
+            shared.config.get('source', 'commentsdir'),
+            self.mhash
+        )
+        if os.path.isfile(target):
+            logging.warning('updating existing webmention %s', target)
+        else:
+            logging.warning('saving incoming webmention to %s', target)
+
+        with open(target, 'wt') as t:
+            t.write(frontmatter.dumps(doc))
+            self.r = sanic.response.text(
+                "accepted",
+                status=202
+            )
+
+    def _notify(self):
+        """Mail a Markdown summary of the webmention to the configured recipient."""
+        # go through the property so this works even if nothing has
+        # populated the `_meta` cache yet
+        meta = self.meta
+        author = meta['author'] or {}
+        text = "# webmention\n## Source\n\nauthor\n:    %s\n\nURL\n:    %s\n\nemail\n:    %s\n\ndate\n:    %s\n\n## Target\n\nURL\n:    %s\n\n---\n\n%s" % (
+            author.get('name', self.source),
+            author.get('url', self.source),
+            author.get('email', ''),
+            meta['date'],
+            self.target,
+            self.content
+        )
+
+        l = envelope.Letter(
+            sender=(
+                shared.config.get('webmention', 'from_name'),
+                shared.config.get('webmention', 'from_address')
+            ),
+            recipient=(
+                shared.config.get('webmention', 'to_name'),
+                shared.config.get('webmention', 'to_address')
+            ),
+            subject="[webmention] %s" % self.source,
+            text=text
+        )
+        l.make()
+        l.send()
+
+    @property
+    def mhash(self):
+        """SHA-1 hex digest of the key-sorted JSON dump of `meta`."""
+        return hashlib.sha1(json.dumps(self.meta, sort_keys=True).encode('utf-8')).hexdigest()
+
+    @property
+    def meta(self):
+        """Metadata dict of the webmention, built once and cached in `_meta`.
+
+        Requires `_parse()` to have run, because it reads `self._source`.
+        """
+        if hasattr(self, '_meta'):
+            return self._meta
+
+        self._meta = {
+            'author': self._source.author(),
+            'type': self._source.relationType(),
+            'target': self.target,
+            'source': self.source,
+            'date': arrow.get(self._source.pubDate()).format(shared.ARROWISO),
+        }
+        return self._meta
+
+    @property
+    def content(self):
+        """Source page content converted from HTML to Markdown, cached in `_content`."""
+        if hasattr(self, '_content'):
+            return self._content
+
+        # from HTML to Markdown
+        self._content = shared.Pandoc(False).convert(self._source.content())
+        return self._content
+
+
+if __name__ == '__main__':
+    # use uvloop's event loop implementation for the asyncio-based server
+    loop_policy = uvloop.EventLoopPolicy()
+    asyncio.set_event_loop_policy(loop_policy)
+    app = Sanic()
+
+    @app.route("/webmention", methods=["POST"])
+    async def wm(request):
+        """Receive a webmention POST and answer with the handler's response."""
+        form = request.form
+        handler = WebmentionHandler(form.get('source'), form.get('target'))
+        # run() is a plain synchronous call; its verdict ends up in handler.r
+        handler.run()
+        return handler.r
+
+    # listen on localhost only, port 8002, with Sanic debug mode on
+    app.run(host="127.0.0.1", port=8002, debug=True)