working search and webmentions receiver

2017-05-26 10:14:24 +01:00 · 2017-05-26 10:14:24 +01:00 · 558195288d
commit 558195288d
parent 1b7b354a88
7 changed files with 752 additions and 123 deletions
--- a/envelope.py
+++ b/envelope.py
@ -0,0 +1,193 @@
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 from email.mime.image import MIMEImage
 from email.header import Header
 import email.charset
 from email.generator import Generator
 from io import StringIO
 import mimetypes
 from email.mime.base import MIMEBase
 from email.encoders import encode_base64
 import email.utils
 import time
 import getpass
 import socket
 import shutil
 import requests
 import tempfile
 import atexit
 import os
 import re
 import smtplib
 import logging
 from shared import Pandoc
 class Letter(object):
    """Build and send a multipart e-mail from a Markdown text.

    The plain text part is the Markdown source itself; the HTML part is the
    same text converted with Pandoc. Images referenced in the Markdown can be
    downloaded and attached inline, in which case their URLs in the text are
    replaced with ``cid:`` references.
    """

    def __init__(self, sender=None, recipient=None, subject='', text=''):
        """Prepare an unsent letter.

        sender, recipient: (name, address) tuples, passed to
            email.utils.formataddr when the headers are built. sender
            defaults to (current user, local hostname), recipient defaults
            to the sender.
        subject: subject line of the mail.
        text: Markdown body of the mail.
        """
        self.sender = sender or (getpass.getuser(), socket.gethostname())
        self.recipient = recipient or self.sender
        # scratch directory for downloaded images. 'envelope_' must be given
        # as prefix=: the first positional argument of mkdtemp is the suffix.
        self.tmp = tempfile.mkdtemp(
            prefix='envelope_',
            dir=tempfile.gettempdir()
        )
        # remove the scratch directory when the interpreter exits
        atexit.register(
            shutil.rmtree,
            os.path.abspath(self.tmp),
            ignore_errors=True
        )
        self.text = text
        self.subject = subject
        self.images = []
        # the flattened, utf-8 encoded message; filled in by make()
        self.ready = None
        self.time = time.time()
        self.headers = {}

    @property
    def _html(self):
        """The text converted by Pandoc (Markdown to HTML by default)."""
        return Pandoc().convert(self.text)

    @property
    def _tmpl(self):
        """The converted text wrapped into a minimal HTML document."""
        return "<html><head></head><body>%s</body></html>" % (self._html)

    def __pull_image(self, img):
        """Download one image into the scratch directory and register it.

        Nothing is registered unless the server answers with HTTP 200.
        """
        fname = os.path.basename(img)
        i = {
            'url': img,
            'name': fname,
            'tmp': os.path.join(self.tmp, fname),
        }
        logging.debug("pulling image %s", i['url'])
        r = requests.get(i['url'], stream=True)
        try:
            if r.status_code != 200:
                return
            with open(i['tmp'], 'wb') as f:
                logging.debug("writing image %s", i['tmp'])
                # have the raw stream undo any content encoding while copying
                r.raw.decode_content = True
                shutil.copyfileobj(r.raw, f)
        finally:
            # a streamed response keeps its connection until it is closed
            r.close()
        if not isinstance(self.images, list):
            self.images = []
        self.images.append(i)

    def __pull_images(self):
        """Download every Markdown image (jpg, png, gif) found in the text."""
        mdmatch = re.compile(
            r'!\[.*\]\((.*?\.(?:jpe?g|png|gif)(?:\s+[\'\"]?.*?[\'\"]?)?)\)'
            r'(?:\{.*?\})?'
        )
        for img in mdmatch.findall(self.text):
            self.__pull_image(img)

    def __attach_images(self):
        """Download the images and point the text at their cid: names."""
        self.__pull_images()
        for i in self.images:
            cid = 'cid:%s' % (i['name'])
            logging.debug("replacing %s with %s", i['url'], cid)
            self.text = self.text.replace(i['url'], cid)

    def make(self, inline_images=True):
        """Assemble the message and store it, utf-8 encoded, in self.ready.

        inline_images: when true, images referenced in the text are
            downloaded, attached to the HTML part and referenced by cid.
        """
        if inline_images:
            self.__attach_images()

        # Python, by default, encodes utf-8 in base64, which makes plain text
        # mail painful; this overrides and forces Quoted Printable.
        # Quoted Printable is still awful, but better, and we're going to
        # force the mail to be 8bit encoded.
        # Note: enforcing 8bit breaks compatibility with ancient mail clients.
        email.charset.add_charset('utf-8', email.charset.QP, email.charset.QP, 'utf-8')

        mail = MIMEMultipart('alternative')

        # --- setting headers ---
        self.headers = {
            # a trailing line break is stripped from the subject
            'Subject': Header(re.sub(r"\r?\n?$", "", self.subject, count=1), 'utf-8').encode(),
            'To': email.utils.formataddr(self.recipient),
            'From': email.utils.formataddr(self.sender),
            'Date': email.utils.formatdate(self.time, localtime=True)
        }

        for k, v in self.headers.items():
            mail.add_header(k, "%s" % v)
        logging.debug("headers: %s", self.headers)

        # --- adding plain text ---
        text = self.text
        _text = MIMEText(text, 'text', _charset='utf-8')
        # ---
        # this is the part where we overwrite the way Python thinks:
        # force the text to be the actual, unencoded, utf-8.
        # Note: these steps break compatibility with ancient mail clients.
        _text.replace_header('Content-Transfer-Encoding', '8bit')
        _text.replace_header('Content-Type', 'text/plain; charset=utf-8')
        _text.set_payload(self.text)
        # ---
        logging.debug("text: %s", _text)
        mail.attach(_text)

        # --- HTML bit ---
        # this is where it gets tricky: the HTML part should be a 'related'
        # wrapper, in which the text and all the related images are sitting
        _envelope = MIMEMultipart('related')

        html = self._tmpl
        _html = MIMEText(html, 'html', _charset='utf-8')
        # ---
        # see above under 'adding plain text'
        _html.replace_header('Content-Transfer-Encoding', '8bit')
        _html.replace_header('Content-Type', 'text/html; charset=utf-8')
        _html.set_payload(html)
        # ---
        logging.debug("HTML: %s", _html)
        _envelope.attach(_html)

        for i in self.images:
            mimetype, _ = mimetypes.guess_type(i['tmp'])
            mimetype = mimetype or 'application/octet-stream'
            mimetype = mimetype.split('/', 1)
            attachment = MIMEBase(mimetype[0], mimetype[1])
            with open(i['tmp'], 'rb') as img:
                attachment.set_payload(img.read())
            # the downloaded copy is not needed once it is in the message
            os.unlink(i['tmp'])

            encode_base64(attachment)
            attachment.add_header(
                'Content-Disposition',
                'inline',
                filename=i['name']
            )
            # must match the cid: reference put into the text
            attachment.add_header(
                'Content-ID',
                '<%s>' % (i['name'])
            )

            _envelope.attach(attachment)

        # add the whole html + image pack to the mail
        mail.attach(_envelope)

        str_io = StringIO()
        g = Generator(str_io, False)
        g.flatten(mail)

        self.ready = str_io.getvalue().encode('utf-8')

    def send(self):
        """Hand the prepared message to the SMTP server on 127.0.0.1:25.

        Does nothing but log an error if make() has not been called yet.
        Failures while sending are logged, not raised.
        """
        if not self.ready:
            logging.error('this mail is not ready')
            return
        try:
            # the context manager sends QUIT and closes the connection even
            # when sendmail() raises
            with smtplib.SMTP('127.0.0.1', 25) as s:
                # self.ready is already utf-8 encoded bytes; passing a str
                # would make smtplib encode it as ascii and fail with
                # UnicodeEncodeError on any non-ascii character
                s.sendmail(self.headers['From'], self.headers['To'], self.ready)
        except Exception as e:
            logging.error('sending mail failed with error: %s', e)
--- a/nasg.py
+++ b/nasg.py
@ -8,15 +8,15 @@ import shutil
 import logging
 import json
 import glob
 import subprocess
 import tempfile
 import atexit
 import re
 import hashlib
 import math
 import asyncio
-import magic
+import csv
 import magic
 import arrow
 import wand.image
 import similar_text
@ -27,7 +27,7 @@ import requests
 from breadability.readable import Article
 from whoosh import index
 import jinja2
-
+import urllib.parse
 import shared
 def splitpath(path):
@ -70,13 +70,19 @@ class Indexer(object):
        for url, offlinecopy in singular.offlinecopies.items():
            content_remote.append("%s" % offlinecopy)
        weight = 1
        if singular.isbookmark:
            weight = 10
        if singular.ispage:
            weight = 100
        self.writer.add_document(
            title=singular.title,
            url=singular.url,
            content=" ".join(list(map(str,[*content_real, *content_remote]))),
            date=singular.published.datetime,
            tags=",".join(list(map(str, singular.tags))),
-            weight=1,
+            weight=weight,
            img="%s" % singular.photo
        )
@ -190,35 +196,6 @@ class Renderer(object):
            return True
        return False
    #def rendersingular(self, singular):
        #logging.debug("rendering and saving %s", singular.fname)
        #targetdir = os.path.abspath(os.path.join(
            #shared.config.get('target', 'builddir'),
            #singular.fname
        #))
        #target = os.path.join(targetdir, 'index.html')
        #if not shared.config.get('params', 'force') and os.path.isfile(target):
            #ttime = int(os.path.getmtime(target))
            #if ttime == singular.mtime:
                #logging.debug('%s exists and up-to-date (lastmod: %d)', target, ttime)
                #return
        #if not os.path.isdir(targetdir):
            #os.mkdir(targetdir)
        #tmpl = self.j2.get_template(singular.tmplfile)
        #tmplvars = {
            #'post': singular.tmplvars,
            #'site': self.sitevars,
            #'taxonomy': {},
        #}
        #r = tmpl.render(tmplvars)
        #with open(target, "w") as html:
            #html.write(r)
            #html.close()
            #os.utime(target, (singular.mtime, singular.mtime))
 class BaseIter(object):
    def __init__(self):
@ -248,97 +225,97 @@ class BaseIter(object):
            yield (k, v)
        return
-class CMDLine(object):
+#class CMDLine(object):
-    def __init__(self, executable):
+    #def __init__(self, executable):
-        self.executable = self._which(executable)
+        #self.executable = self._which(executable)
-        if self.executable is None:
+        #if self.executable is None:
-            raise OSError('No %s found in PATH!' % executable)
+            #raise OSError('No %s found in PATH!' % executable)
-            return
+            #return
-    @staticmethod
+    #@staticmethod
-    def _which(name):
+    #def _which(name):
-        for d in os.environ['PATH'].split(':'):
+        #for d in os.environ['PATH'].split(':'):
-            which = glob.glob(os.path.join(d, name), recursive=True)
+            #which = glob.glob(os.path.join(d, name), recursive=True)
-            if which:
+            #if which:
-                return which.pop()
+                #return which.pop()
-        return None
+        #return None
-    def __enter__(self):
+    #def __enter__(self):
-        self.process = subprocess.Popen(
+        #self.process = subprocess.Popen(
-            [self.executable, "-stay_open", "True",  "-@", "-"],
+            #[self.executable, "-stay_open", "True",  "-@", "-"],
-            universal_newlines=True,
+            #universal_newlines=True,
-            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+            #stdin=subprocess.PIPE, stdout=subprocess.PIPE)
-        return self
+        #return self
-    def  __exit__(self, exc_type, exc_value, traceback):
+    #def  __exit__(self, exc_type, exc_value, traceback):
-        self.process.stdin.write("-stay_open\nFalse\n")
+        #self.process.stdin.write("-stay_open\nFalse\n")
-        self.process.stdin.flush()
+        #self.process.stdin.flush()
-    def execute(self, *args):
+    #def execute(self, *args):
-        args = args + ("-execute\n",)
+        #args = args + ("-execute\n",)
-        self.process.stdin.write(str.join("\n", args))
+        #self.process.stdin.write(str.join("\n", args))
-        self.process.stdin.flush()
+        #self.process.stdin.flush()
-        output = ""
+        #output = ""
-        fd = self.process.stdout.fileno()
+        #fd = self.process.stdout.fileno()
-        while not output.endswith(self.sentinel):
+        #while not output.endswith(self.sentinel):
-            output += os.read(fd, 4096).decode('utf-8', errors='ignore')
+            #output += os.read(fd, 4096).decode('utf-8', errors='ignore')
-        return output[:-len(self.sentinel)]
+        #return output[:-len(self.sentinel)]
-class Pandoc(CMDLine):
+#class Pandoc(CMDLine):
-    """ Handles calling external binary `exiftool` in an efficient way """
+    #""" Handles calling external binary `exiftool` in an efficient way """
-    def __init__(self, md2html=True):
+    #def __init__(self, md2html=True):
-        super().__init__('pandoc')
+        #super().__init__('pandoc')
-        if md2html:
+        #if md2html:
-            self.i = "markdown+" + "+".join([
+            #self.i = "markdown+" + "+".join([
-                'backtick_code_blocks',
+                #'backtick_code_blocks',
-                'auto_identifiers',
+                #'auto_identifiers',
-                'fenced_code_attributes',
+                #'fenced_code_attributes',
-                'definition_lists',
+                #'definition_lists',
-                'grid_tables',
+                #'grid_tables',
-                'pipe_tables',
+                #'pipe_tables',
-                'strikeout',
+                #'strikeout',
-                'superscript',
+                #'superscript',
-                'subscript',
+                #'subscript',
-                'markdown_in_html_blocks',
+                #'markdown_in_html_blocks',
-                'shortcut_reference_links',
+                #'shortcut_reference_links',
-                'autolink_bare_uris',
+                #'autolink_bare_uris',
-                'raw_html',
+                #'raw_html',
-                'link_attributes',
+                #'link_attributes',
-                'header_attributes',
+                #'header_attributes',
-                'footnotes',
+                #'footnotes',
-            ])
+            #])
-            self.o = 'html5'
+            #self.o = 'html5'
-        else:
+        #else:
-            self.o = "markdown-" + "-".join([
+            #self.o = "markdown-" + "-".join([
-                'raw_html',
+                #'raw_html',
-                'native_divs',
+                #'native_divs',
-                'native_spans',
+                #'native_spans',
-            ])
+            #])
-            self.i = 'html'
+            #self.i = 'html'
-    def convert(self, text):
+    #def convert(self, text):
-        cmd = (
+        #cmd = (
-            self.executable,
+            #self.executable,
-            '-o-',
+            #'-o-',
-            '--from=%s' % self.i,
+            #'--from=%s' % self.i,
-            '--to=%s' % self.o
+            #'--to=%s' % self.o
-        )
+        #)
-        logging.debug('converting content with Pandoc')
+        #logging.debug('converting content with Pandoc')
-        p = subprocess.Popen(
+        #p = subprocess.Popen(
-            cmd,
+            #cmd,
-            stdin=subprocess.PIPE,
+            #stdin=subprocess.PIPE,
-            stdout=subprocess.PIPE,
+            #stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
+            #stderr=subprocess.PIPE,
-        )
+        #)
-        stdout, stderr = p.communicate(input=text.encode())
+        #stdout, stderr = p.communicate(input=text.encode())
-        if stderr:
+        #if stderr:
-            logging.error("Error during pandoc covert:\n\t%s\n\t%s", cmd, stderr)
+            #logging.error("Error during pandoc covert:\n\t%s\n\t%s", cmd, stderr)
-        return stdout.decode('utf-8').strip()
+        #return stdout.decode('utf-8').strip()
 # based on http://stackoverflow.com/a/10075210
-class ExifTool(CMDLine):
+class ExifTool(shared.CMDLine):
    """ Handles calling external binary `exiftool` in an efficient way """
    sentinel = "{ready}\n"
@ -419,6 +396,7 @@ class WebImage(object):
        self.alttext = ''
        self.sizes = []
        self.fallbacksize = int(shared.config.get('common','fallbackimg', fallback='720'))
        self.cl = None
        for size in shared.config.options('downsize'):
            sizeext = shared.config.get('downsize', size)
@ -453,7 +431,7 @@ class WebImage(object):
            )
    def __str__(self):
-        if self.is_downsizeable:
+        if self.is_downsizeable and not self.cl:
            return '\n<figure class="photo"><a target="_blank" class="adaptive" href="%s"><img src="%s" class="adaptimg" alt="%s" /></a><figcaption class=\"caption\">%s%s</figcaption></figure>\n' % (
                self.target,
                self.fallback,
@ -461,8 +439,18 @@ class WebImage(object):
                self.fname,
                self.ext
            )
        elif self.cl:
            self.cl = self.cl.replace('.', ' ')
            return '<img src="%s" class="%s" alt="%s" title="%s%s" />' % (
                self.fallback,
                self.cl,
                self.alttext,
                self.fname,
                self.ext
            )
        else:
-            return '\n<figure class="picture"><img src="%s" class="aligncenter" alt="%s" /><figcaption class=\"caption\">%s%s</figcaption></figure>\n' % (
+            return '<img src="%s" class="aligncenter" alt="%s" title="%s%s" />' % (
                self.fallback,
                self.alttext,
                self.fname,
@ -768,10 +756,15 @@ class Content(BaseIter):
        self.front = Taxonomy()
    def populate(self):
        now = arrow.utcnow().timestamp
        for fpath in self.files:
            item = Singular(fpath, self.images)
            self.append(item.pubtime, item)
            if item.pubtime > now:
                logging.warning("skipping future post %s", item.fname)
                continue
            if item.isonfront:
                self.front.append(item.pubtime, item)
@ -804,7 +797,7 @@ class Content(BaseIter):
            'sitemap.txt'
        )
        urls = []
-        for t, item in self.data.items():
+        for item in self.data.values():
            urls.append( "%s/%s/" % (
                shared.config.get('site', 'url'),
                item.fname
@ -814,6 +807,47 @@ class Content(BaseIter):
            logging.info("writing sitemap to %s" % (target))
            f.write("\n".join(urls))
    def magicphp(self, renderer):
        """Render the magic.php template into the build directory.

        The template receives two variables:
        - redirects: (from, to) pairs read from the space separated
          'redirects' file, followed by a (shortslug, fname) pair for every
          item in self.data
        - gones: the first column of the space separated 'gone' file

        Both files are looked up under the 'basedir' of the 'common' config
        section and are optional. Lines with too few columns (e.g. blank
        lines) are skipped instead of raising IndexError.

        renderer: object whose ``j2`` attribute provides get_template().
        """
        def readrows(option, columns):
            # rows of the file named by common/<option> with enough columns
            rfile = os.path.join(
                shared.config.get('common', 'basedir'),
                shared.config.get('common', option)
            )
            if not os.path.isfile(rfile):
                return []
            with open(rfile, newline='') as csvfile:
                return [
                    row for row in csv.reader(csvfile, delimiter=' ')
                    if len(row) >= columns
                ]

        redirects = [(row[0], row[1]) for row in readrows('redirects', 2)]
        redirects.extend(
            (item.shortslug, item.fname) for item in self.data.values()
        )
        gones = [row[0] for row in readrows('gone', 1)]

        tmplvars = {
            'redirects': redirects,
            'gones': gones
        }

        r = renderer.j2.get_template("magic.php").render(tmplvars)
        target = os.path.abspath(os.path.join(
            shared.config.get('target', 'builddir'),
            'magic.php'
        ))

        with open(target, "w") as html:
            logging.debug('writing %s', target)
            html.write(r)
 class Singular(object):
    def __init__(self, path, images):
        logging.debug("initiating singular object from %s", path)
@ -874,6 +908,9 @@ class Singular(object):
                logging.debug("%s not found in images", fname)
                continue
            if cl:
                image.cl = cl
            logging.debug(
                "replacing %s in content with %s",
                shortcode,
@ -904,6 +941,24 @@ class Singular(object):
        return reactions
    @property
    def urls(self):
        """List of unique external URLs of this entry, in order of appearance.

        Candidates are the shared.URLREGEX matches in the content plus every
        URL listed in self.reactions. URLs whose host is one of the space
        separated domains in the site/domains config option are left out.
        """
        urls = shared.URLREGEX.findall(self.content)
        for reactionurls in self.reactions.values():
            urls = [*urls, *reactionurls]

        # the option is a space separated list, so compare whole host names
        # instead of doing a substring search in the raw string
        mydomains = shared.config.get('site', 'domains').split()
        seen = set()
        r = []
        for link in urls:
            # URLREGEX matches include the whitespace in front of the URL
            link = link.strip()
            if not link:
                continue
            domain = '{uri.netloc}'.format(uri=urllib.parse.urlparse(link))
            if domain in mydomains:
                continue
            # r is a list: it has no .get(), membership is tested via a set
            if link in seen:
                continue
            seen.add(link)
            r.append(link)
        return r
    @property
    def lang(self):
        lang = 'en'
@ -976,7 +1031,7 @@ class Singular(object):
            maybe = self.meta.get(maybe, False)
            if maybe:
                return maybe
-        return self.fname
+        return ''
    @property
    def url(self):
@ -1091,6 +1146,7 @@ class Singular(object):
            'slug': self.fname,
            'shortslug': self.shortslug,
            'rssenclosure': self.rssenclosure,
            'copies': self.offlinecopies,
        }
    @property
@ -1143,6 +1199,12 @@ class NASG(object):
    def __init__(self):
        # --- set params
        parser = argparse.ArgumentParser(description='Parameters for NASG')
        parser.add_argument(
            '--clear',
            action='store_true',
            default=False,
            help='clear build directory in advance'
        )
        parser.add_argument(
            '--regenerate',
            action='store_true',
@ -1217,6 +1279,13 @@ class NASG(object):
            await searchdb.append(singular)
    def run(self):
        if shared.config.getboolean('params', 'clear'):
            input('about to clear build directory, press enter to continue')
            shutil.rmtree(os.path.abspath(
                shared.config.get('target', 'builddir')
            ))
        loop = asyncio.get_event_loop()
        for d in shared.config.options('target'):
@ -1235,8 +1304,8 @@ class NASG(object):
        content = Content(images)
        content.populate()
        renderer = Renderer()
        if not shared.config.getboolean('params', 'norender'):
            renderer = Renderer()
            logging.info("rendering content")
            loop.run_until_complete(self.__acrender(content, renderer))
@ -1249,6 +1318,9 @@ class NASG(object):
            logging.info("rendering sitemap")
            content.sitemap()
        logging.info("render magic.php")
        content.magicphp(renderer)
        logging.info("copy the static bits")
        src = shared.config.get('source', 'staticdir')
        for item in os.listdir(src):
@ -1264,7 +1336,6 @@ class NASG(object):
        loop.close()
 if __name__ == '__main__':
    # build the site when the module is executed as a script
    NASG().run()
--- a/new.py
+++ b/new.py
@ -36,7 +36,7 @@ if __name__ == '__main__':
        now = arrow.utcnow()
        parser = argparse.ArgumentParser(description='create doc and print it to stdout')
        parser.add_argument('--tags', '-t', help='; separated, quoted list of tags')
-        parser.add_argument('--date', '-d', help=' YYYY-mm-ddTHH:MM:SS+TZTZ formatted date, if not now')
+        parser.add_argument('--date', '-d', help=' YYYY-mm-ddTHH:MM:SS+TZ formatted date, if not now')
        parser.add_argument('--slug', '-s', help='slug (normally autogenerated from title or pubdate)')
        parser.add_argument('--title', '-l', help='title of new entry')
        parser.add_argument('--bookmark', '-b', help='URL to bookmark')
@ -48,7 +48,7 @@ if __name__ == '__main__':
        args = vars(parser.parse_args())
        if not args['date']:
-            d = now.format("YYYY-MM-DDTHH:mm:ssZ")
+            d = now.format(shared.ARROWISO)
            args['date'] = input('Date [%s]: ' % (d)) or d
        if not args['title']:
--- a/requirements.txt
+++ b/requirements.txt
@ -3,6 +3,7 @@ appdirs==1.4.3
 arrow==0.10.0
 breadability==0.1.20
 chardet==3.0.3
 decorator==4.0.11
 docopt==0.6.2
 httptools==0.0.9
 Jinja2==2.9.6
@ -23,6 +24,7 @@ ujson==1.35
 unicode-slugify==0.1.3
 Unidecode==0.4.20
 uvloop==0.8.0
 validators==0.11.3
 Wand==0.4.4
 websockets==3.3
 Whoosh==2.7.4
--- a/search.py
+++ b/search.py
@ -0,0 +1,77 @@
 #!/usr/bin/env python3
 import asyncio
 import uvloop
 import os
 from sanic import Sanic
 import sanic.response
 from sanic.log import log as logging
 from whoosh import index
 from whoosh import qparser
 from whoosh import fields
 from whoosh import analysis
 import jinja2
 import shared
 def SearchHandler(query, tmpl):
    """Run a full text search and render the hits with the given template.

    query: the raw search string; '+' is rewritten to ' AND ' and ' -' to
        ' NOT ' before it is parsed. An empty or missing query yields a
        plain text response with HTTP 400.
    tmpl: template object; its render() gets 'term' (the rewritten query)
        and 'posts' (list of dicts with 'title', 'url', 'highlight' and,
        when the hit has one, 'img').

    Returns a sanic response: HTML with status 200 on success.
    """
    if not query:
        return sanic.response.text(
            "You seem to have forgot to enter what you want to search for. Please try again.",
            status=400
        )

    query = query.replace('+', ' AND ').replace(' -', ' NOT ')
    ix = index.open_dir(os.path.abspath(os.path.join(
            shared.config.get('target', 'builddir'),
            shared.config.get('var', 'searchdb')
    )))

    qp = qparser.MultifieldParser(
        ["title", "content", "tags"],
        schema = shared.schema
    )

    q = qp.parse(query)
    results = []
    # the searcher holds open index files: close it once the hits are copied
    # out. Highlights have to be generated while the searcher is still open.
    with ix.searcher() as searcher:
        r = searcher.search(q, sortedby="weight", limit=100)
        logging.info("results for '%s': %i", query, len(r))
        for result in r:
            res = {
                'title': result['title'],
                'url': result['url'],
                'highlight': result.highlights("content"),
            }

            if 'img' in result:
                res['img'] = result['img']

            results.append(res)

    tvars = {
        'term': query,
        'posts': results,
    }

    logging.info("collected %i results to render", len(results))
    return sanic.response.html(tmpl.render(tvars), status=200)
 if __name__ == '__main__':
    # serve the search endpoint on localhost, using uvloop as event loop
    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
    app = Sanic()

    jldr = jinja2.FileSystemLoader(
        searchpath=shared.config.get('source', 'templatesdir')
    )
    # NOTE(review): autoescape is off and the search term is handed to the
    # template as-is - confirm that searchresults.html escapes it.
    jenv = jinja2.Environment(loader=jldr)
    tmpl = jenv.get_template('searchresults.html')

    # the accepted HTTP methods belong to the route declaration, not to the
    # handler signature (where the list was an unused mutable default)
    @app.route("/search", methods=["GET"])
    async def search(request):
        """Answer /search?s=<query> with the rendered result page."""
        query = request.args.get('s')
        return SearchHandler(query, tmpl)

    app.run(host="127.0.0.1", port=8001, debug=True)
--- a/shared.py
+++ b/shared.py
@ -1,8 +1,11 @@
 import configparser
 import os
 import re
 import glob
 import logging
 import subprocess
 from whoosh import fields
 from whoosh import analysis
 import re
 def __expandconfig(config):
    """ add the dirs to the config automatically """
@ -18,6 +21,8 @@ def __expandconfig(config):
    ))
    return config
 ARROWISO = 'YYYY-MM-DDTHH:mm:ssZ'
 URLREGEX = re.compile(
    r'\s+https?\:\/\/?[a-zA-Z0-9\.\/\?\:@\-_=#]+'
    r'\.[a-zA-Z0-9\.\/\?\:@\-_=#]*'
@ -74,3 +79,91 @@ config = configparser.ConfigParser(
 )
 config.read('config.ini')
 config = __expandconfig(config)
 class CMDLine(object):
    """Base class for wrappers around an external command line tool.

    Used as a context manager it starts the tool as a long running child
    process (with the '-stay_open True -@ -' arguments) which is then driven
    through execute(). Subclasses using execute() have to define a
    ``sentinel`` string: the text the tool prints to mark the end of a reply.
    """

    def __init__(self, executable):
        """Locate the executable; raises OSError if it is not in PATH."""
        self.executable = self._which(executable)
        if self.executable is None:
            raise OSError('No %s found in PATH!' % executable)

    @staticmethod
    def _which(name):
        """Return the path of `name` from a PATH directory, or None."""
        for d in os.environ.get('PATH', os.defpath).split(os.pathsep):
            which = glob.glob(os.path.join(d, name), recursive=True)
            if which:
                return which.pop()

        return None

    def __enter__(self):
        self.process = subprocess.Popen(
            [self.executable, "-stay_open", "True",  "-@", "-"],
            universal_newlines=True,
            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        return self

    def  __exit__(self, exc_type, exc_value, traceback):
        # ask the child process to stop waiting for further commands
        self.process.stdin.write("-stay_open\nFalse\n")
        self.process.stdin.flush()

    def execute(self, *args):
        """Send the arguments to the child and return its reply as text.

        The arguments are written one per line, followed by '-execute'; the
        reply is everything the child prints up to, but not including,
        self.sentinel.

        Raises OSError if the child closes its output before the sentinel
        arrives (instead of waiting for it forever).
        """
        args = args + ("-execute\n",)
        self.process.stdin.write(str.join("\n", args))
        self.process.stdin.flush()
        sentinel = self.sentinel.encode('utf-8')
        # collect raw bytes and decode once at the end, so a multibyte
        # character split between two reads is not lost
        output = b""
        fd = self.process.stdout.fileno()
        while not output.endswith(sentinel):
            chunk = os.read(fd, 4096)
            if not chunk:
                # EOF: the child is gone, the sentinel will never come
                raise OSError(
                    '%s closed its output before sending the end marker' % (
                        self.executable
                    )
                )
            output += chunk
        return output[:-len(sentinel)].decode('utf-8', errors='ignore')
 class Pandoc(CMDLine):
    """Convert text between Markdown and HTML with the `pandoc` binary."""

    def __init__(self, md2html=True):
        """Pick the pandoc input and output formats.

        md2html: when true (default) convert Markdown, with a set of
            extensions enabled, to HTML5; otherwise convert HTML to Markdown
            with raw_html, native_divs and native_spans disabled.
        """
        super().__init__('pandoc')
        if not md2html:
            disabled = ('raw_html', 'native_divs', 'native_spans')
            self.i = 'html'
            self.o = "markdown-" + "-".join(disabled)
            return

        enabled = (
            'backtick_code_blocks',
            'auto_identifiers',
            'fenced_code_attributes',
            'definition_lists',
            'grid_tables',
            'pipe_tables',
            'strikeout',
            'superscript',
            'subscript',
            'markdown_in_html_blocks',
            'shortcut_reference_links',
            'autolink_bare_uris',
            'raw_html',
            'link_attributes',
            'header_attributes',
            'footnotes',
        )
        self.i = "markdown+" + "+".join(enabled)
        self.o = 'html5'

    def convert(self, text):
        """Pipe `text` through pandoc and return the stripped output.

        Anything pandoc writes to stderr is logged as an error; the output
        is returned regardless.
        """
        source_format = '--from=%s' % self.i
        target_format = '--to=%s' % self.o
        cmd = (self.executable, '-o-', source_format, target_format)
        logging.debug('converting content with Pandoc')
        pipe = subprocess.PIPE
        proc = subprocess.Popen(cmd, stdin=pipe, stdout=pipe, stderr=pipe)

        converted, errors = proc.communicate(input=text.encode())
        if errors:
            logging.error("Error during pandoc covert:\n\t%s\n\t%s", cmd, errors)
        return converted.decode('utf-8').strip()
--- a/webmention.py
+++ b/webmention.py
@ -0,0 +1,193 @@
 import asyncio
 import uvloop
 import os
 import hashlib
 import json
 import urllib.parse
 import frontmatter
 from sanic import Sanic
 import sanic.response
 from sanic.log import log as logging
 import validators
 import arrow
 from webmentiontools import urlinfo
 import shared
 import envelope
 class WebmentionHandler(object):
    """Validate, fetch, store and announce one incoming webmention.

    After run() the HTTP response to send back is available as ``self.r``;
    it stays a generic HTTP 500 unless a step replaces it.
    """

    def __init__(self, source, target):
        """Remember the source and target URLs of the webmention."""
        self.source = source
        self.target = target
        self.now = arrow.utcnow().timestamp
        logging.info("incoming webmention %s => %s", self.source, self.target)
        self.r = sanic.response.text(
            "something went wrong on my side, could you please let me know at hello@petermolnar.eu ?",
            status=500
        )

    def run(self):
        """Process the webmention; stops at the first failing step."""
        if not self._validate():
            return
        # a failed fetch or a missing link has already set the error
        # response: do not store or announce such a webmention
        if not self._parse():
            return
        self._save()
        self._notify()

    def _validate(self):
        """Check both URLs and their domains; sets a 400 response on error.

        Returns True only if source and target are valid URLs, the target is
        on one of the site/domains hosts and the source is not.
        """
        # a sequence of pairs rather than a dict keyed by URL: identical
        # source and target must not collapse into a single check
        checks = (
            (self.source, '"source" parameter is an invalid URL'),
            (self.target, '"target" parameter is an invalid URL'),
        )
        for url, emsg in checks:
            logging.debug("validating URL %s", url)
            # a missing form field arrives as None
            if not url or not validators.url(url):
                self.r = sanic.response.text(
                    emsg,
                    status=400
                )
                return False

        logging.debug("checking target domain")
        _target = urllib.parse.urlparse(self.target)
        _target_domain = '{uri.netloc}'.format(uri=_target)
        _mydomains = shared.config.get('site', 'domains').split(" ")
        if _target_domain not in _mydomains:
            self.r = sanic.response.text(
                "'target' is not in the list of allowed domains",
                status=400
            )
            return False

        logging.debug("checking selfpings")
        _source = urllib.parse.urlparse(self.source)
        _source_domain = '{uri.netloc}'.format(uri=_source)
        if _source_domain in _mydomains:
            self.r = sanic.response.text(
                "selfpings are not allowed",
                status=400
            )
            return False

        return True

    def _parse(self):
        """Fetch source and target and verify that source links to target.

        Replaces self.source and self.target with the 'realurl' reported by
        the fetched documents. Returns True on success; on failure sets the
        error response and returns False.
        """
        logging.debug("fetching %s", self.source)
        self._source = urlinfo.UrlInfo(self.source)
        if self._source.error:
            self.r = sanic.response.text(
                "couldn't fetch 'source' from %s" % (self.source),
                status=408
            )
            return False

        self.source = self._source.realurl
        if not self._source.linksTo(self.target):
            self.r = sanic.response.text(
                "'source' (%s) does not link to 'target' (%s)" % (
                    self.source,
                    self.target
                ),
                status=400
            )
            return False

        logging.debug("fetching %s", self.target)
        self._target = urlinfo.UrlInfo(self.target)
        if self._target.error:
            self.r = sanic.response.text(
                "couldn't fetch 'target' from %s" % (self.target),
                status=408
            )
            return False
        self.target = self._target.realurl
        return True

    def _save(self):
        """Write the webmention as a frontmatter document; sets HTTP 202.

        The file is named after mhash and stored in the source/commentsdir
        directory; an existing file of the same name is overwritten.
        """
        doc = frontmatter.loads('')
        doc.metadata = self.meta
        doc.content = self.content
        target = os.path.join(
            shared.config.get('source', 'commentsdir'),
            self.mhash
        )
        if os.path.isfile(target):
            logging.warning('updating existing webmention %s', target)
        else:
            logging.warning('saving incoming webmention to %s', target)
        with open(target, 'wt') as t:
            t.write(frontmatter.dumps(doc))
            self.r = sanic.response.text(
                "accepted",
                status=202
            )

    def _notify(self):
        """Mail a summary of the webmention to the configured recipient."""
        # go through the property so this also works before _save() ran
        meta = self.meta
        text = "# webmention\n## Source\n\nauthor\n:    %s\n\nURL\n:    %s\n\nemail\n:    %s\n\ndate\n:    %s\n\n## Target\n\nURL\n:    %s\n\n---\n\n%s" % (
            meta['author'].get('name', self.source),
            meta['author'].get('url', self.source),
            meta['author'].get('email', ''),
            meta['date'],
            self.target,
            self.content
        )

        l = envelope.Letter(
            sender=(
                shared.config.get('webmention', 'from_name'),
                shared.config.get('webmention', 'from_address')
            ),
            recipient=(
                shared.config.get('webmention', 'to_name'),
                shared.config.get('webmention', 'to_address')
            ),
            subject="[webmention] %s" % self.source,
            text=text
        )
        l.make()
        l.send()

    @property
    def mhash(self):
        """SHA1 hex digest of the metadata serialised as key-sorted JSON."""
        return hashlib.sha1(json.dumps(self.meta, sort_keys=True).encode('utf-8')).hexdigest()

    @property
    def meta(self):
        """Metadata of the webmention; built on first access, then cached."""
        if hasattr(self, '_meta'):
            return self._meta

        self._meta = {
            'author': self._source.author(),
            'type': self._source.relationType(),
            'target': self.target,
            'source': self.source,
            'date': arrow.get(self._source.pubDate()).format(shared.ARROWISO),
        }

        return self._meta

    @property
    def content(self):
        """Content of the source converted from HTML to Markdown; cached."""
        if hasattr(self, '_content'):
            return self._content

        # from HTML to Markdown
        self._content = shared.Pandoc(False).convert(self._source.content())
        # from Markdown back to HTML
        #self._content = shared.Pandoc().convert(tmpcontent)
        return self._content
 if __name__ == '__main__':
    # serve the webmention endpoint on localhost, using uvloop as event loop
    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
    app = Sanic()

    @app.route("/webmention", methods=["POST"])
    async def wm(request):
        """Process the posted 'source' and 'target' form fields."""
        handler = WebmentionHandler(
            request.form.get('source'),
            request.form.get('target')
        )
        handler.run()
        return handler.r

    app.run(host="127.0.0.1", port=8002, debug=True)