working search and webmentions receiver
This commit is contained in:
parent
1b7b354a88
commit
558195288d
7 changed files with 752 additions and 123 deletions
193
envelope.py
Normal file
193
envelope.py
Normal file
|
@ -0,0 +1,193 @@
|
|||
from email.mime.multipart import MIMEMultipart
|
||||
from email.mime.text import MIMEText
|
||||
from email.mime.image import MIMEImage
|
||||
from email.header import Header
|
||||
import email.charset
|
||||
from email.generator import Generator
|
||||
from io import StringIO
|
||||
import mimetypes
|
||||
from email.mime.base import MIMEBase
|
||||
from email.encoders import encode_base64
|
||||
import email.utils
|
||||
|
||||
import time
|
||||
import getpass
|
||||
import socket
|
||||
import shutil
|
||||
import requests
|
||||
import tempfile
|
||||
import atexit
|
||||
import os
|
||||
import re
|
||||
import smtplib
|
||||
import logging
|
||||
from shared import Pandoc
|
||||
|
||||
class Letter(object):
    """Build and send a multipart e-mail whose body is written in Markdown.

    :meth:`make` assembles a ``multipart/alternative`` message: the first
    part is the Markdown source as plain text, the second is a
    ``multipart/related`` container holding the Pandoc-rendered HTML and
    any images that were downloaded and inlined.  :meth:`send` hands the
    flattened message to the SMTP server listening on 127.0.0.1:25.
    """

    # seconds to wait for a remote image before giving up on it
    IMAGE_TIMEOUT = 30

    def __init__(self, sender=None, recipient=None, subject='', text=''):
        """Store the message fields and create a scratch directory.

        ``sender`` and ``recipient`` are ``(name, address)`` pairs as
        consumed by ``email.utils.formataddr``.  ``sender`` falls back to
        ``(current user, hostname)`` and ``recipient`` falls back to the
        sender.  ``text`` is the Markdown body.
        """
        self.sender = sender or (getpass.getuser(), socket.gethostname())
        self.recipient = recipient or self.sender

        # mkdtemp's first positional argument is the *suffix*; pass the
        # marker explicitly as prefix so the directory is recognisable.
        self.tmp = tempfile.mkdtemp(
            prefix='envelope_',
            dir=tempfile.gettempdir()
        )
        # the scratch directory is removed when the interpreter exits;
        # ignore_errors keeps exit clean if it has already disappeared
        atexit.register(
            shutil.rmtree,
            os.path.abspath(self.tmp),
            ignore_errors=True
        )
        self.text = text
        self.subject = subject
        self.images = []
        self.ready = None
        self.time = time.time()
        self.headers = {}

    @property
    def _html(self):
        """The Markdown body converted by ``Pandoc().convert``."""
        return Pandoc().convert(self.text)

    @property
    def _tmpl(self):
        """The converted body wrapped in a minimal HTML document."""
        return "<html><head></head><body>%s</body></html>" % (self._html)

    def __pull_image(self, img):
        """Download one image into the scratch directory.

        On success a dict with ``url``, ``name`` and ``tmp`` (local path)
        is appended to ``self.images``.  Unreachable images and non-200
        answers are skipped, so the mail is still produced without them.
        """
        fname = os.path.basename(img)
        i = {
            'url': img,
            'name': fname,
            'tmp': os.path.join(self.tmp, fname),
        }

        logging.debug("pulling image %s", i['url'])
        try:
            r = requests.get(
                i['url'],
                stream=True,
                timeout=self.IMAGE_TIMEOUT
            )
        except requests.RequestException as e:
            logging.warning("pulling image %s failed: %s", i['url'], e)
            return

        try:
            if r.status_code != 200:
                return
            with open(i['tmp'], 'wb') as f:
                logging.debug("writing image %s", i['tmp'])
                r.raw.decode_content = True
                shutil.copyfileobj(r.raw, f)
        finally:
            # streamed responses keep the connection until closed
            r.close()

        if not isinstance(self.images, list):
            self.images = []
        self.images.append(i)

    def __pull_images(self):
        """Find Markdown image references in the body and download them."""
        mdmatch = re.compile(
            r'!\[.*\]\((.*?\.(?:jpe?g|png|gif)(?:\s+[\'\"]?.*?[\'\"]?)?)\)'
            r'(?:\{.*?\})?'
        )
        for img in mdmatch.findall(self.text):
            self.__pull_image(img)

    def __attach_images(self):
        """Download images and point their references at ``cid:`` URIs."""
        self.__pull_images()
        for i in self.images:
            cid = 'cid:%s' % (i['name'])
            logging.debug("replacing %s with %s", i['url'], cid)
            self.text = self.text.replace(i['url'], cid)

    def make(self, inline_images=True):
        """Assemble the message and store its bytes in ``self.ready``.

        When ``inline_images`` is true, images referenced from the
        Markdown body are downloaded, attached inline and referenced by
        Content-ID.
        """
        if inline_images:
            self.__attach_images()

        # Python, by default, encodes utf-8 in base64, which makes plain text
        # mail painful; this overrides and forces Quoted Printable.
        # Quoted Printable is still awful, but better, and we're going to
        # force the mail to be 8bit encoded.
        # Note: enforcing 8bit breaks compatibility with ancient mail clients.
        email.charset.add_charset(
            'utf-8',
            email.charset.QP,
            email.charset.QP,
            'utf-8'
        )

        mail = MIMEMultipart('alternative')

        # --- setting headers ---
        self.headers = {
            # a single trailing line break is dropped from the subject
            'Subject': Header(
                re.sub(r"\r?\n?$", "", self.subject, count=1),
                'utf-8'
            ).encode(),
            'To': email.utils.formataddr(self.recipient),
            'From': email.utils.formataddr(self.sender),
            'Date': email.utils.formatdate(self.time, localtime=True)
        }

        for k, v in self.headers.items():
            mail.add_header(k, "%s" % v)
        logging.debug("headers: %s", self.headers)

        # --- adding plain text ---
        _text = MIMEText(self.text, 'plain', _charset='utf-8')
        # ---
        # this is the part where we overwrite the way Python thinks:
        # force the text to be the actual, unencoded, utf-8.
        # Note: these steps break compatibility with ancient mail clients.
        _text.replace_header('Content-Transfer-Encoding', '8bit')
        _text.replace_header('Content-Type', 'text/plain; charset=utf-8')
        _text.set_payload(self.text)
        # ---
        logging.debug("text: %s", _text)
        mail.attach(_text)

        # --- HTML bit ---
        # this is where it gets tricky: the HTML part should be a 'related'
        # wrapper, in which the text and all the related images are sitting
        _envelope = MIMEMultipart('related')

        html = self._tmpl
        _html = MIMEText(html, 'html', _charset='utf-8')
        # ---
        # see above under 'adding plain text'
        _html.replace_header('Content-Transfer-Encoding', '8bit')
        _html.replace_header('Content-Type', 'text/html; charset=utf-8')
        _html.set_payload(html)
        # ---
        logging.debug("HTML: %s", _html)
        _envelope.attach(_html)

        for i in self.images:
            mimetype, encoding = mimetypes.guess_type(i['tmp'])
            mimetype = mimetype or 'application/octet-stream'
            mimetype = mimetype.split('/', 1)
            attachment = MIMEBase(mimetype[0], mimetype[1])
            with open(i['tmp'], 'rb') as img:
                attachment.set_payload(img.read())
            # the local copy is not needed once it is in the message
            os.unlink(i['tmp'])

            encode_base64(attachment)
            attachment.add_header(
                'Content-Disposition',
                'inline',
                filename=i['name']
            )
            attachment.add_header(
                'Content-ID',
                '<%s>' % (i['name'])
            )

            _envelope.attach(attachment)

        # add the whole html + image pack to the mail
        mail.attach(_envelope)

        str_io = StringIO()
        g = Generator(str_io, False)
        g.flatten(mail)

        self.ready = str_io.getvalue().encode('utf-8')

    def send(self):
        """Send the message built by :meth:`make` via 127.0.0.1:25.

        Logs an error and returns if :meth:`make` has not been called.
        Delivery problems are logged, not raised.
        """
        if not self.ready:
            logging.error('this mail is not ready')
            return

        try:
            # the context manager issues QUIT even when sendmail raises,
            # so the connection is never left dangling
            with smtplib.SMTP('127.0.0.1', 25) as s:
                # self.ready is already utf-8 encoded bytes; passing a
                # str here makes smtplib encode it as ascii, which fails
                # with UnicodeEncodeError on non-ascii characters
                s.sendmail(
                    self.headers['From'],
                    self.headers['To'],
                    self.ready
                )
        except Exception as e:
            logging.error('sending mail failed with error: %s', e)
|
311
nasg.py
Normal file → Executable file
311
nasg.py
Normal file → Executable file
|
@ -8,15 +8,15 @@ import shutil
|
|||
import logging
|
||||
import json
|
||||
import glob
|
||||
import subprocess
|
||||
import tempfile
|
||||
import atexit
|
||||
import re
|
||||
import hashlib
|
||||
import math
|
||||
import asyncio
|
||||
import magic
|
||||
import csv
|
||||
|
||||
import magic
|
||||
import arrow
|
||||
import wand.image
|
||||
import similar_text
|
||||
|
@ -27,7 +27,7 @@ import requests
|
|||
from breadability.readable import Article
|
||||
from whoosh import index
|
||||
import jinja2
|
||||
|
||||
import urllib.parse
|
||||
import shared
|
||||
|
||||
def splitpath(path):
|
||||
|
@ -70,13 +70,19 @@ class Indexer(object):
|
|||
for url, offlinecopy in singular.offlinecopies.items():
|
||||
content_remote.append("%s" % offlinecopy)
|
||||
|
||||
weight = 1
|
||||
if singular.isbookmark:
|
||||
weight = 10
|
||||
if singular.ispage:
|
||||
weight = 100
|
||||
|
||||
self.writer.add_document(
|
||||
title=singular.title,
|
||||
url=singular.url,
|
||||
content=" ".join(list(map(str,[*content_real, *content_remote]))),
|
||||
date=singular.published.datetime,
|
||||
tags=",".join(list(map(str, singular.tags))),
|
||||
weight=1,
|
||||
weight=weight,
|
||||
img="%s" % singular.photo
|
||||
)
|
||||
|
||||
|
@ -190,35 +196,6 @@ class Renderer(object):
|
|||
return True
|
||||
return False
|
||||
|
||||
#def rendersingular(self, singular):
|
||||
#logging.debug("rendering and saving %s", singular.fname)
|
||||
#targetdir = os.path.abspath(os.path.join(
|
||||
#shared.config.get('target', 'builddir'),
|
||||
#singular.fname
|
||||
#))
|
||||
#target = os.path.join(targetdir, 'index.html')
|
||||
|
||||
#if not shared.config.get('params', 'force') and os.path.isfile(target):
|
||||
#ttime = int(os.path.getmtime(target))
|
||||
#if ttime == singular.mtime:
|
||||
#logging.debug('%s exists and up-to-date (lastmod: %d)', target, ttime)
|
||||
#return
|
||||
|
||||
#if not os.path.isdir(targetdir):
|
||||
#os.mkdir(targetdir)
|
||||
|
||||
#tmpl = self.j2.get_template(singular.tmplfile)
|
||||
#tmplvars = {
|
||||
#'post': singular.tmplvars,
|
||||
#'site': self.sitevars,
|
||||
#'taxonomy': {},
|
||||
#}
|
||||
#r = tmpl.render(tmplvars)
|
||||
#with open(target, "w") as html:
|
||||
#html.write(r)
|
||||
#html.close()
|
||||
#os.utime(target, (singular.mtime, singular.mtime))
|
||||
|
||||
|
||||
class BaseIter(object):
|
||||
def __init__(self):
|
||||
|
@ -248,97 +225,97 @@ class BaseIter(object):
|
|||
yield (k, v)
|
||||
return
|
||||
|
||||
class CMDLine(object):
|
||||
def __init__(self, executable):
|
||||
self.executable = self._which(executable)
|
||||
if self.executable is None:
|
||||
raise OSError('No %s found in PATH!' % executable)
|
||||
return
|
||||
#class CMDLine(object):
|
||||
#def __init__(self, executable):
|
||||
#self.executable = self._which(executable)
|
||||
#if self.executable is None:
|
||||
#raise OSError('No %s found in PATH!' % executable)
|
||||
#return
|
||||
|
||||
@staticmethod
|
||||
def _which(name):
|
||||
for d in os.environ['PATH'].split(':'):
|
||||
which = glob.glob(os.path.join(d, name), recursive=True)
|
||||
if which:
|
||||
return which.pop()
|
||||
return None
|
||||
#@staticmethod
|
||||
#def _which(name):
|
||||
#for d in os.environ['PATH'].split(':'):
|
||||
#which = glob.glob(os.path.join(d, name), recursive=True)
|
||||
#if which:
|
||||
#return which.pop()
|
||||
#return None
|
||||
|
||||
def __enter__(self):
|
||||
self.process = subprocess.Popen(
|
||||
[self.executable, "-stay_open", "True", "-@", "-"],
|
||||
universal_newlines=True,
|
||||
stdin=subprocess.PIPE, stdout=subprocess.PIPE)
|
||||
return self
|
||||
#def __enter__(self):
|
||||
#self.process = subprocess.Popen(
|
||||
#[self.executable, "-stay_open", "True", "-@", "-"],
|
||||
#universal_newlines=True,
|
||||
#stdin=subprocess.PIPE, stdout=subprocess.PIPE)
|
||||
#return self
|
||||
|
||||
def __exit__(self, exc_type, exc_value, traceback):
|
||||
self.process.stdin.write("-stay_open\nFalse\n")
|
||||
self.process.stdin.flush()
|
||||
#def __exit__(self, exc_type, exc_value, traceback):
|
||||
#self.process.stdin.write("-stay_open\nFalse\n")
|
||||
#self.process.stdin.flush()
|
||||
|
||||
def execute(self, *args):
|
||||
args = args + ("-execute\n",)
|
||||
self.process.stdin.write(str.join("\n", args))
|
||||
self.process.stdin.flush()
|
||||
output = ""
|
||||
fd = self.process.stdout.fileno()
|
||||
while not output.endswith(self.sentinel):
|
||||
output += os.read(fd, 4096).decode('utf-8', errors='ignore')
|
||||
return output[:-len(self.sentinel)]
|
||||
#def execute(self, *args):
|
||||
#args = args + ("-execute\n",)
|
||||
#self.process.stdin.write(str.join("\n", args))
|
||||
#self.process.stdin.flush()
|
||||
#output = ""
|
||||
#fd = self.process.stdout.fileno()
|
||||
#while not output.endswith(self.sentinel):
|
||||
#output += os.read(fd, 4096).decode('utf-8', errors='ignore')
|
||||
#return output[:-len(self.sentinel)]
|
||||
|
||||
|
||||
class Pandoc(CMDLine):
|
||||
""" Handles calling external binary `exiftool` in an efficient way """
|
||||
def __init__(self, md2html=True):
|
||||
super().__init__('pandoc')
|
||||
if md2html:
|
||||
self.i = "markdown+" + "+".join([
|
||||
'backtick_code_blocks',
|
||||
'auto_identifiers',
|
||||
'fenced_code_attributes',
|
||||
'definition_lists',
|
||||
'grid_tables',
|
||||
'pipe_tables',
|
||||
'strikeout',
|
||||
'superscript',
|
||||
'subscript',
|
||||
'markdown_in_html_blocks',
|
||||
'shortcut_reference_links',
|
||||
'autolink_bare_uris',
|
||||
'raw_html',
|
||||
'link_attributes',
|
||||
'header_attributes',
|
||||
'footnotes',
|
||||
])
|
||||
self.o = 'html5'
|
||||
else:
|
||||
self.o = "markdown-" + "-".join([
|
||||
'raw_html',
|
||||
'native_divs',
|
||||
'native_spans',
|
||||
])
|
||||
self.i = 'html'
|
||||
#class Pandoc(CMDLine):
|
||||
#""" Handles calling external binary `exiftool` in an efficient way """
|
||||
#def __init__(self, md2html=True):
|
||||
#super().__init__('pandoc')
|
||||
#if md2html:
|
||||
#self.i = "markdown+" + "+".join([
|
||||
#'backtick_code_blocks',
|
||||
#'auto_identifiers',
|
||||
#'fenced_code_attributes',
|
||||
#'definition_lists',
|
||||
#'grid_tables',
|
||||
#'pipe_tables',
|
||||
#'strikeout',
|
||||
#'superscript',
|
||||
#'subscript',
|
||||
#'markdown_in_html_blocks',
|
||||
#'shortcut_reference_links',
|
||||
#'autolink_bare_uris',
|
||||
#'raw_html',
|
||||
#'link_attributes',
|
||||
#'header_attributes',
|
||||
#'footnotes',
|
||||
#])
|
||||
#self.o = 'html5'
|
||||
#else:
|
||||
#self.o = "markdown-" + "-".join([
|
||||
#'raw_html',
|
||||
#'native_divs',
|
||||
#'native_spans',
|
||||
#])
|
||||
#self.i = 'html'
|
||||
|
||||
def convert(self, text):
|
||||
cmd = (
|
||||
self.executable,
|
||||
'-o-',
|
||||
'--from=%s' % self.i,
|
||||
'--to=%s' % self.o
|
||||
)
|
||||
logging.debug('converting content with Pandoc')
|
||||
p = subprocess.Popen(
|
||||
cmd,
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
)
|
||||
#def convert(self, text):
|
||||
#cmd = (
|
||||
#self.executable,
|
||||
#'-o-',
|
||||
#'--from=%s' % self.i,
|
||||
#'--to=%s' % self.o
|
||||
#)
|
||||
#logging.debug('converting content with Pandoc')
|
||||
#p = subprocess.Popen(
|
||||
#cmd,
|
||||
#stdin=subprocess.PIPE,
|
||||
#stdout=subprocess.PIPE,
|
||||
#stderr=subprocess.PIPE,
|
||||
#)
|
||||
|
||||
stdout, stderr = p.communicate(input=text.encode())
|
||||
if stderr:
|
||||
logging.error("Error during pandoc covert:\n\t%s\n\t%s", cmd, stderr)
|
||||
return stdout.decode('utf-8').strip()
|
||||
#stdout, stderr = p.communicate(input=text.encode())
|
||||
#if stderr:
|
||||
#logging.error("Error during pandoc covert:\n\t%s\n\t%s", cmd, stderr)
|
||||
#return stdout.decode('utf-8').strip()
|
||||
|
||||
# based on http://stackoverflow.com/a/10075210
|
||||
class ExifTool(CMDLine):
|
||||
class ExifTool(shared.CMDLine):
|
||||
""" Handles calling external binary `exiftool` in an efficient way """
|
||||
sentinel = "{ready}\n"
|
||||
|
||||
|
@ -419,6 +396,7 @@ class WebImage(object):
|
|||
self.alttext = ''
|
||||
self.sizes = []
|
||||
self.fallbacksize = int(shared.config.get('common','fallbackimg', fallback='720'))
|
||||
self.cl = None
|
||||
|
||||
for size in shared.config.options('downsize'):
|
||||
sizeext = shared.config.get('downsize', size)
|
||||
|
@ -453,7 +431,7 @@ class WebImage(object):
|
|||
)
|
||||
|
||||
def __str__(self):
|
||||
if self.is_downsizeable:
|
||||
if self.is_downsizeable and not self.cl:
|
||||
return '\n<figure class="photo"><a target="_blank" class="adaptive" href="%s"><img src="%s" class="adaptimg" alt="%s" /></a><figcaption class=\"caption\">%s%s</figcaption></figure>\n' % (
|
||||
self.target,
|
||||
self.fallback,
|
||||
|
@ -461,8 +439,18 @@ class WebImage(object):
|
|||
self.fname,
|
||||
self.ext
|
||||
)
|
||||
elif self.cl:
|
||||
self.cl = self.cl.replace('.', ' ')
|
||||
return '<img src="%s" class="%s" alt="%s" title="%s%s" />' % (
|
||||
self.fallback,
|
||||
self.cl,
|
||||
self.alttext,
|
||||
self.fname,
|
||||
self.ext
|
||||
)
|
||||
|
||||
else:
|
||||
return '\n<figure class="picture"><img src="%s" class="aligncenter" alt="%s" /><figcaption class=\"caption\">%s%s</figcaption></figure>\n' % (
|
||||
return '<img src="%s" class="aligncenter" alt="%s" title="%s%s" />' % (
|
||||
self.fallback,
|
||||
self.alttext,
|
||||
self.fname,
|
||||
|
@ -768,10 +756,15 @@ class Content(BaseIter):
|
|||
self.front = Taxonomy()
|
||||
|
||||
def populate(self):
|
||||
now = arrow.utcnow().timestamp
|
||||
for fpath in self.files:
|
||||
item = Singular(fpath, self.images)
|
||||
self.append(item.pubtime, item)
|
||||
|
||||
if item.pubtime > now:
|
||||
logging.warning("skipping future post %s", item.fname)
|
||||
continue
|
||||
|
||||
if item.isonfront:
|
||||
self.front.append(item.pubtime, item)
|
||||
|
||||
|
@ -804,7 +797,7 @@ class Content(BaseIter):
|
|||
'sitemap.txt'
|
||||
)
|
||||
urls = []
|
||||
for t, item in self.data.items():
|
||||
for item in self.data.values():
|
||||
urls.append( "%s/%s/" % (
|
||||
shared.config.get('site', 'url'),
|
||||
item.fname
|
||||
|
@ -814,6 +807,47 @@ class Content(BaseIter):
|
|||
logging.info("writing sitemap to %s" % (target))
|
||||
f.write("\n".join(urls))
|
||||
|
||||
def magicphp(self, renderer):
    """Render ``magic.php`` with the redirect and gone lists.

    Redirects are read from the space-delimited file named by the
    ``common/redirects`` option (first two columns of each row), followed
    by one ``(shortslug, fname)`` pair per item in ``self.data``.  Gone
    entries are the first column of the ``common/gone`` file.  Both files
    are optional.  The ``magic.php`` template from ``renderer.j2`` is
    rendered with these lists and written into the build directory.
    """

    def _rows(name, mincols):
        # Rows of an optional space-delimited file under basedir.
        # csv.reader yields [] for blank lines, so rows shorter than
        # mincols are skipped instead of raising IndexError.
        path = os.path.join(
            shared.config.get('common', 'basedir'),
            shared.config.get('common', name)
        )
        if not os.path.isfile(path):
            return []
        with open(path, newline='') as csvfile:
            return [
                row for row in csv.reader(csvfile, delimiter=' ')
                if len(row) >= mincols
            ]

    redirects = [(row[0], row[1]) for row in _rows('redirects', 2)]
    for item in self.data.values():
        redirects.append((item.shortslug, item.fname))

    gones = [row[0] for row in _rows('gone', 1)]

    tmplvars = {
        'redirects': redirects,
        'gones': gones
    }

    r = renderer.j2.get_template("magic.php").render(tmplvars)
    target = os.path.abspath(os.path.join(
        shared.config.get('target', 'builddir'),
        'magic.php'
    ))

    with open(target, "w") as html:
        logging.debug('writing %s', target)
        html.write(r)
|
||||
|
||||
class Singular(object):
|
||||
def __init__(self, path, images):
|
||||
logging.debug("initiating singular object from %s", path)
|
||||
|
@ -874,6 +908,9 @@ class Singular(object):
|
|||
logging.debug("%s not found in images", fname)
|
||||
continue
|
||||
|
||||
if cl:
|
||||
image.cl = cl
|
||||
|
||||
logging.debug(
|
||||
"replacing %s in content with %s",
|
||||
shortcode,
|
||||
|
@ -904,6 +941,24 @@ class Singular(object):
|
|||
|
||||
return reactions
|
||||
|
||||
@property
def urls(self):
    """External URLs found in the content and in the reactions.

    Collects every ``shared.URLREGEX`` match in ``self.content`` plus all
    URLs listed in ``self.reactions``, drops links whose host is one of
    the site's own domains (``site/domains``, space separated) and
    returns the rest, de-duplicated, in first-seen order.
    """
    urls = list(shared.URLREGEX.findall(self.content))
    for reactionurls in self.reactions.values():
        urls.extend(reactionurls)

    # compare whole host names, not substrings of the config value
    mydomains = shared.config.get('site', 'domains').split(" ")

    r = []
    seen = set()
    for link in urls:
        # the URL pattern starts with whitespace, which ends up in the
        # match; remove it before parsing and storing
        link = link.strip()
        domain = urllib.parse.urlparse(link).netloc
        if domain in mydomains:
            continue
        if link in seen:
            continue
        seen.add(link)
        r.append(link)

    return r
|
||||
|
||||
@property
|
||||
def lang(self):
|
||||
lang = 'en'
|
||||
|
@ -976,7 +1031,7 @@ class Singular(object):
|
|||
maybe = self.meta.get(maybe, False)
|
||||
if maybe:
|
||||
return maybe
|
||||
return self.fname
|
||||
return ''
|
||||
|
||||
@property
|
||||
def url(self):
|
||||
|
@ -1091,6 +1146,7 @@ class Singular(object):
|
|||
'slug': self.fname,
|
||||
'shortslug': self.shortslug,
|
||||
'rssenclosure': self.rssenclosure,
|
||||
'copies': self.offlinecopies,
|
||||
}
|
||||
|
||||
@property
|
||||
|
@ -1143,6 +1199,12 @@ class NASG(object):
|
|||
def __init__(self):
|
||||
# --- set params
|
||||
parser = argparse.ArgumentParser(description='Parameters for NASG')
|
||||
parser.add_argument(
|
||||
'--clear',
|
||||
action='store_true',
|
||||
default=False,
|
||||
help='clear build directory in advance'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--regenerate',
|
||||
action='store_true',
|
||||
|
@ -1217,6 +1279,13 @@ class NASG(object):
|
|||
await searchdb.append(singular)
|
||||
|
||||
def run(self):
|
||||
|
||||
if shared.config.getboolean('params', 'clear'):
|
||||
input('about to clear build directory, press enter to continue')
|
||||
shutil.rmtree(os.path.abspath(
|
||||
shared.config.get('target', 'builddir')
|
||||
))
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
for d in shared.config.options('target'):
|
||||
|
@ -1235,8 +1304,8 @@ class NASG(object):
|
|||
content = Content(images)
|
||||
content.populate()
|
||||
|
||||
if not shared.config.getboolean('params', 'norender'):
|
||||
renderer = Renderer()
|
||||
if not shared.config.getboolean('params', 'norender'):
|
||||
logging.info("rendering content")
|
||||
loop.run_until_complete(self.__acrender(content, renderer))
|
||||
|
||||
|
@ -1249,6 +1318,9 @@ class NASG(object):
|
|||
logging.info("rendering sitemap")
|
||||
content.sitemap()
|
||||
|
||||
logging.info("render magic.php")
|
||||
content.magicphp(renderer)
|
||||
|
||||
logging.info("copy the static bits")
|
||||
src = shared.config.get('source', 'staticdir')
|
||||
for item in os.listdir(src):
|
||||
|
@ -1264,7 +1336,6 @@ class NASG(object):
|
|||
|
||||
loop.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
worker = NASG()
|
||||
worker.run()
|
||||
|
|
4
new.py
4
new.py
|
@ -36,7 +36,7 @@ if __name__ == '__main__':
|
|||
now = arrow.utcnow()
|
||||
parser = argparse.ArgumentParser(description='create doc and print it to stdout')
|
||||
parser.add_argument('--tags', '-t', help='; separated, quoted list of tags')
|
||||
parser.add_argument('--date', '-d', help=' YYYY-mm-ddTHH:MM:SS+TZTZ formatted date, if not now')
|
||||
parser.add_argument('--date', '-d', help=' YYYY-mm-ddTHH:MM:SS+TZ formatted date, if not now')
|
||||
parser.add_argument('--slug', '-s', help='slug (normally autogenerated from title or pubdate)')
|
||||
parser.add_argument('--title', '-l', help='title of new entry')
|
||||
parser.add_argument('--bookmark', '-b', help='URL to bookmark')
|
||||
|
@ -48,7 +48,7 @@ if __name__ == '__main__':
|
|||
args = vars(parser.parse_args())
|
||||
|
||||
if not args['date']:
|
||||
d = now.format("YYYY-MM-DDTHH:mm:ssZ")
|
||||
d = now.format(shared.ARROWISO)
|
||||
args['date'] = input('Date [%s]: ' % (d)) or d
|
||||
|
||||
if not args['title']:
|
||||
|
|
|
@ -3,6 +3,7 @@ appdirs==1.4.3
|
|||
arrow==0.10.0
|
||||
breadability==0.1.20
|
||||
chardet==3.0.3
|
||||
decorator==4.0.11
|
||||
docopt==0.6.2
|
||||
httptools==0.0.9
|
||||
Jinja2==2.9.6
|
||||
|
@ -23,6 +24,7 @@ ujson==1.35
|
|||
unicode-slugify==0.1.3
|
||||
Unidecode==0.4.20
|
||||
uvloop==0.8.0
|
||||
validators==0.11.3
|
||||
Wand==0.4.4
|
||||
websockets==3.3
|
||||
Whoosh==2.7.4
|
||||
|
|
77
search.py
Normal file
77
search.py
Normal file
|
@ -0,0 +1,77 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import asyncio
|
||||
import uvloop
|
||||
import os
|
||||
|
||||
from sanic import Sanic
|
||||
import sanic.response
|
||||
from sanic.log import log as logging
|
||||
from whoosh import index
|
||||
from whoosh import qparser
|
||||
from whoosh import fields
|
||||
from whoosh import analysis
|
||||
import jinja2
|
||||
import shared
|
||||
|
||||
def SearchHandler(query, tmpl):
    """Run a full-text query and return the rendered result page.

    ``query`` is the raw search string; ``+`` is rewritten to `` AND ``
    and `` -`` to `` NOT `` before it is parsed against the ``title``,
    ``content`` and ``tags`` fields.  ``tmpl`` is the template rendered
    with ``term`` (the rewritten query) and ``posts`` (a list of dicts
    with ``title``, ``url``, ``highlight`` and, when stored, ``img``).

    Returns a 400 text response when ``query`` is empty, otherwise a 200
    HTML response.
    """
    if not query:
        return sanic.response.text(
            "You seem to have forgot to enter what you want to search for. Please try again.",
            status=400
        )

    query = query.replace('+', ' AND ').replace(' -', ' NOT ')
    ix = index.open_dir(os.path.abspath(os.path.join(
        shared.config.get('target', 'builddir'),
        shared.config.get('var', 'searchdb')
    )))

    results = []
    try:
        qp = qparser.MultifieldParser(
            ["title", "content", "tags"],
            schema=shared.schema
        )
        q = qp.parse(query)

        # the searcher holds open index files; the context manager
        # releases them once the hits have been copied out
        with ix.searcher() as searcher:
            # NOTE(review): sortedby sorts ascending unless reverse=True
            # is given - confirm low weights are meant to come first
            r = searcher.search(q, sortedby="weight", limit=100)
            logging.info("results for '%s': %i", query, len(r))
            for result in r:
                res = {
                    'title': result['title'],
                    'url': result['url'],
                    'highlight': result.highlights("content"),
                }
                if 'img' in result:
                    res['img'] = result['img']
                results.append(res)
    finally:
        ix.close()

    tvars = {
        'term': query,
        'posts': results,
    }

    logging.info("collected %i results to render", len(results))
    return sanic.response.html(tmpl.render(tvars), status=200)
|
||||
|
||||
if __name__ == '__main__':
    # serve /search on 127.0.0.1:8001 using the uvloop event loop
    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
    app = Sanic()

    # the result template is loaded once, at startup
    jldr = jinja2.FileSystemLoader(
        searchpath=shared.config.get('source', 'templatesdir')
    )
    jenv = jinja2.Environment(loader=jldr)
    tmpl = jenv.get_template('searchresults.html')

    # the allowed methods belong to the route declaration; as a default
    # argument of the handler they had no effect
    @app.route("/search", methods=["GET"])
    async def search(request):
        """Answer ``/search?s=<query>`` with the rendered results."""
        query = request.args.get('s')
        return SearchHandler(query, tmpl)

    app.run(host="127.0.0.1", port=8001, debug=True)
|
95
shared.py
95
shared.py
|
@ -1,8 +1,11 @@
|
|||
import configparser
|
||||
import os
|
||||
import re
|
||||
import glob
|
||||
import logging
|
||||
import subprocess
|
||||
from whoosh import fields
|
||||
from whoosh import analysis
|
||||
import re
|
||||
|
||||
def __expandconfig(config):
|
||||
""" add the dirs to the config automatically """
|
||||
|
@ -18,6 +21,8 @@ def __expandconfig(config):
|
|||
))
|
||||
return config
|
||||
|
||||
ARROWISO = 'YYYY-MM-DDTHH:mm:ssZ'
|
||||
|
||||
URLREGEX = re.compile(
|
||||
r'\s+https?\:\/\/?[a-zA-Z0-9\.\/\?\:@\-_=#]+'
|
||||
r'\.[a-zA-Z0-9\.\/\?\:@\-_=#]*'
|
||||
|
@ -74,3 +79,91 @@ config = configparser.ConfigParser(
|
|||
)
|
||||
config.read('config.ini')
|
||||
config = __expandconfig(config)
|
||||
|
||||
class CMDLine(object):
    """Base class for wrappers around an external command line tool.

    The constructor resolves the executable from ``PATH``.  Used as a
    context manager, it starts the tool with ``-stay_open True -@ -`` and
    :meth:`execute` exchanges commands with it over stdin/stdout until
    ``sentinel`` is seen at the end of the output.
    """

    # marker that ends one answer of the long-running process; subclasses
    # may override it
    sentinel = "{ready}\n"

    def __init__(self, executable):
        """Locate ``executable``; raise OSError when it is not in PATH."""
        self.executable = self._which(executable)
        if self.executable is None:
            raise OSError('No %s found in PATH!' % executable)

    @staticmethod
    def _which(name):
        """Return the full path of ``name`` from PATH, or None.

        Only regular files with the executable bit set are accepted.
        """
        for d in os.environ.get('PATH', os.defpath).split(os.pathsep):
            candidate = os.path.join(d, name)
            if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                return candidate
        return None

    def __enter__(self):
        """Start the long-running child process and return ``self``."""
        self.process = subprocess.Popen(
            [self.executable, "-stay_open", "True", "-@", "-"],
            universal_newlines=True,
            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Tell the child to stop, then close its pipes and reap it."""
        self.process.stdin.write("-stay_open\nFalse\n")
        self.process.stdin.flush()
        # closes stdin, drains stdout and waits, so no zombie process or
        # open pipe is left behind
        self.process.communicate()

    def execute(self, *args):
        """Send ``args`` as one command and return the child's answer.

        The arguments are written newline separated, followed by
        ``-execute``.  Output is read until it ends with ``sentinel``,
        which is stripped from the returned string.

        Raises OSError if the child closes its output before the
        sentinel arrives.
        """
        args = args + ("-execute\n",)
        self.process.stdin.write(str.join("\n", args))
        self.process.stdin.flush()

        marker = self.sentinel.encode('utf-8')
        fd = self.process.stdout.fileno()
        # collect raw bytes and decode once at the end, so a multibyte
        # character split across two reads is not lost
        output = bytearray()
        while not output.endswith(marker):
            chunk = os.read(fd, 4096)
            if not chunk:
                # EOF: the child is gone, the sentinel will never come
                raise OSError(
                    '%s closed its output unexpectedly' % self.executable
                )
            output += chunk
        return bytes(output[:-len(marker)]).decode('utf-8', errors='ignore')
|
||||
|
||||
class Pandoc(CMDLine):
    """ Converts text between Markdown and HTML with the `pandoc` binary """

    def __init__(self, md2html=True):
        """Pick the conversion direction.

        With ``md2html`` true the input is Markdown (with a set of
        extensions enabled) and the output is HTML5; otherwise the input
        is HTML and the output is Markdown with raw HTML, native divs and
        native spans disabled.
        """
        super().__init__('pandoc')
        if not md2html:
            disabled = (
                'raw_html',
                'native_divs',
                'native_spans',
            )
            self.o = "markdown" + "".join("-%s" % ext for ext in disabled)
            self.i = 'html'
            return

        enabled = (
            'backtick_code_blocks',
            'auto_identifiers',
            'fenced_code_attributes',
            'definition_lists',
            'grid_tables',
            'pipe_tables',
            'strikeout',
            'superscript',
            'subscript',
            'markdown_in_html_blocks',
            'shortcut_reference_links',
            'autolink_bare_uris',
            'raw_html',
            'link_attributes',
            'header_attributes',
            'footnotes',
        )
        self.i = "markdown" + "".join("+%s" % ext for ext in enabled)
        self.o = 'html5'

    def convert(self, text):
        """Pipe ``text`` through pandoc and return the stripped output.

        Anything pandoc prints on stderr is logged as an error; the
        converted text is returned regardless.
        """
        source_fmt = '--from=%s' % self.i
        target_fmt = '--to=%s' % self.o
        cmd = (self.executable, '-o-', source_fmt, target_fmt)
        logging.debug('converting content with Pandoc')

        proc = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        payload = text.encode()
        converted, problems = proc.communicate(input=payload)
        if problems:
            logging.error("Error during pandoc covert:\n\t%s\n\t%s", cmd, problems)
        return converted.decode('utf-8').strip()
|
||||
|
|
193
webmention.py
Normal file
193
webmention.py
Normal file
|
@ -0,0 +1,193 @@
|
|||
import asyncio
|
||||
import uvloop
|
||||
import os
|
||||
import hashlib
|
||||
import json
|
||||
import urllib.parse
|
||||
import frontmatter
|
||||
from sanic import Sanic
|
||||
import sanic.response
|
||||
from sanic.log import log as logging
|
||||
import validators
|
||||
import arrow
|
||||
from webmentiontools import urlinfo
|
||||
import shared
|
||||
import envelope
|
||||
|
||||
|
||||
class WebmentionHandler(object):
    """Validate, store and announce one incoming webmention.

    After :meth:`run`, ``self.r`` holds the HTTP response to send back:
    202 when the mention was saved, 400/408 when it was rejected, and the
    initial 500 if something unexpected prevented any other answer.
    """

    def __init__(self, source, target):
        """Remember the ``source`` and ``target`` URLs of the mention."""
        self.source = source
        self.target = target
        self.now = arrow.utcnow().timestamp
        logging.info("incoming webmention %s => %s", self.source, self.target)

        # pessimistic default, replaced as soon as a step decides
        self.r = sanic.response.text(
            "something went wrong on my side, could you please let me know at hello@petermolnar.eu ?",
            status=500
        )

    def run(self):
        """Process the mention; stop at the first step that rejects it."""
        if not self._validate():
            return

        # a mention that could not be fetched or verified must neither
        # be stored nor trigger a notification
        if not self._parse():
            return

        self._save()
        self._notify()

    def _validate(self):
        """Check both URLs and their domains.

        Returns True when ``source`` and ``target`` are valid URLs, the
        target host is one of the site's domains (``site/domains``, space
        separated) and the source host is not.  Otherwise sets a 400
        response on ``self.r`` and returns False.
        """
        # a sequence of pairs, not a dict keyed by URL: with identical
        # source and target a dict would keep only one of the two checks
        checks = (
            (self.source, '"source" parameter is an invalid URL'),
            (self.target, '"target" parameter is an invalid URL'),
        )
        for url, emsg in checks:
            logging.debug("validating URL %s", url)
            # a missing form field arrives as None
            if not url or not validators.url(url):
                self.r = sanic.response.text(
                    emsg,
                    status=400
                )
                return False

        logging.debug("checking target domain")
        _target = urllib.parse.urlparse(self.target)
        _target_domain = '{uri.netloc}'.format(uri=_target)
        _mydomains = shared.config.get('site', 'domains').split(" ")
        if _target_domain not in _mydomains:
            self.r = sanic.response.text(
                "'target' is not in the list of allowed domains",
                status=400
            )
            return False

        logging.debug("checking selfpings")
        _source = urllib.parse.urlparse(self.source)
        _source_domain = '{uri.netloc}'.format(uri=_source)
        if _source_domain in _mydomains:
            self.r = sanic.response.text(
                "selfpings are not allowed",
                status=400
            )
            return False

        return True

    def _parse(self):
        """Fetch source and target and verify that one links the other.

        On success ``self.source`` and ``self.target`` are replaced by the
        ``realurl`` reported for each fetch and True is returned.  On
        failure an error response is set on ``self.r`` and False is
        returned.
        """
        logging.debug("fetching %s", self.source)
        self._source = urlinfo.UrlInfo(self.source)
        if self._source.error:
            self.r = sanic.response.text(
                "couldn't fetch 'source' from %s" % (self.source),
                status=408
            )
            return False

        self.source = self._source.realurl
        if not self._source.linksTo(self.target):
            self.r = sanic.response.text(
                "'source' (%s) does not link to 'target' (%s)" % (
                    self.source,
                    self.target
                ),
                status=400
            )
            return False

        logging.debug("fetching %s", self.target)
        self._target = urlinfo.UrlInfo(self.target)
        if self._target.error:
            self.r = sanic.response.text(
                "couldn't fetch 'target' from %s" % (self.target),
                status=408
            )
            return False

        self.target = self._target.realurl
        return True

    def _save(self):
        """Write the mention as a front-matter document.

        The file lives in ``source/commentsdir`` and is named after
        :attr:`mhash`, so a mention with the same metadata overwrites its
        earlier copy.  Sets the 202 response.
        """
        doc = frontmatter.loads('')
        doc.metadata = self.meta
        doc.content = self.content
        target = os.path.join(
            shared.config.get('source', 'commentsdir'),
            self.mhash
        )
        if os.path.isfile(target):
            logging.warning('updating existing webmention %s', target)
        else:
            logging.warning('saving incoming webmention to %s', target)

        with open(target, 'wt') as t:
            t.write(frontmatter.dumps(doc))
        self.r = sanic.response.text(
            "accepted",
            status=202
        )

    def _notify(self):
        """Mail a Markdown summary of the mention.

        Sender and recipient come from the ``webmention`` config section.
        """
        # read through the property so this also works when _save has
        # not populated the cache yet
        meta = self.meta
        # presumably 'author' is a dict with optional name/url/email
        # keys - verify against urlinfo.UrlInfo.author()
        text = "# webmention\n## Source\n\nauthor\n: %s\n\nURL\n: %s\n\nemail\n: %s\n\ndate\n: %s\n\n## Target\n\nURL\n: %s\n\n---\n\n%s" % (
            meta['author'].get('name', self.source),
            meta['author'].get('url', self.source),
            meta['author'].get('email', ''),
            meta['date'],
            self.target,
            self.content
        )

        l = envelope.Letter(
            sender=(
                shared.config.get('webmention', 'from_name'),
                shared.config.get('webmention', 'from_address')
            ),
            recipient=(
                shared.config.get('webmention', 'to_name'),
                shared.config.get('webmention', 'to_address')
            ),
            subject="[webmention] %s" % self.source,
            text=text
        )
        l.make()
        l.send()

    @property
    def mhash(self):
        """SHA-1 hex digest of the metadata serialised as sorted JSON."""
        return hashlib.sha1(
            json.dumps(self.meta, sort_keys=True).encode('utf-8')
        ).hexdigest()

    @property
    def meta(self):
        """Metadata of the mention, built once and then cached.

        Requires :meth:`_parse` to have fetched the source.
        """
        if hasattr(self, '_meta'):
            return self._meta

        self._meta = {
            'author': self._source.author(),
            'type': self._source.relationType(),
            'target': self.target,
            'source': self.source,
            'date': arrow.get(self._source.pubDate()).format(shared.ARROWISO),
        }
        return self._meta

    @property
    def content(self):
        """The source's content converted from HTML to Markdown (cached)."""
        if hasattr(self, '_content'):
            return self._content

        # from HTML to Markdown
        self._content = shared.Pandoc(False).convert(self._source.content())
        return self._content
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # receive webmentions on 127.0.0.1:8002 using the uvloop event loop
    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
    app = Sanic()

    @app.route("/webmention", methods=["POST"])
    async def wm(request):
        """Process the posted ``source``/``target`` pair and answer."""
        handler = WebmentionHandler(
            request.form.get('source'),
            request.form.get('target')
        )
        handler.run()
        return handler.r

    app.run(host="127.0.0.1", port=8002, debug=True)
|
Loading…
Reference in a new issue