all repos — nasg @ d3fbf2e51f0a1c52e530acc9f08ab9c5b64e2f8f

Back To Pandoc

So, Python Markdown is a bottomless pit of horrors, including crippling parsing bugs
that appear at random, out of nowhere, and a lack of features. It's definitely much faster than
Pandoc, but Pandoc doesn't fall apart when there's a regex in a fenced code block
that happens to be a regex for markdown elements.

Also added some ugly post-processing string replacements to make Pandoc's fenced code output work
with Prism:
instead of Pandoc's <pre class="codelang"><code>, Prism wants
<pre><code class="language-codelang">, so I added a regex sub, because it's 00:32.
Peter Molnar hello@petermolnar.eu
Sat, 04 Aug 2018 00:28:55 +0100
commit

d3fbf2e51f0a1c52e530acc9f08ab9c5b64e2f8f

parent

96d0c238d68b147540085bc1e13f9f15fc444258

D html5_fenced_code.py

@@ -1,57 +0,0 @@

-""" -This is a simplified FencedBlockPreprocessor which outputs "proper" <code> -naming, eg. language-python, instead of just python, so prism.js understands -it. - -It doesn't deal with CodeHilite. - -""" - -from markdown.preprocessors import Preprocessor -from markdown.extensions import Extension -from markdown.extensions.fenced_code import FencedBlockPreprocessor - -class HTML5FencedBlockPreprocessor(Preprocessor): - FENCED_BLOCK_RE = FencedBlockPreprocessor.FENCED_BLOCK_RE - CODE_WRAP = '<pre><code%s>%s</code></pre>' - LANG_TAG = ' class="language-%s"' - - def __init__(self, md): - super(HTML5FencedBlockPreprocessor, self).__init__(md) - - def run(self, lines): - text = "\n".join(lines) - while 1: - m = self.FENCED_BLOCK_RE.search(text) - if m: - lang = '' - if m.group('lang'): - lang = self.LANG_TAG % (m.group('lang')) - - code = self.CODE_WRAP % ( - lang, - m.group('code') - ) - - placeholder = self.markdown.htmlStash.store(code) - text = '%s\n%s\n%s' % ( - text[:m.start()], - placeholder, - text[m.end():] - ) - else: - break - return text.split("\n") - - -class HTML5FencedCodeExtension(Extension): - def extendMarkdown(self, md, md_globals): - md.registerExtension(self) - md.preprocessors.add( - 'html5_fenced_code', - HTML5FencedBlockPreprocessor(md), - ">normalize_whitespace" - ) - -def makeExtension(*args, **kwargs): - return HTML5FencedCodeExtension(*args, **kwargs)
M nasg.pynasg.py

@@ -10,7 +10,7 @@

import glob import os import time -from functools import lru_cache as cached +from functools import partial import re import imghdr import logging

@@ -26,18 +26,15 @@ import langdetect

import wand.image import jinja2 import frontmatter -import markdown from feedgen.feed import FeedGenerator from bleach import clean from emoji import UNICODE_EMOJI from slugify import slugify import requests +from pandoc import pandoc import exiftool import settings import keys -import html5_fenced_code - -from pprint import pprint MarkdownImage = namedtuple( 'MarkdownImage',

@@ -56,33 +53,37 @@ r'(?:\s[\'\"](?P<title>[^\"\']+)[\'\"])?\)(?:{(?P<css>[^\}]+)\})?)',

re.IGNORECASE ) -RE_HTTP = re.compile( - r'^https?://', - re.IGNORECASE +RE_CODE = re.compile( + r'(?:[~`]{3})(?:[^`]+)?' ) -MD = markdown.Markdown( - output_format='xhtml5', - extensions=[ - 'html5_fenced_code', - 'abbr', - 'attr_list', - 'def_list', - 'footnotes', - 'tables', - 'smart_strong', - 'headerid', - 'urlize', - ] +RE_PRECODE = re.compile( + r'<pre class="([^"]+)"><code>' ) -RE_CODE = re.compile( - r'(?:[~`]{3})(?:[^`]+)?' -) +class cached_property(object): + def __init__(self, method, name=None): + # record the unbound-method and the name + self.method = method + self.name = name or method.__name__ + self.__doc__ = method.__doc__ + def __get__(self, inst, cls): + # self: <__main__.cache object at 0xb781340c> + # inst: <__main__.Foo object at 0xb781348c> + # cls: <class '__main__.Foo'> + if inst is None: + # instance attribute accessed on class, return self + # You get here if you write `Foo.bar` + return self + # compute, cache and return the instance's attribute value + result = self.method(inst) + # setattr redefines the instance's attribute so this doesn't get called again + setattr(inst, self.name, result) + return result + class MarkdownDoc(object): - @property - @cached() + @cached_property def _parsed(self): with open(self.fpath, mode='rt') as f: logging.debug('parsing YAML+MD file %s', self.fpath)

@@ -97,14 +98,16 @@ @property

def content(self): return self._parsed[1] - @property - @cached() + @cached_property def html_content(self): c = "%s" % (self.content) if hasattr(self, 'images') and len(self.images): for match, img in self.images.items(): c = c.replace(match, str(img)) - return MD.reset().convert(c) + # return MD.reset().convert(c) + c = pandoc(c) + c = RE_PRECODE.sub('<pre><code lang="\g<1>" class="language-\g<1>">', c) + return c class Comment(MarkdownDoc):

@@ -188,8 +191,7 @@ """

Redirect object for entries that moved """ - @property - @cached() + @cached_property def target(self): target = '' with open(self.fpath, 'rt') as f:

@@ -219,8 +221,7 @@ if ctime > self.mtime:

ret = ctime return ret - @property - @cached() + @cached_property def files(self): """ An array of files present at the same directory level as

@@ -233,8 +234,7 @@ for k in glob.glob(os.path.join(os.path.dirname(self.fpath), '*.*'))

if not k.endswith('.md') and not k.startswith('.') ] - @property - @cached() + @cached_property def comments(self): """ An dict of Comment objects keyed with their path, populated from the

@@ -251,8 +251,7 @@ c = Comment(f)

comments[c.dt.timestamp] = c return comments - @property - @cached() + @cached_property def images(self): """ A dict of WebImage objects, populated by:

@@ -317,10 +316,10 @@ @property

def summary(self): return self.meta.get('summary', '') - @property - @cached() + @cached_property def html_summary(self): - return MD.reset().convert(self.summary) + # return MD.reset().convert(self.summary) + return pandoc(self.summary) @property def title(self):

@@ -428,8 +427,7 @@ return True

else: return False - @property - @cached() + @cached_property def tmplvars(self): v = { 'title': self.title,

@@ -548,8 +546,7 @@ 'exif': self.exif,

'is_photo': self.is_photo, }) - @property - @cached() + @cached_property def meta(self): return exiftool.Exif(self.fpath)

@@ -844,7 +841,7 @@ def __init__(self):

self._tasks = [] self._loop = asyncio.get_event_loop() - def append(self, job): + def add(self, job): task = self._loop.create_task(job) self._tasks.append(task)

@@ -872,7 +869,7 @@ def add_redirect(self, source, target):

if target in self.gone: self.add_gone(source) else: - if not RE_HTTP.match(target): + if '://' not in target: target = "%s/%s" % (settings.site.get('url'), target) self.redirect[source] = target

@@ -1003,6 +1000,7 @@

fg = FeedGenerator() fg.id(self.feed) fg.link(href=self.feed, rel='self') + fg.link(href=settings.meta.get('hub'), rel='hub') fg.title(self.title) fg.author({ 'name': settings.author.get('name'),

@@ -1014,6 +1012,10 @@

for post in self.get_posts(start, end): dt = arrow.get(post.get('pubtime')) fe = fg.add_entry() + fe.author({ + 'name': settings.author.get('name'), + 'email':settings.author.get('email') + }) fe.id(post.get('url')) fe.link(href=post.get('url')) fe.title(post.get('title'))

@@ -1021,7 +1023,7 @@ fe.published(dt.datetime)

fe.updated(dt.datetime) fe.content( post.get('html_content'), - type='CDATA' + #src=post.get('url') ) fe.rights('%s %s %s' % ( post.get('licence').upper(),

@@ -1035,15 +1037,15 @@ enc.get('url'),

"%d" % enc.get('size'), enc.get('mime') ) + atom = os.path.join(dirname, 'index.xml') with open(atom, 'wb') as f: logging.info('writing file: %s', atom) f.write(fg.atom_str(pretty=True)) - jsfile = os.path.join(dirname, 'index.json') def render_page(self, pagenum=1, pages=1): if self.display == 'flat': - start = 1 + start = 0 end = -1 else: pagination = int(settings.site.get('pagination'))

@@ -1201,7 +1203,7 @@ @property

def renderfile(self): return os.path.join(settings.paths.get('build'), 'sitemap.txt') - async def save(self): + async def render(self): if self.mtime >= sorted(self.values())[-1]: return with open(self.renderfile, 'wt') as f:

@@ -1274,8 +1276,8 @@ makecomments()

content = settings.paths.get('content') worker = AsyncWorker() - rules = IndexPHP() + for e in glob.glob(os.path.join(content, '*', '*.ptr')): post = Gone(e) if post.mtime > last:

@@ -1287,8 +1289,8 @@ if post.mtime > last:

last = post.mtime rules.add_redirect(post.source, post.target) - if rules.mtime < last: - worker.append(rules.render()) + if rules.mtime < last or settings.args.get('force'): + worker.add(rules.render()) sitemap = Sitemap() search = Search()

@@ -1297,10 +1299,10 @@ categories['/'] = Category()

for e in sorted(glob.glob(os.path.join(content, '*', '*', 'index.md'))): post = Singular(e) - worker.append(post.render()) - worker.append(post.copyfiles()) + worker.add(post.copyfiles()) for i in post.images.values(): - worker.append(i.downsize()) + worker.add(i.downsize()) + worker.add(post.render()) if post.is_future: continue else:

@@ -1322,11 +1324,11 @@ content=post.content

) search.__exit__() - worker.append(search.render()) + search.render() for category in categories.values(): - worker.append(category.render()) + worker.add(category.render()) - worker.append(sitemap.save()) + worker.add(sitemap.render()) worker.run() logging.info('worker finished')
A pandoc.py

@@ -0,0 +1,44 @@

+import subprocess +import logging + +def pandoc(text): + # TODO: cache? + # import hashlib + # print(hashlib.md5("whatever your string is".encode('utf-8')).hexdigest()) + + """ Pandoc command line call with piped in- and output """ + cmd = ( + 'pandoc', + '-o-', + '--from=markdown+%s' % ( + '+'.join([ + 'footnotes', + 'pipe_tables', + 'raw_html', + 'definition_lists', + 'backtick_code_blocks', + 'fenced_code_attributes', + 'shortcut_reference_links', + 'lists_without_preceding_blankline', + 'autolink_bare_uris', + ]) + ), + '--to=html5', + '--quiet', + '--no-highlight' + ) + p = subprocess.Popen( + cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + stdout, stderr = p.communicate(input=text.encode()) + if stderr: + logging.warning( + "Error during pandoc covert:\n\t%s\n\t%s", + cmd, + stderr + ) + return stdout.decode('utf-8').strip()
M requirements.txtrequirements.txt

@@ -1,26 +1,10 @@

arrow==0.12.1 bleach==2.1.3 -certifi==2018.4.16 -chardet==3.0.4 -decorator==4.3.0 emoji==0.5.0 feedgen==0.7.0 -html5lib==1.0.1 -idna==2.7 Jinja2==2.10 langdetect==1.0.7 -lxml==4.2.3 -Markdown==2.6.11 -markdown-urlize==0.2.0 -MarkupSafe==1.0 -Pygments==2.2.0 -python-dateutil==2.7.3 python-frontmatter==0.4.2 -PyYAML==3.13 requests==2.19.1 -six==1.11.0 unicode-slugify==0.1.3 -Unidecode==1.0.22 -urllib3==1.23 Wand==0.4.4 -webencodings==0.5.1
M templates/Index.j2.phptemplates/Index.j2.php

@@ -6,10 +6,26 @@ "{{ from }}" => "{{ to }}",

{% endfor %} ); +$redirects_re = array( + '^(?:sysadmin|it|linux-tech-coding|sysadmin-blog)\/?(page.*)?$' => 'category/article/', + '^(?:fotography|photoblog)\/?(page.*)?$' => '/category/photo/$1', + '^blog\/?(page.*)?$' => '/category/journal/', + '^blips\/?(page.*)?$' => '/category/note/$1', + '^r\/?(page.*)?$' => '/category/note/$1', + '^(?:linux-tech-coding|it|sysadmin-blog|sysadmin|fotography|blips|blog|photoblog|article|journal|photo|note|r)\/((?!page).*)' => '/$1', +); + $gone = array( {% for gone in gones %} "{{ gone }}" => true, {% endfor %} +); + +$gone_re = array( + '^cache/.*$', + '^files/.*$', + '^wp-content/.*$', + '^broadcast\/wp-ffpc.message$', );

@@ -78,11 +94,33 @@ $uri = str_replace('/feed/', '', $uri);

$uri = str_replace('/atom/', '', $uri); $uri = trim($uri, '/'); -if (isset($gone[$uri])) +foreach ($gone_re as $pattern) { + if (preg_match(sprintf('/%s/', $pattern), $uri)) { + gone($uri); + } +} + +foreach ($redirects_re as $pattern => $target) { + $maybe = preg_match(sprintf('/%s/i', $pattern), $uri, $matches); + if ($maybe) { + $target = str_replace('$1', $matches[1], $target); + redirect_to($target); + } +} + +/* "logic" */ +if (isset($gone[$uri])) { gone($uri); -elseif (isset($redirects[$uri])) +} +elseif (isset($redirects[$uri])) { redirect_to($redirects[$uri]); -elseif (strstr($uri, '_')) +} +elseif (preg_match('/^\.well-known\/(host-meta|webfinger).*$/', $uri)) { + redirect_to("https://fed.brid.gy/{$uri}"); +} +elseif (strstr($uri, '_')) { maybe_redirect(str_replace('_', '-', $uri)); -else +} +else { notfound(); +}
M templates/Singular.j2.htmltemplates/Singular.j2.html

@@ -5,3 +5,11 @@ <meta name="description" content="{{ post.summary|e }}" />

<link rel="canonical" href="{{ post.url }}" /> <link rel="license" href="https://creativecommons.org/licenses/4.0/{{ post.licence }}" /> {% endblock %} +{% block prism %} + {% if post.has_code %} + <style media="all"> + {% include 'prism.css' %} + </style> + <script src="{{ site.url }}/prism.js"></script> + {% endif %} +{% endblock %}
M templates/base.j2.htmltemplates/base.j2.html

@@ -33,14 +33,11 @@ setto = 'none';

} localStorage.setItem("stylesheet", setto); e.setAttribute("media", setto); + return false; } </script> - {% if post.has_code %} - <style media="all"> - {% include 'prism.css' %} - </style> - <script src="{{ site.url }}/prism.js"></script> - {% endif %} +{% block prism %} +{% endblock %} </head> <body>

@@ -90,7 +87,7 @@ </form>

<p class="contrast"> <a title="toggle site colour scheme" href="#" - onclick="toggleStylesheet(this)"> + onclick="return toggleStylesheet(this)"> <svg class="icon" width="16" height="16"> <use xlink:href="#icon-contrast" /> </svg>

@@ -169,9 +166,7 @@ alt="Photo of {{ author.name }}" />

<a class="fn p-name url u-url u-uid" href="{{ author.url }}"> {{ author.name }} </a> - &lt;<a rel="me" class="u-email email" href="mailto:{{ author.email }}"> - {{ author.email }} - </a>&gt; + &lt;<a rel="me" class="u-email email" href="mailto:{{ author.email }}">{{ author.email }}</a>&gt; </p> </dd>

@@ -422,7 +417,7 @@ </div>

</nav> <div class="webring"> <a href="https://xn--sr8hvo.ws/🇻🇮📢/previous">←</a> - Member of <a href="https://xn--sr8hvo.ws">IndieWeb Webring</a> 🕸💍 + Member of <a href="https://xn--sr8hvo.ws">IndieWeb Webring</a> <a href="https://xn--sr8hvo.ws/🇻🇮📢/next">→</a> </div> </div>
M templates/style.csstemplates/style.css

@@ -25,7 +25,7 @@ transform: rotate(0deg);

width: 16px; height: 16px; fill: currentColor; - vertical-align: text-top; + vertical-align: middle; } a {

@@ -39,6 +39,10 @@ }

h1 { font-size: 1.6em; +} + +h1, h2 { + line-height: 1.2em; } h2, h3 {

@@ -153,6 +157,7 @@ }

pre { padding: 0.6em; + position: relative; } code {

@@ -163,6 +168,11 @@ pre > code {

border: none; } +pre> code::before { + content: attr(lang); + float: right; +} + table { border-collapse: collapse; border-spacing: 0;

@@ -280,16 +290,11 @@ body > footer dd {

margin: -1.3em 0 0 4em; } -body > footer nav { - display: flex; - justify-content: space-between; -} - .webring { text-align: center; } -.footnote a { +.footnotes a { display: inline-block; overflow: hidden; white-space: nowrap;

@@ -298,7 +303,7 @@ vertical-align: top;

max-width: 80%; } -.footnote-ref { +.footnote-back { margin: 0 0 0 0.1em; }

@@ -306,6 +311,7 @@ .contrast,

.follow { position: fixed; right: 1em; + z-index: 100; } .contrast {

@@ -333,5 +339,10 @@ }

body > header form { margin: 0; + } + + body > footer nav { + display: flex; + justify-content: space-between; } }