From d3fbf2e51f0a1c52e530acc9f08ab9c5b64e2f8f Mon Sep 17 00:00:00 2001 From: Peter Molnar Date: Sat, 4 Aug 2018 00:28:55 +0100 Subject: [PATCH] Back To Pandoc So, Python Markdown is a bottomless pit of horrors, including crippling parsing bugs, random   out of nowhere, lack of features. It's definitely much faster, than Pandoc, but Pandoc doesn't go full retard where there's a regex in a fenced code block, that happens to be a regex for markdown elements. Also added some ugly post string replacements to make Pandoc fenced code output work with Prism: instead of the Pandoc
, Prism wants

-naming, eg. language-python, instead of just python, so prism.js understands
-it.
-
-It doesn't deal with CodeHilite.
-
-"""
-
-from markdown.preprocessors import Preprocessor
-from markdown.extensions import Extension
-from markdown.extensions.fenced_code import FencedBlockPreprocessor
-
-class HTML5FencedBlockPreprocessor(Preprocessor):
-    FENCED_BLOCK_RE = FencedBlockPreprocessor.FENCED_BLOCK_RE
-    CODE_WRAP = '
%s
' - LANG_TAG = ' class="language-%s"' - - def __init__(self, md): - super(HTML5FencedBlockPreprocessor, self).__init__(md) - - def run(self, lines): - text = "\n".join(lines) - while 1: - m = self.FENCED_BLOCK_RE.search(text) - if m: - lang = '' - if m.group('lang'): - lang = self.LANG_TAG % (m.group('lang')) - - code = self.CODE_WRAP % ( - lang, - m.group('code') - ) - - placeholder = self.markdown.htmlStash.store(code) - text = '%s\n%s\n%s' % ( - text[:m.start()], - placeholder, - text[m.end():] - ) - else: - break - return text.split("\n") - - -class HTML5FencedCodeExtension(Extension): - def extendMarkdown(self, md, md_globals): - md.registerExtension(self) - md.preprocessors.add( - 'html5_fenced_code', - HTML5FencedBlockPreprocessor(md), - ">normalize_whitespace" - ) - -def makeExtension(*args, **kwargs): - return HTML5FencedCodeExtension(*args, **kwargs) diff --git a/nasg.py b/nasg.py index 04973af..7cb8522 100644 --- a/nasg.py +++ b/nasg.py @@ -10,7 +10,7 @@ __email__ = "mail@petermolnar.net" import glob import os import time -from functools import lru_cache as cached +from functools import partial import re import imghdr import logging @@ -26,18 +26,15 @@ import langdetect import wand.image import jinja2 import frontmatter -import markdown from feedgen.feed import FeedGenerator from bleach import clean from emoji import UNICODE_EMOJI from slugify import slugify import requests +from pandoc import pandoc import exiftool import settings import keys -import html5_fenced_code - -from pprint import pprint MarkdownImage = namedtuple( 'MarkdownImage', @@ -56,33 +53,37 @@ RE_MDIMG = re.compile( re.IGNORECASE ) -RE_HTTP = re.compile( - r'^https?://', - re.IGNORECASE -) - -MD = markdown.Markdown( - output_format='xhtml5', - extensions=[ - 'html5_fenced_code', - 'abbr', - 'attr_list', - 'def_list', - 'footnotes', - 'tables', - 'smart_strong', - 'headerid', - 'urlize', - ] -) - RE_CODE = re.compile( r'(?:[~`]{3})(?:[^`]+)?' ) +RE_PRECODE = re.compile( + r'
'
+)
+
+class cached_property(object):  # descriptor: runs `method` on first read, then keeps the result on the instance
+    def __init__(self, method, name=None):
+        # remember the wrapped function and the attribute name to cache under (defaults to the function's own name)
+        self.method = method
+        self.name = name or method.__name__
+        self.__doc__ = method.__doc__
+    def __get__(self, inst, cls):
+        # self: this descriptor, e.g. <__main__.cached_property object at 0xb781340c>
+        # inst: the instance being read from, e.g. <__main__.Foo object at 0xb781348c>
+        # cls: the owning class, e.g. <class '__main__.Foo'>
+        if inst is None:
+            # attribute looked up on the class itself rather than on an instance,
+            # i.e. someone wrote `Foo.bar`: hand back the descriptor object
+            return self
+        # first read on this instance: compute the value by calling the wrapped function
+        result = self.method(inst)
+        # store it on the instance under the same name; no __set__ is defined here, so the instance value wins on later reads
+        setattr(inst, self.name, result)
+        return result
+
+
 class MarkdownDoc(object):
-    @property
-    @cached()
+    @cached_property
     def _parsed(self):
         with open(self.fpath, mode='rt') as f:
             logging.debug('parsing YAML+MD file %s', self.fpath)
@@ -97,14 +98,16 @@ class MarkdownDoc(object):
     def content(self):
         return self._parsed[1]
 
-    @property
-    @cached()
+    @cached_property
     def html_content(self):
         c = "%s" % (self.content)
         if hasattr(self, 'images') and len(self.images):
             for match, img in self.images.items():
                 c = c.replace(match, str(img))
-        return MD.reset().convert(c)
+        # return MD.reset().convert(c)
+        c = pandoc(c)
+        c = RE_PRECODE.sub('
', c)
+        return c
 
 
 class Comment(MarkdownDoc):
@@ -188,8 +191,7 @@ class Redirect(Gone):
     Redirect object for entries that moved
     """
 
-    @property
-    @cached()
+    @cached_property
     def target(self):
         target = ''
         with open(self.fpath, 'rt') as f:
@@ -219,8 +221,7 @@ class Singular(MarkdownDoc):
                 ret = ctime
         return ret
 
-    @property
-    @cached()
+    @cached_property
     def files(self):
         """
         An array of files present at the same directory level as
@@ -233,8 +234,7 @@ class Singular(MarkdownDoc):
             if not k.endswith('.md') and not k.startswith('.')
         ]
 
-    @property
-    @cached()
+    @cached_property
     def comments(self):
         """
         An dict of Comment objects keyed with their path, populated from the
@@ -251,8 +251,7 @@ class Singular(MarkdownDoc):
             comments[c.dt.timestamp] = c
         return comments
 
-    @property
-    @cached()
+    @cached_property
     def images(self):
         """
         A dict of WebImage objects, populated by:
@@ -317,10 +316,10 @@ class Singular(MarkdownDoc):
     def summary(self):
         return self.meta.get('summary', '')
 
-    @property
-    @cached()
+    @cached_property
     def html_summary(self):
-        return MD.reset().convert(self.summary)
+        # return MD.reset().convert(self.summary)
+        return pandoc(self.summary)
 
     @property
     def title(self):
@@ -428,8 +427,7 @@ class Singular(MarkdownDoc):
         else:
             return False
 
-    @property
-    @cached()
+    @cached_property
     def tmplvars(self):
         v = {
             'title': self.title,
@@ -548,8 +546,7 @@ class WebImage(object):
             'is_photo': self.is_photo,
         })
 
-    @property
-    @cached()
+    @cached_property
     def meta(self):
         return exiftool.Exif(self.fpath)
 
@@ -844,7 +841,7 @@ class AsyncWorker(object):
         self._tasks = []
         self._loop = asyncio.get_event_loop()
 
-    def append(self, job):
+    def add(self, job):
         task = self._loop.create_task(job)
         self._tasks.append(task)
 
@@ -872,7 +869,7 @@ class IndexPHP(object):
         if target in self.gone:
             self.add_gone(source)
         else:
-            if not RE_HTTP.match(target):
+            if '://' not in target:
                 target = "%s/%s" % (settings.site.get('url'), target)
             self.redirect[source] = target
 
@@ -1003,6 +1000,7 @@ class Category(dict):
         fg = FeedGenerator()
         fg.id(self.feed)
         fg.link(href=self.feed, rel='self')
+        fg.link(href=settings.meta.get('hub'), rel='hub')
         fg.title(self.title)
         fg.author({
             'name': settings.author.get('name'),
@@ -1014,6 +1012,10 @@ class Category(dict):
         for post in self.get_posts(start, end):
             dt = arrow.get(post.get('pubtime'))
             fe = fg.add_entry()
+            fe.author({
+                'name': settings.author.get('name'),
+                'email':settings.author.get('email')
+            })
             fe.id(post.get('url'))
             fe.link(href=post.get('url'))
             fe.title(post.get('title'))
@@ -1021,7 +1023,7 @@ class Category(dict):
             fe.updated(dt.datetime)
             fe.content(
                 post.get('html_content'),
-                type='CDATA'
+                #src=post.get('url')
             )
             fe.rights('%s %s %s' % (
                 post.get('licence').upper(),
@@ -1035,15 +1037,15 @@ class Category(dict):
                     "%d" % enc.get('size'),
                     enc.get('mime')
                 )
+
         atom = os.path.join(dirname, 'index.xml')
         with open(atom, 'wb') as f:
             logging.info('writing file: %s', atom)
             f.write(fg.atom_str(pretty=True))
-        jsfile = os.path.join(dirname, 'index.json')
 
     def render_page(self, pagenum=1, pages=1):
         if self.display == 'flat':
-            start = 1
+            start = 0
             end = -1
         else:
             pagination = int(settings.site.get('pagination'))
@@ -1201,7 +1203,7 @@ class Sitemap(dict):
     def renderfile(self):
         return os.path.join(settings.paths.get('build'), 'sitemap.txt')
 
-    async def save(self):
+    async def render(self):
         if self.mtime >= sorted(self.values())[-1]:
             return
         with open(self.renderfile, 'wt') as f:
@@ -1274,8 +1276,8 @@ def make():
 
     content = settings.paths.get('content')
     worker = AsyncWorker()
-
     rules = IndexPHP()
+
     for e in glob.glob(os.path.join(content, '*', '*.ptr')):
         post = Gone(e)
         if post.mtime > last:
@@ -1287,8 +1289,8 @@ def make():
             last = post.mtime
         rules.add_redirect(post.source, post.target)
 
-    if rules.mtime < last:
-        worker.append(rules.render())
+    if rules.mtime < last or settings.args.get('force'):
+        worker.add(rules.render())
 
     sitemap = Sitemap()
     search = Search()
@@ -1297,10 +1299,10 @@ def make():
 
     for e in sorted(glob.glob(os.path.join(content, '*', '*', 'index.md'))):
         post = Singular(e)
-        worker.append(post.render())
-        worker.append(post.copyfiles())
+        worker.add(post.copyfiles())
         for i in post.images.values():
-            worker.append(i.downsize())
+            worker.add(i.downsize())
+        worker.add(post.render())
         if post.is_future:
             continue
         else:
@@ -1322,11 +1324,11 @@ def make():
             )
 
     search.__exit__()
-    worker.append(search.render())
+    search.render()
     for category in categories.values():
-        worker.append(category.render())
+        worker.add(category.render())
 
-    worker.append(sitemap.save())
+    worker.add(sitemap.render())
 
     worker.run()
     logging.info('worker finished')
diff --git a/pandoc.py b/pandoc.py
new file mode 100644
index 0000000..37c9643
--- /dev/null
+++ b/pandoc.py
@@ -0,0 +1,49 @@
+import subprocess
+import logging
+
+def pandoc(text):
+    """
+    Convert Markdown to an HTML5 fragment by piping it through the
+    `pandoc` command line tool.
+
+    text: Markdown source as str
+    returns: pandoc's stdout, decoded as UTF-8 and stripped
+
+    A failed conversion is logged, not raised; whatever pandoc wrote to
+    stdout (possibly an empty string) is still returned.
+    """
+    # TODO: cache the output, keyed by a hash of the input text?
+    extensions = (
+        'footnotes',
+        'pipe_tables',
+        'raw_html',
+        'definition_lists',
+        'backtick_code_blocks',
+        'fenced_code_attributes',
+        'shortcut_reference_links',
+        'lists_without_preceding_blankline',
+        'autolink_bare_uris',
+    )
+    cmd = (
+        'pandoc',
+        '-o-',
+        '--from=markdown+%s' % '+'.join(extensions),
+        '--to=html5',
+        '--quiet',
+        '--no-highlight'
+    )
+    p = subprocess.Popen(
+        cmd,
+        stdin=subprocess.PIPE,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+    # the encoding is spelled out so input and output use the same codec
+    stdout, stderr = p.communicate(input=text.encode('utf-8'))
+    # --quiet hides warnings, so a non-zero exit or any stderr output means trouble
+    if p.returncode != 0 or stderr:
+        logging.warning(
+            "Error during pandoc convert (exit code %s):\n\t%s\n\t%s",
+            p.returncode,
+            cmd,
+            stderr.decode('utf-8', 'replace').strip()
+        )
+    return stdout.decode('utf-8').strip()
diff --git a/requirements.txt b/requirements.txt
index 694f08f..a59b465 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,26 +1,10 @@
 arrow==0.12.1
 bleach==2.1.3
-certifi==2018.4.16
-chardet==3.0.4
-decorator==4.3.0
 emoji==0.5.0
 feedgen==0.7.0
-html5lib==1.0.1
-idna==2.7
 Jinja2==2.10
 langdetect==1.0.7
-lxml==4.2.3
-Markdown==2.6.11
-markdown-urlize==0.2.0
-MarkupSafe==1.0
-Pygments==2.2.0
-python-dateutil==2.7.3
 python-frontmatter==0.4.2
-PyYAML==3.13
 requests==2.19.1
-six==1.11.0
 unicode-slugify==0.1.3
-Unidecode==1.0.22
-urllib3==1.23
 Wand==0.4.4
-webencodings==0.5.1
diff --git a/templates/Index.j2.php b/templates/Index.j2.php
index 86fa932..6e4ee6f 100644
--- a/templates/Index.j2.php
+++ b/templates/Index.j2.php
@@ -6,12 +6,28 @@ $redirects = array(
 {% endfor %}
 );
 
+$redirects_re = array( /* regex body => target; the matcher wraps each body in /.../i and puts capture group 1 in place of '$1' */
+    '^(?:sysadmin|it|linux-tech-coding|sysadmin-blog)\/?(page.*)?$' => '/category/article/',
+    '^(?:fotography|photoblog)\/?(page.*)?$' => '/category/photo/$1',
+    '^blog\/?(page.*)?$' => '/category/journal/',
+    '^blips\/?(page.*)?$' => '/category/note/$1',
+    '^r\/?(page.*)?$' => '/category/note/$1',
+    '^(?:linux-tech-coding|it|sysadmin-blog|sysadmin|fotography|blips|blog|photoblog|article|journal|photo|note|r)\/((?!page).*)' => '/$1',
+);
+
 $gone = array(
 {% for gone in gones %}
     "{{ gone }}" => true,
 {% endfor %}
 );
 
+$gone_re = array( /* regex bodies; the matcher wraps each one in /.../, so a literal "/" must be written as \/ */
+    '^cache\/.*$',
+    '^files\/.*$',
+    '^wp-content\/.*$',
+    '^broadcast\/wp-ffpc\.message$',
+);
+
 
 function redirect_to($uri) {
     header('HTTP/1.1 301 Moved Permanently');
@@ -78,11 +94,33 @@ $uri = str_replace('/feed/', '', $uri);
 $uri = str_replace('/atom/', '', $uri);
 $uri = trim($uri, '/');
 
-if (isset($gone[$uri]))
+foreach ($gone_re as $pattern) { /* pattern-based rules: hand the URI to gone() on the first match */
+    if (preg_match(sprintf('/%s/', $pattern), $uri)) {
+        gone($uri);
+    }
+}
+
+foreach ($redirects_re as $pattern => $target) { /* pattern-based redirects, matched case-insensitively */
+    if (preg_match(sprintf('/%s/i', $pattern), $uri, $matches)) {
+        /* a trailing optional group that did not take part in the match is absent from $matches */
+        $first = isset($matches[1]) ? $matches[1] : '';
+        redirect_to(str_replace('$1', $first, $target));
+    }
+}
+
+/* "logic" */
+if (isset($gone[$uri])) {
     gone($uri);
-elseif (isset($redirects[$uri]))
+}
+elseif (isset($redirects[$uri])) {
     redirect_to($redirects[$uri]);
-elseif (strstr($uri, '_'))
+}
+elseif (preg_match('/^\.well-known\/(host-meta|webfinger).*$/', $uri)) {
+    redirect_to("https://fed.brid.gy/{$uri}");
+}
+elseif (strstr($uri, '_')) {
     maybe_redirect(str_replace('_', '-', $uri));
-else
+}
+else {
     notfound();
+}
diff --git a/templates/Singular.j2.html b/templates/Singular.j2.html
index 28497e2..b89db27 100644
--- a/templates/Singular.j2.html
+++ b/templates/Singular.j2.html
@@ -5,3 +5,11 @@
     
     
 {% endblock %}
+{% block prism %}
+    {% if post.has_code %}
+    
+    
+    {% endif %}
+{% endblock %}
diff --git a/templates/base.j2.html b/templates/base.j2.html
index 25917ab..8ed47f7 100644
--- a/templates/base.j2.html
+++ b/templates/base.j2.html
@@ -33,14 +33,11 @@
             }
             localStorage.setItem("stylesheet", setto);
             e.setAttribute("media", setto);
+            return false;
         }
     
-    {% if post.has_code %}
-    
-    
-    {% endif %}
+{% block prism %}
+{% endblock %}
 
 
 
@@ -90,7 +87,7 @@
 
     

+ onclick="return toggleStylesheet(this)"> @@ -169,9 +166,7 @@ {{ author.name }} - <> + <>

@@ -422,7 +417,7 @@
- Member of IndieWeb Webring 🕸💍 + Member of IndieWeb Webring
diff --git a/templates/style.css b/templates/style.css index eb77a11..50fb542 100644 --- a/templates/style.css +++ b/templates/style.css @@ -25,7 +25,7 @@ svg { width: 16px; height: 16px; fill: currentColor; - vertical-align: text-top; + vertical-align: middle; } a { @@ -41,6 +41,10 @@ h1 { font-size: 1.6em; } +h1, h2 { + line-height: 1.2em; +} + h2, h3 { margin-top: 2em; } @@ -153,6 +157,7 @@ code, pre { pre { padding: 0.6em; + position: relative; } code { @@ -163,6 +168,11 @@ pre > code { border: none; } +pre> code::before { + content: attr(lang); + float: right; +} + table { border-collapse: collapse; border-spacing: 0; @@ -280,16 +290,11 @@ body > footer dd { margin: -1.3em 0 0 4em; } -body > footer nav { - display: flex; - justify-content: space-between; -} - .webring { text-align: center; } -.footnote a { +.footnotes a { display: inline-block; overflow: hidden; white-space: nowrap; @@ -298,7 +303,7 @@ body > footer nav { max-width: 80%; } -.footnote-ref { +.footnote-back { margin: 0 0 0 0.1em; } @@ -306,6 +311,7 @@ body > footer nav { .follow { position: fixed; right: 1em; + z-index: 100; } .contrast { @@ -334,4 +340,9 @@ body > footer nav { body > header form { margin: 0; } + + body > footer nav { + display: flex; + justify-content: space-between; + } }