jsonfeed and gopher fixes

2019-03-22 15:49:24 +00:00 · 2019-03-22 15:49:24 +00:00 · 5a5723aecc
commit 5a5723aecc
parent 9f73a9b111
9 changed files with 252 additions and 253 deletions
--- a/meta.py
+++ b/meta.py
@ -18,7 +18,6 @@ EXIFDATE = re.compile(
    r'(?P<time>[0-9]{2}:[0-9]{2}:[0-9]{2})$'
 )
 class CachedMeta(dict):
    def __init__(self, fpath):
        self.fpath = fpath
@ -30,7 +29,7 @@ class CachedMeta(dict):
            fname = os.path.basename(os.path.dirname(self.fpath))
        return os.path.join(
-            settings.paths.get('tmp', 'tmp'),
+            settings.tmpdir,
            "%s.%s.json" % (
                fname,
                self.__class__.__name__,
--- a/nasg.py
+++ b/nasg.py
@ -34,7 +34,7 @@ from slugify import slugify
 import requests
 import lxml.etree as etree
-from pandoc import PandocMarkdown, PandocTXT
+from pandoc import PandocMD2HTML, PandocMD2TXT, PandocHTML2TXT
 from meta import Exif
 import settings
 from settings import struct
@ -47,6 +47,9 @@ MDFILE = 'index.md'
 TXTFILE = 'index.txt'
 HTMLFILE = 'index.html'
 GOPHERFILE = 'gophermap'
 ATOMFILE = 'atom.xml'
 RSSFILE = 'index.xml'
 JSONFEEDFILE = 'index.json'
 MarkdownImage = namedtuple(
    'MarkdownImage',
@ -111,6 +114,14 @@ def rfc3339todt(rfc3339):
 J2.filters['printdate'] = rfc3339todt
 def extractlicense(url):
    """Return the upper-cased license name taken from a license URL.

    The name is the last path component of ``url`` with everything from
    its final dot onwards removed, e.g. ``.../CC-BY-4.0.html`` gives
    ``CC-BY-4.0``. Note that a component without a real file extension
    still loses the text after its last dot (``cc-by-4.0`` gives
    ``CC-BY-4``), and a URL ending in ``/`` gives an empty string.
    """
    # Only the stem is used; the extension part is discarded.
    stem = os.path.splitext(os.path.basename(url))[0]
    return stem.upper()
 J2.filters['extractlicense'] = extractlicense
 RE_MYURL = re.compile(
    r'(^(%s[^"]+)$|"(%s[^"]+)")' % (
        settings.site.url,
@ -283,28 +294,29 @@ class MarkdownDoc(object):
            meta, txt = frontmatter.parse(f.read())
        return(meta, txt)
-    @property
+    @cached_property
    def meta(self):
        return self._parsed[0]
-    @property
+    @cached_property
    def content(self):
        return self._parsed[1]
    def pandoc(self, c):
        if c and len(c):
            c = str(PandocMarkdown(c))
            c = RE_PRECODE.sub(
                '<pre><code lang="\g<1>" class="language-\g<1>">', c)
        return c
    @cached_property
    def html_content(self):
        c = "%s" % (self.content)
        if not len(c):
            return c
        if hasattr(self, 'images') and len(self.images):
            for match, img in self.images.items():
                c = c.replace(match, str(img))
-        return self.pandoc(c)
+        c = str(PandocMD2HTML(c))
        c = RE_PRECODE.sub(
            '<pre><code lang="\g<1>" class="language-\g<1>">',
            c
        )
        return c
 class Comment(MarkdownDoc):
@ -543,10 +555,16 @@ class Singular(MarkdownDoc):
    @cached_property
    def html_summary(self):
-        c = self.summary
+        c = "%s" % (self.summary)
-        if c and len(c):
+        return PandocMD2HTML(c)
-            c = self.pandoc(self.summary)
+
-        return c
+    @cached_property
    def txt_summary(self):
        return PandocMD2TXT(self.summary)
    @cached_property
    def txt_content(self):
        """Plain-text conversion of the post's Markdown content.

        Returns the PandocMD2TXT object built from ``self.content``, not a
        plain ``str``. NOTE(review): the converter classes appear to expose
        the converted text only through ``str(obj)`` — confirm before
        passing this value to code that reads the raw string data.
        """
        return PandocMD2TXT(self.content)
    @property
    def title(self):
@ -930,8 +948,8 @@ class Singular(MarkdownDoc):
        g = {
            'post': self.jsonld,
-            'summary': PandocTXT(self.summary),
+            'summary': self.txt_summary,
-            'content': PandocTXT(self.content)
+            'content': self.txt_content
        }
        writepath(
            self.gopherfile,
@ -992,9 +1010,9 @@ class Home(Singular):
            )
            lines.append(line)
        lines.append('')
-        lines.append('')
+        #lines.append('')
-        lines = lines + list(map(lambda x: ("%s" % x), settings.bye.split('\n')))
+        #lines = lines + list(settings.bye.split('\n'))
-        lines.append('')
+        #lines.append('')
        writepath(self.renderfile.replace(HTMLFILE,GOPHERFILE), "\r\n".join(lines))
    async def render(self):
@ -1683,7 +1701,7 @@ class Category(dict):
        return os.path.join(
            self.dpath,
            'feed',
-            'index.xml'
+            RSSFILE
        )
    @property
@ -1691,7 +1709,15 @@ class Category(dict):
        return os.path.join(
            self.dpath,
            'feed',
-            'atom.xml'
+            ATOMFILE
        )
    @property
    def jsonfeedfpath(self):
        """Filesystem path of this category's JSON feed file.

        Built as ``<self.dpath>/feed/<JSONFEEDFILE>``.
        """
        feed_dir = os.path.join(self.dpath, 'feed')
        return os.path.join(feed_dir, JSONFEEDFILE)
    def get_posts(self, start=0, end=-1):
@ -1836,6 +1862,50 @@ class Category(dict):
            fg.link(href=settings.meta.get('hub'), rel='hub')
            writepath(self.atomfeedfpath, fg.atom_str(pretty=True))
    async def render_json(self):
        """Write this category's feed as a JSON Feed version 1 document.

        Items are built from the ``sortedkeys[0:settings.pagination]`` slice,
        iterated in reverse, and the resulting document is written to
        ``self.jsonfeedfpath``.
        """
        logger.info(
            'rendering category "%s" JSON feed',
            self.name,
        )
        start = 0
        end = int(settings.pagination)
        js = {
            "version": "https://jsonfeed.org/version/1",
            "title": self.title,
            "home_page_url": settings.site.url,
            # NOTE(review): the file itself is written below the 'feed'
            # directory (see jsonfeedfpath) — confirm that self.url plus
            # the file name really is the address the feed is served from.
            "feed_url": "%s%s" % (self.url, JSONFEEDFILE),
            "author": {
                "name": settings.author.name,
                "url": settings.author.url,
                "avatar": settings.author.image,
            },
            "items": []
        }
        for k in reversed(self.sortedkeys[start:end]):
            post = self[k]
            pjs = {
                "id": post.url,
                # txt_content / txt_summary are Pandoc converter objects:
                # str subclasses that keep the converted text in .result and
                # expose it through __str__. json serialises the raw string
                # value of a str instance, so coerce to the converted text.
                "content_text": str(post.txt_content),
                "content_html": post.html_content,
                "url": post.url,
                "date_published": str(post.published),
            }
            if post.summary:
                pjs["summary"] = str(post.txt_summary)
            if post.is_photo:
                # JSON Feed v1 defines "attachments" as an array of objects
                # and "size_in_bytes" as a number, not a string.
                pjs["attachments"] = [{
                    "url": post.photo.href,
                    "mime_type": post.photo.mime_type,
                    "size_in_bytes": int(post.photo.mime_size),
                }]
            js["items"].append(pjs)
        writepath(
            self.jsonfeedfpath,
            json.dumps(js, indent=4, ensure_ascii=False)
        )
    async def render_flat(self):
        r = J2.get_template(self.template).render(
            self.tmplvars(self.get_posts())
@ -1856,6 +1926,9 @@ class Category(dict):
                settings.site.name
            )
            lines.append(line)
            #lines.append(post.datePublished)
            if (len(post.description)):
                lines.extend(str(PandocHTML2TXT(post.description)).split("\n"))
            if isinstance(post['image'], list):
                for img in post['image']:
                    line = "I%s\t/%s/%s\t%s\t70" % (
@ -1929,6 +2002,19 @@ class Category(dict):
                self.name
            )
        if not self.is_uptodate(self.jsonfeedfpath, self.newest()):
            logger.info(
                '%s JSON feed outdated, generating new',
                self.name
            )
            await self.render_json()
        else:
            logger.info(
                '%s JSON feed up to date',
                self.name
            )
    async def render(self):
        await self.render_feeds()
        if not self.is_uptodate(self.indexfpath(), self.newest()):
@ -2083,6 +2169,31 @@ class WebmentionIO(object):
            pass
 # class GranaryIO(dict):
    # granary = 'https://granary.io/url'
    # convert_to = ['as2', 'mf2-json', 'jsonfeed']
    # def __init__(self, source):
        # self.source = source
    # def run(self):
        # for c in self.convert_to:
            # p = {
                # 'url': self.source,
                # 'input': html,
                # 'output': c
            # }
            # r = requests.get(self.granary, params=p)
            # logger.info("queried granary.io for %s for url: %s", c, self.source)
            # if r.status_code != requests.codes.ok:
                # continue
            # try:
                # self[c] = webmentions.text
            # except ValueError as e:
                # logger.error('failed to query granary.io: %s', e)
                # pass
 def make():
    start = int(round(time.time() * 1000))
    last = 0
--- a/pandoc.py
+++ b/pandoc.py
@ -6,17 +6,44 @@ __email__ = "mail@petermolnar.net"
 import subprocess
 import logging
 from tempfile import gettempdir
 import hashlib
 import os
 import settings
-
+class Pandoc(str):
 class PandocBase(str):
    in_format = 'html'
    in_options = []
    out_format = 'plain'
    out_options = []
    columns = None
    @property
    def hash(self):
        return str(hashlib.sha1(self.source.encode()).hexdigest())
    @property
    def cachefile(self):
        """Path of the cache file for this conversion.

        The file lives in ``settings.tmpdir`` and is named
        ``<ClassName>_<hash>.pandoc``, so every converter class and every
        distinct source text maps to its own file.
        """
        fname = "%s_%s.pandoc" % (self.__class__.__name__, self.hash)
        return os.path.join(settings.tmpdir, fname)
    @property
    def cache(self):
        if not os.path.exists(self.cachefile):
            return False
        with open(self.cachefile, 'rt') as f:
            self.result = f.read()
            return True
    def __init__(self, text):
        self.source = text
        if self.cache:
            return
        conv_to = '--to=%s' % (self.out_format)
        if (len(self.out_options)):
            conv_to = '%s+%s' % (
@ -58,6 +85,8 @@ class PandocBase(str):
            )
        r = stdout.decode('utf-8').strip()
        self.result = r
        with open(self.cachefile, 'wt') as f:
            f.write(self.result)
    def __str__(self):
        """Return the conversion result stored in ``self.result`` as a str."""
        return str(self.result)
@ -66,7 +95,7 @@ class PandocBase(str):
        return str(self.result)
-class PandocMarkdown(PandocBase):
+class PandocMD2HTML(Pandoc):
    in_format = 'markdown'
    in_options = [
        'footnotes',
@ -86,7 +115,7 @@ class PandocMarkdown(PandocBase):
    out_options = []
-class PandocHTML(PandocBase):
+class PandocHTML2MD(Pandoc):
    in_format = 'html'
    in_options = []
    out_format = 'markdown'
@ -94,8 +123,6 @@ class PandocHTML(PandocBase):
        'footnotes',
        'pipe_tables',
        'strikeout',
        # 'superscript',
        # 'subscript',
        'raw_html',
        'definition_lists',
        'backtick_code_blocks',
@ -106,14 +133,12 @@ class PandocHTML(PandocBase):
    ]
-class PandocTXT(PandocBase):
+class PandocMD2TXT(Pandoc):
    in_format = 'markdown'
    in_options = [
        'footnotes',
        'pipe_tables',
        'strikeout',
        # 'superscript',
        # 'subscript',
        'raw_html',
        'definition_lists',
        'backtick_code_blocks',
@ -124,91 +149,12 @@ class PandocTXT(PandocBase):
    ]
    out_format = 'plain'
    out_options = []
-    columns = '--columns=72'
+    columns = '--columns=80'
-#class PandocMarkdown(str):
+class PandocHTML2TXT(Pandoc):
-    #def __new__(cls, text):
+    in_format = 'html'
-        #""" Pandoc command line call with piped in- and output """
+    in_options = []
-        #cmd = (
+    out_format = 'plain'
-            #'pandoc',
+    out_options = []
-            #'-o-',
+    columns = '--columns=80'
            #'--from=markdown+%s' % (
                #'+'.join([
                    #'footnotes',
                    #'pipe_tables',
                    #'strikeout',
                    ## 'superscript',
                    ## 'subscript',
                    #'raw_html',
                    #'definition_lists',
                    #'backtick_code_blocks',
                    #'fenced_code_attributes',
                    #'shortcut_reference_links',
                    #'lists_without_preceding_blankline',
                    #'autolink_bare_uris',
                #])
            #),
            #'--to=html5',
            #'--quiet',
            #'--no-highlight'
        #)
        #p = subprocess.Popen(
            #cmd,
            #stdin=subprocess.PIPE,
            #stdout=subprocess.PIPE,
            #stderr=subprocess.PIPE,
        #)
        #stdout, stderr = p.communicate(input=text.encode())
        #if stderr:
            #logging.warning(
                #"Error during pandoc covert:\n\t%s\n\t%s",
                #cmd,
                #stderr
            #)
        #r = stdout.decode('utf-8').strip()
        #return str.__new__(cls, r)
 #class PandocHTML(str):
    #def __new__(cls, text):
        #""" Pandoc command line call with piped in- and output """
        #cmd = (
            #'pandoc',
            #'-o-',
            #'--to=markdown+%s' % (
                #'+'.join([
                    #'footnotes',
                    #'pipe_tables',
                    #'strikeout',
                    ## 'superscript',
                    ## 'subscript',
                    #'raw_html',
                    #'definition_lists',
                    #'backtick_code_blocks',
                    #'fenced_code_attributes',
                    #'shortcut_reference_links',
                    #'lists_without_preceding_blankline',
                    #'autolink_bare_uris',
                #])
            #),
            #'--from=html',
            #'--quiet',
        #)
        #p = subprocess.Popen(
            #cmd,
            #stdin=subprocess.PIPE,
            #stdout=subprocess.PIPE,
            #stderr=subprocess.PIPE,
        #)
        #stdout, stderr = p.communicate(input=text.encode())
        #if stderr:
            #logging.warning(
                #"Error during pandoc covert:\n\t%s\n\t%s",
                #cmd,
                #stderr
            #)
        #r = stdout.decode('utf-8').strip()
        #return str.__new__(cls, r)
--- a/settings.py
+++ b/settings.py
@ -8,6 +8,7 @@ import os
 import re
 import argparse
 import logging
 from tempfile import gettempdir
 class struct(dict):
@ -156,7 +157,6 @@ paths = struct({
    'remotewww': 'web',
    'remotequeue': 'queue',
    'micropub': os.path.join(base, 'content', 'note'),
    'tmp': os.path.join(base, 'tmp'),
    'home': os.path.join(base, 'content', 'home', 'index.md'),
 })
@ -171,28 +171,9 @@ photo = struct({
    },
 })
-bye = """
+tmpdir = os.path.join(gettempdir(),'nasg')
-███████╗███████╗███████╗    ██╗   ██╗ ██████╗ ██╗   ██╗
+if not os.path.isdir(tmpdir):
-██╔════╝██╔════╝██╔════╝    ╚██╗ ██╔╝██╔═══██╗██║   ██║
+    os.makedirs(tmpdir)
 ███████╗█████╗  █████╗       ╚████╔╝ ██║   ██║██║   ██║
 ╚════██║██╔══╝  ██╔══╝        ╚██╔╝  ██║   ██║██║   ██║
 ███████║███████╗███████╗       ██║   ╚██████╔╝╚██████╔╝
 ╚══════╝╚══════╝╚══════╝       ╚═╝    ╚═════╝  ╚═════╝
 ███████╗██████╗  █████╗  ██████╗███████╗
 ██╔════╝██╔══██╗██╔══██╗██╔════╝██╔════╝
 ███████╗██████╔╝███████║██║     █████╗
 ╚════██║██╔═══╝ ██╔══██║██║     ██╔══╝
 ███████║██║     ██║  ██║╚██████╗███████╗
 ╚══════╝╚═╝     ╚═╝  ╚═╝ ╚═════╝╚══════╝
 ██████╗ ██████╗ ██╗    ██╗██████╗  ██████╗ ██╗   ██╗
 ██╔════╝██╔═══██╗██║    ██║██╔══██╗██╔═══██╗╚██╗ ██╔╝
 ██║     ██║   ██║██║ █╗ ██║██████╔╝██║   ██║ ╚████╔╝
 ██║     ██║   ██║██║███╗██║██╔══██╗██║   ██║  ╚██╔╝
 ╚██████╗╚██████╔╝╚███╔███╔╝██████╔╝╚██████╔╝   ██║
 ╚═════╝ ╚═════╝  ╚══╝╚══╝ ╚═════╝  ╚═════╝    ╚═╝
 """
 _parser = argparse.ArgumentParser(description='Parameters for NASG')
 _booleanparams = {
--- a/templates/Category.j2.html
+++ b/templates/Category.j2.html
@ -5,6 +5,7 @@
 {% block meta %}
    <link rel="alternate" type="application/rss+xml" title="{{ category.title }} RSS feed" href="{{ category.feed }}" />
    <link rel="alternate" type="application/atom+xml" title="{{ category.title }} ATOM feed" href="{{ category.feed }}atom.xml" />
    <link rel="alternate" type="application/json" title="{{ category.title }} JSON feed" href="{{ category.feed }}index.json" />
    <link rel="feed" title="{{ category.title}} feed" href="{{ category.url }}" />
 {% endblock %}
--- a/templates/Singular.j2.html
+++ b/templates/Singular.j2.html
@ -9,7 +9,6 @@
    <link rel="canonical" href="{{ post.url }}" />
    <link rel="alternate" type="application/json" href="{{ post.url }}index.json" />
    <link rel="alternate" type="application/ld+json" href="{{ post.url }}index.json" />
    <link rel="alternate" type="application/mf2+json" href="https://pin13.net/mf2/?url={{ post.url|urlencode }}" />
    <link rel="alternate" type="text/plain" href="{{ post.url }}index.txt" />
    <meta property="og:title" content="{{ post.headline }}" />
    <meta property="og:type" content="article" />
@ -75,6 +74,58 @@
                </a>
                {% endif %}
            </h1>
            <p>
                <a rel="license" href="{{ post.license }}" class="u-license">
                    {{ post.license | extractlicense }}
                </a>
                by
                <span class="p-author h-card vcard">
                    <img class="u-photo photo" src="{{ post.author.image|relurl(baseurl) }}" alt="" />
                    <a class="p-name u-url fn url" href="{{ post.author.url }}">{{ post.author.name }}</a>
                    (<a class="u-email email" href="mailto:{{ post.author.email }}">{{ post.author.email }}</a>)
                </span>
                at
                <time datetime="{{ post.datePublished }}" class="dt-published published">{{ post.datePublished|printdate }}</time>
                <time datetime="{{ post.dateModified }}" class="dt-updated updated"></time>
                <br />
                <a class="u-url u-uuid bookmark" href="{{ post.url }}">{{ post.url }}</a>
 <!--
 {% if post.sameAs|length %}
                <br />
                    Syndicated to:
                    <ul>
                    {% for url in post.sameAs %}
                        <li>
                            <a class="u-syndication" href="{{ url }}">
                                {{ url }}
                            </a>
                        </li>
                    {% endfor %}
                    </ul>
 {% endif %}
 -->
            </p>
 {% if post.subjectOf %}
            <p class="h-event vevent">
                <span class="summary">
                    Journey from
                        <time class="dt-start dtstart" datetime="{{ post.subjectOf.startDate }}">
                            {{ post.subjectOf.startDate|printdate }}
                        </time>
                        to
                        <time class="dt-end dtend" datetime="{{ post.subjectOf.endDate }}">
                            {{ post.subjectOf.endDate|printdate }}
                        </time>, in
                        <span class="p-location location">
                            {{ post.subjectOf.location.name }}
                        </span>
                    </span>
                    <a class="u-url url" href="{{ post.url }}"></a>
                </p>
 {% endif %}
        </header>
        {% if post.review %}
@ -108,106 +159,6 @@
            {{ post.text|relurl(baseurl) }}
        </div>
        <footer>
            <dl>
                {% if post.subjectOf %}
                <dt>Trip details</dt>
                <dd class="h-event vevent">
                    <span class="summary">
                        From
                        <time class="dt-start dtstart" datetime="{{ post.subjectOf.startDate }}">
                            {{ post.subjectOf.startDate|printdate }}
                        </time>
                        to
                        <time class="dt-end dtend" datetime="{{ post.subjectOf.endDate }}">
                            {{ post.subjectOf.endDate|printdate }}
                        </time>, in
                        <span class="p-location location">
                            {{ post.subjectOf.location.name }}
                        </span>
                    </span>
                    <a class="u-url url" href="{{ post.url }}"></a>
                </dd>
                {% endif %}
                <dt>Published</dt>
                <dd>
                    <time datetime="{{ post.datePublished }}" class="dt-published published">{{ post.datePublished|printdate }}</time>
                    <time datetime="{{ post.dateModified }}" class="dt-updated updated"></time>
                </dd>
                <dt>License</dt>
                <dd class="license">
                {% if 'CC-BY-4.0' in post.license %}
                    <a rel="license" href="{{ post.license }}" class="u-license">
                        CC-BY-4.0
                    </a>
                    <ul>
                        <li>you can share it</li>
                        <li>you can republish it</li>
                        <li>you can modify it, but you need to indicate the modifications</li>
                        <li>you can use it for commercial purposes</li>
                        <li>you always need to make a link back here</li>
                    </ul>
                {% elif 'CC-BY-NC-4.0' in post.license %}
                    <a rel="license" href="{{ post.license }}" class="u-license">
                        CC-BY-NC-4.0
                    </a>
                    <ul>
                        <li>you can share it</li>
                        <li>you can republish it</li>
                        <li>you can modify it, but you need to indicate the modifications</li>
                        <li>you can't use it for commercial purposes</li>
                        <li>you always need to make a link back here</li>
                    </ul>
                    For commercial use, please contact me.
                {% elif 'CC-BY-NC-ND-4.0' in post.license %}
                    <a rel="license" href="{{ post.license }}" class="u-license">
                        CC-BY-NC-ND-4.0
                    </a>
                    <ul>
                        <li>you can share it</li>
                        <li>you can't modify it</li>
                        <li>you can't republish it</li>
                        <li>you can't use it for commercial purposes</li>
                        <li>you always need to make a link back here</li>
                    </ul>
                    For commercial use, please contact me.
                {% endif %}
                </dd>
                <dt>Author</dt>
                <dd class="p-author h-card vcard">
                    <img class="u-photo photo" src="{{ post.author.image|relurl(baseurl) }}" alt="" />
                    <a class="p-name u-url fn url" href="{{ post.author.url }}">{{ post.author.name }}</a>
                    <a class="u-email email" href="mailto:{{ post.author.email }}">{{ post.author.email }}</a>
                </dd>
                <dt>Entry URL</dt>
                <dd>
                    <a class="u-url u-uuid bookmark" href="{{ post.url }}">
                        {{ post.url }}
                    </a>
                </dd>
                {% if post.sameAs|length %}
                <dt>Also on</dt>
                <dd>
                    <ul>
                    {% for url in post.sameAs %}
                    <li>
                        <a class="u-syndication" href="{{ url }}">
                            {{ url }}
                        </a>
                    </li>
                    {% endfor %}
                    </ul>
                </dd>
                {% endif %}
            </dl>
        </footer>
    {% if 'WebPage' != post['@type'] %}
        <section class="syndication">
        {% for action in post.potentialAction %}
--- a/templates/Singular.j2.txt
+++ b/templates/Singular.j2.txt
@ -1,9 +1,7 @@
---
+{{ post.headline|center(width=80) }}
-Title: {{ post.headline }}
+
-Author: {{ post.author.name }} <{{ post.author.email}}>
+by {{ post.author.name }} <{{ post.author.email}}>
-URL: {{ post.url }}
+{{ post.datePublished|printdate }}
 Published: {{ post.datePublished|printdate }}
 ---
 {{ summary }}
--- a/templates/base.j2.html
+++ b/templates/base.j2.html
@ -137,9 +137,9 @@
            </ul>
        </nav>
        <nav>
-            <a href="https://xn--sr8hvo.ws/🇻🇮📢/previous">←</a>
+            <a href="https://xn--sr8hvo.ws/%F0%9F%87%BB%F0%9F%87%AE%F0%9F%93%A2/previous">←</a>
                Member of <a href="https://xn--sr8hvo.ws">IndieWeb Webring</a>
-            <a href="https://xn--sr8hvo.ws/🇻🇮📢/next">→</a>
+            <a href="https://xn--sr8hvo.ws/%F0%9F%87%BB%F0%9F%87%AE%F0%9F%93%A2/next">→</a>
        </nav>
    </section>
    <section>
--- a/templates/style.css
+++ b/templates/style.css
@ -49,6 +49,8 @@ main p {
 h1 {
  border-bottom: 4px double #999;
  text-transform:uppercase;
  text-align: center;
  padding-bottom: 1em;
 }
 article > footer > dl > dt,
@ -215,7 +217,7 @@ figcaption > dl dd {
  margin: 0 0.3em;
 }
-footer img {
+.vcard img {
  height: 1em;
 }
@ -276,6 +278,16 @@ main > header > p {
  text-align: center;
 }
 article > header {
  border-bottom: 4px double #999;
  margin-bottom: 2em;
 }
 .h-feed article > header {
  border: none;
  margin: 0;
 }
 main ul {
  margin-left: 2em;
 }
@ -343,4 +355,4 @@ body > img {
  right: 0;
  width: 10em;
  height: auto;
-}
+}