jsonfeed and gopher fixes

This commit is contained in:
Peter Molnar 2019-03-22 15:49:24 +00:00
parent 9f73a9b111
commit 5a5723aecc
9 changed files with 252 additions and 253 deletions

View file

@ -18,7 +18,6 @@ EXIFDATE = re.compile(
r'(?P<time>[0-9]{2}:[0-9]{2}:[0-9]{2})$' r'(?P<time>[0-9]{2}:[0-9]{2}:[0-9]{2})$'
) )
class CachedMeta(dict): class CachedMeta(dict):
def __init__(self, fpath): def __init__(self, fpath):
self.fpath = fpath self.fpath = fpath
@ -30,7 +29,7 @@ class CachedMeta(dict):
fname = os.path.basename(os.path.dirname(self.fpath)) fname = os.path.basename(os.path.dirname(self.fpath))
return os.path.join( return os.path.join(
settings.paths.get('tmp', 'tmp'), settings.tmpdir,
"%s.%s.json" % ( "%s.%s.json" % (
fname, fname,
self.__class__.__name__, self.__class__.__name__,

155
nasg.py
View file

@ -34,7 +34,7 @@ from slugify import slugify
import requests import requests
import lxml.etree as etree import lxml.etree as etree
from pandoc import PandocMarkdown, PandocTXT from pandoc import PandocMD2HTML, PandocMD2TXT, PandocHTML2TXT
from meta import Exif from meta import Exif
import settings import settings
from settings import struct from settings import struct
@ -47,6 +47,9 @@ MDFILE = 'index.md'
TXTFILE = 'index.txt' TXTFILE = 'index.txt'
HTMLFILE = 'index.html' HTMLFILE = 'index.html'
GOPHERFILE = 'gophermap' GOPHERFILE = 'gophermap'
ATOMFILE = 'atom.xml'
RSSFILE = 'index.xml'
JSONFEEDFILE = 'index.json'
MarkdownImage = namedtuple( MarkdownImage = namedtuple(
'MarkdownImage', 'MarkdownImage',
@ -111,6 +114,14 @@ def rfc3339todt(rfc3339):
J2.filters['printdate'] = rfc3339todt J2.filters['printdate'] = rfc3339todt
def extractlicense(url):
    """Extract an upper-cased license name from a license URL.

    The name is the last path segment of *url* with its file extension
    removed, e.g. ``.../licenses/CC-BY-4.0.html`` -> ``CC-BY-4.0``.

    Args:
        url: license URL (or path); may be empty or missing.

    Returns:
        The upper-cased name, or an empty string when *url* is falsy.
    """
    # A missing license (None, '' or an undefined template value) must not
    # blow up template rendering.
    if not url:
        return ''
    # Strip trailing slashes first: basename() of "…/mit/" would otherwise
    # be the empty string.
    name, _ext = os.path.splitext(os.path.basename(str(url).rstrip('/')))
    return name.upper()
J2.filters['extractlicense'] = extractlicense
RE_MYURL = re.compile( RE_MYURL = re.compile(
r'(^(%s[^"]+)$|"(%s[^"]+)")' % ( r'(^(%s[^"]+)$|"(%s[^"]+)")' % (
settings.site.url, settings.site.url,
@ -283,28 +294,29 @@ class MarkdownDoc(object):
meta, txt = frontmatter.parse(f.read()) meta, txt = frontmatter.parse(f.read())
return(meta, txt) return(meta, txt)
@property @cached_property
def meta(self): def meta(self):
return self._parsed[0] return self._parsed[0]
@property @cached_property
def content(self): def content(self):
return self._parsed[1] return self._parsed[1]
def pandoc(self, c):
if c and len(c):
c = str(PandocMarkdown(c))
c = RE_PRECODE.sub(
'<pre><code lang="\g<1>" class="language-\g<1>">', c)
return c
@cached_property @cached_property
def html_content(self): def html_content(self):
c = "%s" % (self.content) c = "%s" % (self.content)
if not len(c):
return c
if hasattr(self, 'images') and len(self.images): if hasattr(self, 'images') and len(self.images):
for match, img in self.images.items(): for match, img in self.images.items():
c = c.replace(match, str(img)) c = c.replace(match, str(img))
return self.pandoc(c) c = str(PandocMD2HTML(c))
c = RE_PRECODE.sub(
'<pre><code lang="\g<1>" class="language-\g<1>">',
c
)
return c
class Comment(MarkdownDoc): class Comment(MarkdownDoc):
@ -543,10 +555,16 @@ class Singular(MarkdownDoc):
@cached_property @cached_property
def html_summary(self): def html_summary(self):
c = self.summary c = "%s" % (self.summary)
if c and len(c): return PandocMD2HTML(c)
c = self.pandoc(self.summary)
return c @cached_property
def txt_summary(self):
return PandocMD2TXT(self.summary)
@cached_property
def txt_content(self):
return PandocMD2TXT(self.content)
@property @property
def title(self): def title(self):
@ -930,8 +948,8 @@ class Singular(MarkdownDoc):
g = { g = {
'post': self.jsonld, 'post': self.jsonld,
'summary': PandocTXT(self.summary), 'summary': self.txt_summary,
'content': PandocTXT(self.content) 'content': self.txt_content
} }
writepath( writepath(
self.gopherfile, self.gopherfile,
@ -992,9 +1010,9 @@ class Home(Singular):
) )
lines.append(line) lines.append(line)
lines.append('') lines.append('')
lines.append('') #lines.append('')
lines = lines + list(map(lambda x: ("%s" % x), settings.bye.split('\n'))) #lines = lines + list(settings.bye.split('\n'))
lines.append('') #lines.append('')
writepath(self.renderfile.replace(HTMLFILE,GOPHERFILE), "\r\n".join(lines)) writepath(self.renderfile.replace(HTMLFILE,GOPHERFILE), "\r\n".join(lines))
async def render(self): async def render(self):
@ -1683,7 +1701,7 @@ class Category(dict):
return os.path.join( return os.path.join(
self.dpath, self.dpath,
'feed', 'feed',
'index.xml' RSSFILE
) )
@property @property
@ -1691,7 +1709,15 @@ class Category(dict):
return os.path.join( return os.path.join(
self.dpath, self.dpath,
'feed', 'feed',
'atom.xml' ATOMFILE
)
@property
def jsonfeedfpath(self):
return os.path.join(
self.dpath,
'feed',
JSONFEEDFILE
) )
def get_posts(self, start=0, end=-1): def get_posts(self, start=0, end=-1):
@ -1836,6 +1862,50 @@ class Category(dict):
fg.link(href=settings.meta.get('hub'), rel='hub') fg.link(href=settings.meta.get('hub'), rel='hub')
writepath(self.atomfeedfpath, fg.atom_str(pretty=True)) writepath(self.atomfeedfpath, fg.atom_str(pretty=True))
async def render_json(self):
    """Render this category's posts as a JSON Feed (version 1) document.

    The feed header is built from the site and author settings; the items
    are the first ``settings.pagination`` entries of ``self.sortedkeys``,
    added in reversed order. The result is written to
    ``self.jsonfeedfpath`` as indented JSON with non-ASCII characters kept
    as-is.
    """
    logger.info(
        'rendering category "%s" JSON feed',
        self.name,
    )

    start = 0
    end = int(settings.pagination)

    js = {
        "version": "https://jsonfeed.org/version/1",
        "title": self.title,
        "home_page_url": settings.site.url,
        # NOTE(review): the file itself is written below the 'feed'
        # directory (see jsonfeedfpath) - confirm self.url + JSONFEEDFILE is
        # really the public address of this feed.
        "feed_url": "%s%s" % (self.url, JSONFEEDFILE),
        "author": {
            "name": settings.author.name,
            "url": settings.author.url,
            "avatar": settings.author.image,
        },
        "items": []
    }

    for k in reversed(self.sortedkeys[start:end]):
        post = self[k]
        pjs = {
            "id": post.url,
            # The Pandoc* converters subclass str and only expose the
            # converted text through __str__; json.dumps() serialises the
            # underlying string value instead, so convert explicitly.
            "content_text": str(post.txt_content),
            "content_html": str(post.html_content),
            "url": post.url,
            "date_published": str(post.published),
        }

        if len(post.summary):
            pjs["summary"] = str(post.txt_summary)

        if post.is_photo:
            # JSON Feed defines "attachments" as an array of objects and
            # "size_in_bytes" as a number.
            pjs["attachments"] = [{
                "url": post.photo.href,
                "mime_type": post.photo.mime_type,
                "size_in_bytes": int(post.photo.mime_size),
            }]
        js["items"].append(pjs)

    writepath(
        self.jsonfeedfpath,
        json.dumps(js, indent=4, ensure_ascii=False)
    )
async def render_flat(self): async def render_flat(self):
r = J2.get_template(self.template).render( r = J2.get_template(self.template).render(
self.tmplvars(self.get_posts()) self.tmplvars(self.get_posts())
@ -1856,6 +1926,9 @@ class Category(dict):
settings.site.name settings.site.name
) )
lines.append(line) lines.append(line)
#lines.append(post.datePublished)
if (len(post.description)):
lines.extend(str(PandocHTML2TXT(post.description)).split("\n"))
if isinstance(post['image'], list): if isinstance(post['image'], list):
for img in post['image']: for img in post['image']:
line = "I%s\t/%s/%s\t%s\t70" % ( line = "I%s\t/%s/%s\t%s\t70" % (
@ -1929,6 +2002,19 @@ class Category(dict):
self.name self.name
) )
if not self.is_uptodate(self.jsonfeedfpath, self.newest()):
logger.info(
'%s JSON feed outdated, generating new',
self.name
)
await self.render_json()
else:
logger.info(
'%s JSON feed up to date',
self.name
)
async def render(self): async def render(self):
await self.render_feeds() await self.render_feeds()
if not self.is_uptodate(self.indexfpath(), self.newest()): if not self.is_uptodate(self.indexfpath(), self.newest()):
@ -2083,6 +2169,31 @@ class WebmentionIO(object):
pass pass
# class GranaryIO(dict):
# granary = 'https://granary.io/url'
# convert_to = ['as2', 'mf2-json', 'jsonfeed']
# def __init__(self, source):
# self.source = source
# def run(self):
# for c in self.convert_to:
# p = {
# 'url': self.source,
# 'input': html,
# 'output': c
# }
# r = requests.get(self.granary, params=p)
# logger.info("queried granary.io for %s for url: %s", c, self.source)
# if r.status_code != requests.codes.ok:
# continue
# try:
# self[c] = webmentions.text
# except ValueError as e:
# logger.error('failed to query granary.io: %s', e)
# pass
def make(): def make():
start = int(round(time.time() * 1000)) start = int(round(time.time() * 1000))
last = 0 last = 0

136
pandoc.py
View file

@ -6,17 +6,44 @@ __email__ = "mail@petermolnar.net"
import subprocess import subprocess
import logging import logging
from tempfile import gettempdir
import hashlib
import os
import settings
class Pandoc(str):
class PandocBase(str):
in_format = 'html' in_format = 'html'
in_options = [] in_options = []
out_format = 'plain' out_format = 'plain'
out_options = [] out_options = []
columns = None columns = None
@property
def hash(self):
    """Return the SHA-1 hex digest of ``self.source``.

    Used as the content-derived part of the cache file name; it is an
    identifier for the input text, not a security measure.
    """
    # hexdigest() already returns str, no extra conversion needed.
    return hashlib.sha1(self.source.encode()).hexdigest()
# Path of the on-disk cache entry for this conversion: a file inside
# settings.tmpdir named "<class name>_<self.hash>.pandoc". Because the class
# name is part of the file name, each converter subclass gets its own entry
# for the same source text.
@property
def cachefile(self):
return os.path.join(
settings.tmpdir,
"%s_%s.pandoc" % (
self.__class__.__name__,
self.hash
)
)
@property
def cache(self):
    """Try to load a previously stored conversion result.

    Returns:
        True when ``self.cachefile`` was read; its contents are then stored
        in ``self.result``. False when no cache file exists.
    """
    # EAFP: open the file directly instead of testing os.path.exists()
    # first, so a cache file that disappears between the check and the
    # open cannot raise.
    try:
        with open(self.cachefile, 'rt') as f:
            self.result = f.read()
    except (FileNotFoundError, NotADirectoryError):
        return False
    return True
def __init__(self, text): def __init__(self, text):
self.source = text self.source = text
if self.cache:
return
conv_to = '--to=%s' % (self.out_format) conv_to = '--to=%s' % (self.out_format)
if (len(self.out_options)): if (len(self.out_options)):
conv_to = '%s+%s' % ( conv_to = '%s+%s' % (
@ -58,6 +85,8 @@ class PandocBase(str):
) )
r = stdout.decode('utf-8').strip() r = stdout.decode('utf-8').strip()
self.result = r self.result = r
with open(self.cachefile, 'wt') as f:
f.write(self.result)
def __str__(self): def __str__(self):
return str(self.result) return str(self.result)
@ -66,7 +95,7 @@ class PandocBase(str):
return str(self.result) return str(self.result)
class PandocMarkdown(PandocBase): class PandocMD2HTML(Pandoc):
in_format = 'markdown' in_format = 'markdown'
in_options = [ in_options = [
'footnotes', 'footnotes',
@ -86,7 +115,7 @@ class PandocMarkdown(PandocBase):
out_options = [] out_options = []
class PandocHTML(PandocBase): class PandocHTML2MD(Pandoc):
in_format = 'html' in_format = 'html'
in_options = [] in_options = []
out_format = 'markdown' out_format = 'markdown'
@ -94,8 +123,6 @@ class PandocHTML(PandocBase):
'footnotes', 'footnotes',
'pipe_tables', 'pipe_tables',
'strikeout', 'strikeout',
# 'superscript',
# 'subscript',
'raw_html', 'raw_html',
'definition_lists', 'definition_lists',
'backtick_code_blocks', 'backtick_code_blocks',
@ -106,14 +133,12 @@ class PandocHTML(PandocBase):
] ]
class PandocTXT(PandocBase): class PandocMD2TXT(Pandoc):
in_format = 'markdown' in_format = 'markdown'
in_options = [ in_options = [
'footnotes', 'footnotes',
'pipe_tables', 'pipe_tables',
'strikeout', 'strikeout',
# 'superscript',
# 'subscript',
'raw_html', 'raw_html',
'definition_lists', 'definition_lists',
'backtick_code_blocks', 'backtick_code_blocks',
@ -124,91 +149,12 @@ class PandocTXT(PandocBase):
] ]
out_format = 'plain' out_format = 'plain'
out_options = [] out_options = []
columns = '--columns=72' columns = '--columns=80'
#class PandocMarkdown(str): class PandocHTML2TXT(Pandoc):
#def __new__(cls, text): in_format = 'html'
#""" Pandoc command line call with piped in- and output """ in_options = []
#cmd = ( out_format = 'plain'
#'pandoc', out_options = []
#'-o-', columns = '--columns=80'
#'--from=markdown+%s' % (
#'+'.join([
#'footnotes',
#'pipe_tables',
#'strikeout',
## 'superscript',
## 'subscript',
#'raw_html',
#'definition_lists',
#'backtick_code_blocks',
#'fenced_code_attributes',
#'shortcut_reference_links',
#'lists_without_preceding_blankline',
#'autolink_bare_uris',
#])
#),
#'--to=html5',
#'--quiet',
#'--no-highlight'
#)
#p = subprocess.Popen(
#cmd,
#stdin=subprocess.PIPE,
#stdout=subprocess.PIPE,
#stderr=subprocess.PIPE,
#)
#stdout, stderr = p.communicate(input=text.encode())
#if stderr:
#logging.warning(
#"Error during pandoc covert:\n\t%s\n\t%s",
#cmd,
#stderr
#)
#r = stdout.decode('utf-8').strip()
#return str.__new__(cls, r)
#class PandocHTML(str):
#def __new__(cls, text):
#""" Pandoc command line call with piped in- and output """
#cmd = (
#'pandoc',
#'-o-',
#'--to=markdown+%s' % (
#'+'.join([
#'footnotes',
#'pipe_tables',
#'strikeout',
## 'superscript',
## 'subscript',
#'raw_html',
#'definition_lists',
#'backtick_code_blocks',
#'fenced_code_attributes',
#'shortcut_reference_links',
#'lists_without_preceding_blankline',
#'autolink_bare_uris',
#])
#),
#'--from=html',
#'--quiet',
#)
#p = subprocess.Popen(
#cmd,
#stdin=subprocess.PIPE,
#stdout=subprocess.PIPE,
#stderr=subprocess.PIPE,
#)
#stdout, stderr = p.communicate(input=text.encode())
#if stderr:
#logging.warning(
#"Error during pandoc covert:\n\t%s\n\t%s",
#cmd,
#stderr
#)
#r = stdout.decode('utf-8').strip()
#return str.__new__(cls, r)

View file

@ -8,6 +8,7 @@ import os
import re import re
import argparse import argparse
import logging import logging
from tempfile import gettempdir
class struct(dict): class struct(dict):
@ -156,7 +157,6 @@ paths = struct({
'remotewww': 'web', 'remotewww': 'web',
'remotequeue': 'queue', 'remotequeue': 'queue',
'micropub': os.path.join(base, 'content', 'note'), 'micropub': os.path.join(base, 'content', 'note'),
'tmp': os.path.join(base, 'tmp'),
'home': os.path.join(base, 'content', 'home', 'index.md'), 'home': os.path.join(base, 'content', 'home', 'index.md'),
}) })
@ -171,28 +171,9 @@ photo = struct({
}, },
}) })
bye = """ tmpdir = os.path.join(gettempdir(),'nasg')
if not os.path.isdir(tmpdir):
os.makedirs(tmpdir)
"""
_parser = argparse.ArgumentParser(description='Parameters for NASG') _parser = argparse.ArgumentParser(description='Parameters for NASG')
_booleanparams = { _booleanparams = {

View file

@ -5,6 +5,7 @@
{% block meta %} {% block meta %}
<link rel="alternate" type="application/rss+xml" title="{{ category.title }} RSS feed" href="{{ category.feed }}" /> <link rel="alternate" type="application/rss+xml" title="{{ category.title }} RSS feed" href="{{ category.feed }}" />
<link rel="alternate" type="application/atom+xml" title="{{ category.title }} ATOM feed" href="{{ category.feed }}atom.xml" /> <link rel="alternate" type="application/atom+xml" title="{{ category.title }} ATOM feed" href="{{ category.feed }}atom.xml" />
<link rel="alternate" type="application/json" title="{{ category.title }} JSON feed" href="{{ category.feed }}index.json" />
<link rel="feed" title="{{ category.title}} feed" href="{{ category.url }}" /> <link rel="feed" title="{{ category.title}} feed" href="{{ category.url }}" />
{% endblock %} {% endblock %}

View file

@ -9,7 +9,6 @@
<link rel="canonical" href="{{ post.url }}" /> <link rel="canonical" href="{{ post.url }}" />
<link rel="alternate" type="application/json" href="{{ post.url }}index.json" /> <link rel="alternate" type="application/json" href="{{ post.url }}index.json" />
<link rel="alternate" type="application/ld+json" href="{{ post.url }}index.json" /> <link rel="alternate" type="application/ld+json" href="{{ post.url }}index.json" />
<link rel="alternate" type="application/mf2+json" href="https://pin13.net/mf2/?url={{ post.url|urlencode }}" />
<link rel="alternate" type="text/plain" href="{{ post.url }}index.txt" /> <link rel="alternate" type="text/plain" href="{{ post.url }}index.txt" />
<meta property="og:title" content="{{ post.headline }}" /> <meta property="og:title" content="{{ post.headline }}" />
<meta property="og:type" content="article" /> <meta property="og:type" content="article" />
@ -75,6 +74,58 @@
</a> </a>
{% endif %} {% endif %}
</h1> </h1>
<p>
<a rel="license" href="{{ post.license }}" class="u-license">
{{ post.license | extractlicense }}
</a>
by
<span class="p-author h-card vcard">
<img class="u-photo photo" src="{{ post.author.image|relurl(baseurl) }}" alt="" />
<a class="p-name u-url fn url" href="{{ post.author.url }}">{{ post.author.name }}</a>
(<a class="u-email email" href="mailto:{{ post.author.email }}">{{ post.author.email }}</a>)
</span>
at
<time datetime="{{ post.datePublished }}" class="dt-published published">{{ post.datePublished|printdate }}</time>
<time datetime="{{ post.dateModified }}" class="dt-updated updated"></time>
<br />
<a class="u-url u-uuid bookmark" href="{{ post.url }}">{{ post.url }}</a>
<!--
{% if post.sameAs|length %}
<br />
Syndicated to:
<ul>
{% for url in post.sameAs %}
<li>
<a class="u-syndication" href="{{ url }}">
{{ url }}
</a>
</li>
{% endfor %}
</ul>
{% endif %}
-->
</p>
{% if post.subjectOf %}
<p class="h-event vevent">
<span class="summary">
Journey from
<time class="dt-start dtstart" datetime="{{ post.subjectOf.startDate }}">
{{ post.subjectOf.startDate|printdate }}
</time>
to
<time class="dt-end dtend" datetime="{{ post.subjectOf.endDate }}">
{{ post.subjectOf.endDate|printdate }}
</time>, in
<span class="p-location location">
{{ post.subjectOf.location.name }}
</span>
</span>
<a class="u-url url" href="{{ post.url }}"></a>
</p>
{% endif %}
</header> </header>
{% if post.review %} {% if post.review %}
@ -108,106 +159,6 @@
{{ post.text|relurl(baseurl) }} {{ post.text|relurl(baseurl) }}
</div> </div>
<footer>
<dl>
{% if post.subjectOf %}
<dt>Trip details</dt>
<dd class="h-event vevent">
<span class="summary">
From
<time class="dt-start dtstart" datetime="{{ post.subjectOf.startDate }}">
{{ post.subjectOf.startDate|printdate }}
</time>
to
<time class="dt-end dtend" datetime="{{ post.subjectOf.endDate }}">
{{ post.subjectOf.endDate|printdate }}
</time>, in
<span class="p-location location">
{{ post.subjectOf.location.name }}
</span>
</span>
<a class="u-url url" href="{{ post.url }}"></a>
</dd>
{% endif %}
<dt>Published</dt>
<dd>
<time datetime="{{ post.datePublished }}" class="dt-published published">{{ post.datePublished|printdate }}</time>
<time datetime="{{ post.dateModified }}" class="dt-updated updated"></time>
</dd>
<dt>License</dt>
<dd class="license">
{% if 'CC-BY-4.0' in post.license %}
<a rel="license" href="{{ post.license }}" class="u-license">
CC-BY-4.0
</a>
<ul>
<li>you can share it</li>
<li>you can republish it</li>
<li>you can modify it, but you need to indicate the modifications</li>
<li>you can use it for commercial purposes</li>
<li>you always need to make a link back here</li>
</ul>
{% elif 'CC-BY-NC-4.0' in post.license %}
<a rel="license" href="{{ post.license }}" class="u-license">
CC-BY-NC-4.0
</a>
<ul>
<li>you can share it</li>
<li>you can republish it</li>
<li>you can modify it, but you need to indicate the modifications</li>
<li>you can't use it for commercial purposes</li>
<li>you always need to make a link back here</li>
</ul>
For commercial use, please contact me.
{% elif 'CC-BY-NC-ND-4.0' in post.license %}
<a rel="license" href="{{ post.license }}" class="u-license">
CC-BY-NC-ND-4.0
</a>
<ul>
<li>you can share it</li>
<li>you can't modify it</li>
<li>you can't republish it</li>
<li>you can't use it for commercial purposes</li>
<li>you always need to make a link back here</li>
</ul>
For commercial use, please contact me.
{% endif %}
</dd>
<dt>Author</dt>
<dd class="p-author h-card vcard">
<img class="u-photo photo" src="{{ post.author.image|relurl(baseurl) }}" alt="" />
<a class="p-name u-url fn url" href="{{ post.author.url }}">{{ post.author.name }}</a>
<a class="u-email email" href="mailto:{{ post.author.email }}">{{ post.author.email }}</a>
</dd>
<dt>Entry URL</dt>
<dd>
<a class="u-url u-uuid bookmark" href="{{ post.url }}">
{{ post.url }}
</a>
</dd>
{% if post.sameAs|length %}
<dt>Also on</dt>
<dd>
<ul>
{% for url in post.sameAs %}
<li>
<a class="u-syndication" href="{{ url }}">
{{ url }}
</a>
</li>
{% endfor %}
</ul>
</dd>
{% endif %}
</dl>
</footer>
{% if 'WebPage' != post['@type'] %} {% if 'WebPage' != post['@type'] %}
<section class="syndication"> <section class="syndication">
{% for action in post.potentialAction %} {% for action in post.potentialAction %}

View file

@ -1,9 +1,7 @@
--- {{ post.headline|center(width=80) }}
Title: {{ post.headline }}
Author: {{ post.author.name }} <{{ post.author.email}}> by {{ post.author.name }} <{{ post.author.email}}>
URL: {{ post.url }} {{ post.datePublished|printdate }}
Published: {{ post.datePublished|printdate }}
---
{{ summary }} {{ summary }}

View file

@ -137,9 +137,9 @@
</ul> </ul>
</nav> </nav>
<nav> <nav>
<a href="https://xn--sr8hvo.ws/🇻🇮📢/previous"></a> <a href="https://xn--sr8hvo.ws/%F0%9F%87%BB%F0%9F%87%AE%F0%9F%93%A2/previous"></a>
Member of <a href="https://xn--sr8hvo.ws">IndieWeb Webring</a> Member of <a href="https://xn--sr8hvo.ws">IndieWeb Webring</a>
<a href="https://xn--sr8hvo.ws/🇻🇮📢/next"></a> <a href="https://xn--sr8hvo.ws/%F0%9F%87%BB%F0%9F%87%AE%F0%9F%93%A2/next"></a>
</nav> </nav>
</section> </section>
<section> <section>

View file

@ -49,6 +49,8 @@ main p {
h1 { h1 {
border-bottom: 4px double #999; border-bottom: 4px double #999;
text-transform:uppercase; text-transform:uppercase;
text-align: center;
padding-bottom: 1em;
} }
article > footer > dl > dt, article > footer > dl > dt,
@ -215,7 +217,7 @@ figcaption > dl dd {
margin: 0 0.3em; margin: 0 0.3em;
} }
footer img { .vcard img {
height: 1em; height: 1em;
} }
@ -276,6 +278,16 @@ main > header > p {
text-align: center; text-align: center;
} }
article > header {
border-bottom: 4px double #999;
margin-bottom: 2em;
}
.h-feed article > header {
border: none;
margin: 0;
}
main ul { main ul {
margin-left: 2em; margin-left: 2em;
} }
@ -343,4 +355,4 @@ body > img {
right: 0; right: 0;
width: 10em; width: 10em;
height: auto; height: auto;
} }