- new functionality called "memento": on first publish, the rendered HTML is copied to memento.html, which is never changed afterwards. With --memento, also try to fetch old versions of the entry from archive.org - layout fixes - moved the licence ("CC by xyz") text from the top of an entry to the bottom, so it's less distracting
@@ -16,6 +16,7 @@ import json
import queue import base64 from shutil import copy2 as cp +from shutil import rmtree from math import ceil from urllib.parse import urlparse from collections import OrderedDict, namedtuple@@ -438,10 +439,47 @@ self.fpath = fpath
self.mtime = mtime(fpath) @property + def renderdir(self): + return os.path.join( + settings.paths.get('build'), + self.source + ) + + @property + def renderfile(self): + return os.path.join( + self.renderdir, + settings.filenames.html + ) + + @property def source(self): source, fext = os.path.splitext(os.path.basename(self.fpath)) return source + @property + def template(self): + return "%s.j2.html" % (self.__class__.__name__) + + @property + def tmplvars(self): + return { + 'source': self.source + } + + async def render(self): + if os.path.exists(self.renderfile): + rmtree(os.path.dirname(self.renderfile)) + #logger.info( + #'rendering %s to %s', + #self.__class__.__name__, + #self.source + #) + #r = J2.get_template(self.template).render( + #self.tmplvars + #) + #writepath(self.renderfile, r) + class Redirect(Gone): """@@ -455,6 +493,12 @@ with open(self.fpath, 'rt') as f:
target = f.read().strip() return target + @property + def tmplvars(self): + return { + 'source': self.source, + 'target': self.target + } class Singular(MarkdownDoc): """@@ -878,6 +922,20 @@ settings.filenames.html
) @property + def mementofile(self): + return os.path.join( + os.path.dirname(self.fpath), + settings.filenames.memento + ) + + @property + def has_memento(self): + if os.path.exists(self.mementofile): + if os.path.getsize(self.mementofile) > 0: + return True + return False + + @property def gopherfile(self): return os.path.join( self.renderdir,@@ -980,9 +1038,100 @@ continue
logger.info("copying '%s' to '%s'", f, t) cp(f, t) + async def make_memento(self): + cp(self.renderfile, self.mementofile) + return + + async def wayback_save(self): + requests.get('http://web.archive.org/save/%s' % (self.url)) + + def try_memento(self, url): + try: + params = { + 'url': url, + 'timestamp': '%s' % (self.published.format('YYYY-MM-DD')) + } + waybackmachine = 'http://archive.org/wayback/available' + snapshots = requests.get(waybackmachine, params=params).json() + # no archived version... + if not len(snapshots.get('archived_snapshots', None)): + logger.warning('no snapshot found for %s', url) + return None + else: + logger.info('snapshot FOUND for %s', url) + + snapshot = snapshots.get('archived_snapshots').get('closest') + logger.info('getting %s', snapshot['url']) + original = requests.get(snapshot['url']) + return original.text + except Exception as e: + logger.warning('wayback memento failed for %s: %s', url, e) + return None + + + def waybackmemento(self): + if self.has_memento: + return + + # this commented out part is extremely specific to my old site + # but it helps anyone who had multiple domains and/or taxonomy + # structures + # formerdomains = [ + # 'cadeyrn.webporfolio.hu', + # 'blog.petermolnar.eu', + # 'petermolnar.eu', + # 'petermolnar.net', + # ] + + # formercategories = [ + # 'linux-tech-coding', + # 'diy-do-it-yourself', + # 'photoblog', + # 'it', + # 'sysadmin-blog', + # 'sysadmin', + # 'fotography', + # 'blips', + # 'blog', + # 'r' + # ] + + # for domain in formerdomains: + # maybe = None + # url = url = 'http://%s/%s/' % (domain, self.name) + # maybe = self.try_memento(url) + # if maybe: + # break + # for formercategory in formercategories: + # url = 'http://%s/%s/%s/' % (domain, formercategory, self.name) + # maybe = self.try_memento(url) + # if maybe: + # break + # if maybe: + # break + + maybe = self.try_memento(self.url) + if maybe: + with open(self.mementofile, 'wt') as f: + logger.info( + 'saving memento for %s to %s', + 
self.name, + self.mementofile + ) + f.write(maybe) + + async def render(self): + if settings.args.get('memento'): + self.waybackmemento() + if self.exists: return + + memento = False + if self.has_memento: + memento = "%s%s" % (self.url, settings.filenames.memento) + logger.info("rendering %s", self.name) v = { 'baseurl': self.url,@@ -990,7 +1139,8 @@ 'post': self.jsonld,
'site': settings.site, 'menu': settings.menu, 'meta': settings.meta, - 'fnames': settings.filenames + 'fnames': settings.filenames, + 'memento': memento } writepath( self.renderfile,@@ -1018,6 +1168,10 @@ os.path.join(self.renderdir, settings.filenames.json),
json.dumps(j, indent=4, ensure_ascii=False) ) del(j) + if not os.path.exists(self.mementofile): + if self.published.timestamp >= settings.mementostartime: + copy(self.renderfile, self.mementofile) + # oembed # writepath( # os.path.join(self.renderdir, settings.filenames.oembed_json),@@ -1796,7 +1950,7 @@
def feedpath(self, fname): return os.path.join( self.dpath, - settings.paths.category, + settings.paths.feed, fname )@@ -2099,9 +2253,35 @@ self[post.url] = post.mtime
@property def renderfile(self): - return os.path.join(settings.paths.get('build'), 'sitemap.txt') + return os.path.join( + settings.paths.get('build'), + settings.filenames.sitemap + ) async def render(self): + if not len(self): + return + + if self.mtime >= sorted(self.values())[-1]: + return + + sitemap = etree.Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9") + xmldoc = etree.ElementTree(sitemap) + for url, mtime in self.items(): + e = etree.SubElement(sitemap, "url") + loc = etree.SubElement(e, "loc").text = url + lastmod = etree.SubElement(e, "lastmod").text = str(arrow.get(mtime)) + s = etree.tostring( + xmldoc, + encoding='utf-8', + xml_declaration=True, + pretty_print=True + ) + + with open(self.renderfile, 'wb') as f: + f.write(s) + + async def render_txt(self): if len(self) > 0: if self.mtime >= sorted(self.values())[-1]: return@@ -2270,6 +2450,7 @@ incoming.run()
queue = AQ() send = [] + firsttimepublished = [] content = settings.paths.get('content') rules = IndexPHP()@@ -2303,6 +2484,9 @@ # skip draft posts from anything further
if post.is_future: logger.info('%s is for the future', post.name) continue + elif not os.path.exists(post.renderfile): + logger.debug('%s seems to be fist time published', post.name) + firsttimepublished.append(post) # add post to search database search.append(post)@@ -2363,9 +2547,6 @@ if os.path.exists(t) and mtime(e) <= mtime(t):
continue cp(e, t) - # dat data - #dat() - end = int(round(time.time() * 1000)) logger.info('process took %d ms' % (end - start))@@ -2391,6 +2572,9 @@ queue.put(wm.send())
queue.run() logger.info('sending webmentions finished') + for post in firsttimepublished: + queue.put(post.make_memento()) + queue.put(post.wayback_save()) if __name__ == '__main__': make()
@@ -24,6 +24,7 @@ pagination = 42
notinfeed = ['note'] flat = ['article', 'journal'] displaydate = 'YYYY-MM-DD HH:mm' +mementostartime = 1561192582 licence = struct({ 'article': 'CC-BY-4.0',@@ -169,7 +170,9 @@ 'txt': 'index.txt',
'html': 'index.html', 'gopher': 'gophermap', 'oembed_xml': 'oembed.xml', - 'oembed_json': 'oembed.json' + 'oembed_json': 'oembed.json', + 'memento': 'memento.html', + 'sitemap': 'sitemap.xml' }) datignore = [@@ -212,7 +215,8 @@ 'debug': 'set logging to debug level',
'quiet': 'show only errors', 'offline': 'offline mode - no syncing, no querying services, etc.', 'noping': 'make dummy webmention entries and don\'t really send them', - 'noservices': 'skip querying any service but do sync the website' + 'noservices': 'skip querying any service but do sync the website', + 'memento': 'try to fetch mementos from archive.org' } for k, v in _booleanparams.items():
@@ -121,7 +121,7 @@ {% block lang %}{% endblock %}
{% block title %}Search results for: <?php echo($_GET['q']); ?>{% endblock %} {% block content %} -<main class="h-feed hatom"> +<main id="main" class="h-feed hatom"> <h1>Search results for: <?php echo($_GET['q']); ?></h1> <dl> <?php
@@ -62,87 +62,30 @@
{% block content %} <main id="main"> <article class="h-entry hentry" lang="{{ post.inLanguage }}" id="article"> - <header class="entry-header"> - <h1 class="p-name entry-title"> - {% if post.mentions %} - <span> - <svg width="16" height="16"> - <use xlink:href="#icon-reply" /> - </svg> - <a href="{{ post.url|relurl(baseurl) }}"> - RE: - </a> - <a href="{{ post.mentions.url }}" class="u-in-reply-to"> - {{ post.mentions.url }} - </a> - </span> - {% else %} + {% if memento %} + <aside id="memento"> + <span>current version</span> | <a href="{{ memento|relurl(baseurl) }}">how it originally looked in {{ post.copyrightYear }}</a> + </aside> + {% endif %} + <h1 class="p-name entry-title"> + {% if post.mentions %} + <span> + <svg width="16" height="16"> + <use xlink:href="#icon-reply" /> + </svg> <a href="{{ post.url|relurl(baseurl) }}"> - {{ post.headline }} + RE: + </a> + <a href="{{ post.mentions.url }}" class="u-in-reply-to"> + {{ post.mentions.url }} </a> - {% endif %} - </h1> - - <p>Licenced under - <a rel="license" href="{{ post.license }}" class="u-license">{{ post.license | extractlicense }}</a>, - created by - <span class="p-author h-card vcard"> - <img class="u-photo photo" src="{{ post.author.image|relurl(baseurl) }}" alt="" /> - <a class="p-name u-url fn url" href="{{ post.author.url }}">{{ post.author.name }}</a> - (<a class="u-email email" href="mailto:{{ post.author.email }}">{{ post.author.email }}</a>), - </span> - published at - <time datetime="{{ post.datePublished }}" class="dt-published published">{{ post.datePublished|printdate }}</time> - <time datetime="{{ post.dateModified }}" class="dt-updated updated"></time> - to canonical URL - <a class="u-url u-uuid bookmark" href="{{ post.url }}">{{ post.url }}</a> - {% if post.keywords|length > 0 %} - with keywords - {% for keyword in post.keywords %} - #<span class="p-category">{{ keyword }}</span>{% if not loop.last %} {% endif %} - {% endfor %} - {% endif %} - {% if post['@type'] == 'Photograph' %} - {% 
if post.image[0].locationCreated %} - from the location - <a class="h-geo" href="https://www.openstreetmap.org/#map=14/{{ post.image[0].locationCreated.geo.longitude }}/{{ post.image[0].locationCreated.geo.latitude }}"> - <span class="p-longitude">{{ post.image[0].locationCreated.geo.longitude }}</span>, - <span class="p-latitude">{{ post.image[0].locationCreated.geo.latitude }}</span> - </a> - {% endif %} - {% endif %} - </p> - {% if post.sameAs|length %} - <p> - This post was also syndicated to: - <ul> - {% for url in post.sameAs %} - <li><a class="u-syndication" href="{{ url }}">{{ url }}</a></li> - {% endfor %} - </ul> - </p> + </span> + {% else %} + <a href="{{ post.url|relurl(baseurl) }}"> + {{ post.headline }} + </a> {% endif %} - -{% if post.subjectOf %} - <p class="h-event vevent"> - <span class="summary"> - Journey from - <time class="dt-start dtstart" datetime="{{ post.subjectOf.startDate }}"> - {{ post.subjectOf.startDate|printdate }} - </time> - to - <time class="dt-end dtend" datetime="{{ post.subjectOf.endDate }}"> - {{ post.subjectOf.endDate|printdate }} - </time>, in - <span class="p-location location"> - {{ post.subjectOf.location.name }} - </span> - </span> - <a class="u-url url" href="{{ post.url }}"></a> - </p> -{% endif %} - - </header> + </h1> {% if post.review %} <section class="h-review hreview">@@ -232,6 +175,68 @@ </section>
{% endif %} {% endif %} + + <footer class="entry-footer"> + <p>Licenced under + <a rel="license" href="{{ post.license }}" class="u-license">{{ post.license | extractlicense }}</a>, + created by + <span class="p-author h-card vcard"> + <img class="u-photo photo" src="{{ post.author.image|relurl(baseurl) }}" alt="" /> + <a class="p-name u-url fn url" href="{{ post.author.url }}">{{ post.author.name }}</a> + (<a class="u-email email" href="mailto:{{ post.author.email }}">{{ post.author.email }}</a>), + </span> + published at + <time datetime="{{ post.datePublished }}" class="dt-published published">{{ post.datePublished|printdate }}</time> + <time datetime="{{ post.dateModified }}" class="dt-updated updated"></time> + to canonical URL + <a class="u-url u-uuid bookmark" href="{{ post.url }}">{{ post.url }}</a> + {% if post.keywords|length > 0 %} + with keywords + {% for keyword in post.keywords %} + "<span class="p-category">{{ keyword }}</span>"{% if not loop.last %}, {% endif %} + {% endfor %} + {% endif %} + {% if post['@type'] == 'Photograph' %} + {% if post.image[0].locationCreated %} + from the location + <a class="h-geo" href="https://www.openstreetmap.org/#map=14/{{ post.image[0].locationCreated.geo.longitude }}/{{ post.image[0].locationCreated.geo.latitude }}"> + <span class="p-longitude">{{ post.image[0].locationCreated.geo.longitude }}</span>, + <span class="p-latitude">{{ post.image[0].locationCreated.geo.latitude }}</span> + </a> + {% endif %} + {% endif %} + </p> + {% if post.sameAs|length %} + <p> + This post was also syndicated to: + <ul> + {% for url in post.sameAs %} + <li><a class="u-syndication" href="{{ url }}">{{ url }}</a></li> + {% endfor %} + </ul> + </p> + {% endif %} + +{% if post.subjectOf %} + <p class="h-event vevent"> + <span class="summary"> + Journey from + <time class="dt-start dtstart" datetime="{{ post.subjectOf.startDate }}"> + {{ post.subjectOf.startDate|printdate }} + </time> + to + <time class="dt-end dtend" datetime="{{ 
post.subjectOf.endDate }}"> + {{ post.subjectOf.endDate|printdate }} + </time>, in + <span class="p-location location"> + {{ post.subjectOf.location.name }} + </span> + </span> + <a class="u-url url" href="{{ post.url }}"></a> + </p> +{% endif %} + </footer> + </article> </main> {% endblock %}
@@ -10,7 +10,6 @@ <meta name="viewport" content="width=device-width,initial-scale=1,minimum-scale=1" />
<meta name="author" content="{{ site.author.name }} ({{ site.author.email }})" /> <link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="{{ site.name }}"> <link rel="icon" href="{{ site.image }}" /> - <!-- <base href="{{ baseurl }}" /> --> {% for key, value in meta.items() %} <link rel="{{ key }}" href="{{ value }}" /> {% endfor %}
@@ -75,4 +75,4 @@ #comments .u-url:hover,
#main a:hover, #pagination a:hover { border-bottom: 1px solid #014384; -} +}
@@ -2,18 +2,23 @@ * {
-webkit-box-sizing: border-box; -moz-box-sizing: border-box; box-sizing: border-box; - font-family: "Courier New", monospace; + font-family: "Ubuntu", "Roboto", "Segoe UI", "San Francisco", "Droid Sans", "Tahoma", "Helvetica Neue", "Lucida Grande", sans-serif; margin: 0; padding: 0; line-height: 1.5em; + letter-spacing: 0.02em; } article, aside, footer, header, nav, section { display:block; } +html { + background-color: #111; +} + body { - color: #eee; + color: #ddd; background-color: #222; }@@ -132,6 +137,7 @@ margin: 0 0.3em;
} code, pre { + font-family: "Courier New", monospace; color: #3c3; border: 1px solid #666; direction: ltr;@@ -222,7 +228,6 @@ margin: 0 0 0 0.6em;
} #pagination ol, -.footnotes ol, #header ul, #footer ul { list-style-type: none;@@ -232,6 +237,11 @@
#header li, #footer li { display: inline-block; + margin: 0 0.8em 0 0; +} + +#header li { + font-weight: bold; } #footer p {@@ -259,7 +269,7 @@ }
#pagination li { display: inline-block; - padding: 0.3em; + padding: 0.3em 0.6em; } #pagination a {@@ -329,6 +339,26 @@ }
.h-feed .exif { display: none; +} + +.entry-footer { + border-top: 1px dashed #999; + margin: 2em 0; +} + +.h-feed .entry-footer { + display: none; + visibility: hidden; +} + +.h-feed .h-entry { + + padding: 1em; + margin:1em 0; +} + +.h-feed .h-entry h3 { + margin:0; } @media all and (max-width: 47em) {