2017-05-23 11:14:47 +01:00
|
|
|
#!/usr/bin/env python3
|
2018-07-25 13:24:31 +01:00
|
|
|
import _json
|
2017-12-17 17:37:32 +00:00
|
|
|
|
|
|
|
__author__ = "Peter Molnar"
|
2018-03-29 17:07:53 +01:00
|
|
|
__copyright__ = "Copyright 2017-2018, Peter Molnar"
|
2018-06-21 10:23:45 +01:00
|
|
|
__license__ = "GNU LGPLv3 "
|
2017-12-17 17:37:32 +00:00
|
|
|
__maintainer__ = "Peter Molnar"
|
2018-04-30 20:44:04 +01:00
|
|
|
__email__ = "mail@petermolnar.net"
|
2017-05-23 11:14:47 +01:00
|
|
|
|
2018-07-20 16:45:42 +01:00
|
|
|
import glob
|
2017-05-23 11:14:47 +01:00
|
|
|
import os
|
2018-07-20 16:45:42 +01:00
|
|
|
import time
|
Back To Pandoc
So, Python Markdown is a bottomless pit of horrors, including crippling parsing bugs
that appear out of nowhere and a lack of features. It's definitely much faster than
Pandoc, but Pandoc doesn't fall apart when a fenced code block contains a regex
that happens to be a regex for markdown elements.
Also added some ugly post-conversion string replacements to make Pandoc's fenced code output work
with Prism:
instead of Pandoc's <pre class="codelang"><code>, Prism wants
<pre><code class="language-codelang">, so I added a regex sub, because it's 00:32.
2018-08-04 00:28:55 +01:00
|
|
|
from functools import partial
|
2017-05-23 11:14:47 +01:00
|
|
|
import re
|
2018-07-20 16:45:42 +01:00
|
|
|
import imghdr
|
2017-05-23 11:14:47 +01:00
|
|
|
import asyncio
|
2018-07-22 11:33:59 +01:00
|
|
|
import sqlite3
|
2018-07-25 13:24:31 +01:00
|
|
|
import json
|
2018-07-20 16:45:42 +01:00
|
|
|
from shutil import copy2 as cp
|
2017-10-27 10:29:33 +01:00
|
|
|
from math import ceil
|
2018-07-20 16:45:42 +01:00
|
|
|
from urllib.parse import urlparse
|
|
|
|
from collections import OrderedDict, namedtuple
|
2017-10-27 10:29:33 +01:00
|
|
|
import arrow
|
2017-05-23 11:14:47 +01:00
|
|
|
import langdetect
|
2017-10-27 10:29:33 +01:00
|
|
|
import wand.image
|
2018-07-20 16:45:42 +01:00
|
|
|
import jinja2
|
|
|
|
import frontmatter
|
2017-11-30 17:01:14 +00:00
|
|
|
from feedgen.feed import FeedGenerator
|
2018-07-20 16:45:42 +01:00
|
|
|
from bleach import clean
|
|
|
|
from emoji import UNICODE_EMOJI
|
2018-07-25 13:24:31 +01:00
|
|
|
from slugify import slugify
|
2018-07-22 17:59:26 +01:00
|
|
|
import requests
|
Back To Pandoc
So, Python Markdown is a bottomless pit of horrors, including crippling parsing bugs
that appear out of nowhere and a lack of features. It's definitely much faster than
Pandoc, but Pandoc doesn't fall apart when a fenced code block contains a regex
that happens to be a regex for markdown elements.
Also added some ugly post-conversion string replacements to make Pandoc's fenced code output work
with Prism:
instead of Pandoc's <pre class="codelang"><code>, Prism wants
<pre><code class="language-codelang">, so I added a regex sub, because it's 00:32.
2018-08-04 00:28:55 +01:00
|
|
|
from pandoc import pandoc
|
2018-07-20 16:45:42 +01:00
|
|
|
import exiftool
|
|
|
|
import settings
|
2018-07-22 17:59:26 +01:00
|
|
|
import keys
|
2018-07-20 16:45:42 +01:00
|
|
|
|
|
|
|
# Record for one Markdown image reference; the field order is the order in
# which RE_MDIMG yields its groups (whole match, alt, file name, title, css).
MarkdownImage = namedtuple(
    'MarkdownImage',
    'match alt fname title css'
)
|
|
|
|
|
|
|
|
# Shared Jinja2 environment for the whole module; templates are looked up in
# the directory configured as settings.paths['tmpl'].
J2 = jinja2.Environment(
    trim_blocks=True,
    lstrip_blocks=True,
    loader=jinja2.FileSystemLoader(settings.paths.get('tmpl')),
)
|
|
|
|
|
|
|
|
# Matches a Markdown image such as  ![alt](file.jpg "title"){css}
# The alt text, the quoted title and the trailing {css} part are optional.
# Named groups: match (the whole construct), alt, fname, title, css.
RE_MDIMG = re.compile(
    (
        r'(?P<match>!\[(?P<alt>[^\]]+)?\]\((?P<fname>[^\s]+)'
        r'(?:\s[\'\"](?P<title>[^\"\']+)[\'\"])?\)(?:{(?P<css>[^\}]+)\})?)'
    ),
    flags=re.IGNORECASE,
)
|
|
|
|
|
Back To Pandoc
So, Python Markdown is a bottomless pit of horrors, including crippling parsing bugs
that appear out of nowhere and a lack of features. It's definitely much faster than
Pandoc, but Pandoc doesn't fall apart when a fenced code block contains a regex
that happens to be a regex for markdown elements.
Also added some ugly post-conversion string replacements to make Pandoc's fenced code output work
with Prism:
instead of Pandoc's <pre class="codelang"><code>, Prism wants
<pre><code class="language-codelang">, so I added a regex sub, because it's 00:32.
2018-08-04 00:28:55 +01:00
|
|
|
# Finds the start of a fenced code block: three `~` or backtick characters,
# optionally followed by a run of non-backtick characters.
RE_CODE = re.compile(r'(?:[~`]{3})(?:[^`]+)?')
|
|
|
|
|
Back To Pandoc
So, Python Markdown is a bottomless pit of horrors, including crippling parsing bugs
that appear out of nowhere and a lack of features. It's definitely much faster than
Pandoc, but Pandoc doesn't fall apart when a fenced code block contains a regex
that happens to be a regex for markdown elements.
Also added some ugly post-conversion string replacements to make Pandoc's fenced code output work
with Prism:
instead of Pandoc's <pre class="codelang"><code>, Prism wants
<pre><code class="language-codelang">, so I added a regex sub, because it's 00:32.
2018-08-04 00:28:55 +01:00
|
|
|
# Matches an opening `<pre class="X"><code>` pair and captures the class
# value X as group 1.
RE_PRECODE = re.compile(r'<pre class="([^"]+)"><code>')
|
|
|
|
|
Back To Pandoc
So, Python Markdown is a bottomless pit of horrors, including crippling parsing bugs
that appear out of nowhere and a lack of features. It's definitely much faster than
Pandoc, but Pandoc doesn't fall apart when a fenced code block contains a regex
that happens to be a regex for markdown elements.
Also added some ugly post-conversion string replacements to make Pandoc's fenced code output work
with Prism:
instead of Pandoc's <pre class="codelang"><code>, Prism wants
<pre><code class="language-codelang">, so I added a regex sub, because it's 00:32.
2018-08-04 00:28:55 +01:00
|
|
|
class cached_property(object):
    """Descriptor that evaluates the wrapped method once per instance.

    The first access through an instance calls the method and stores the
    result on that instance under the same attribute name; later lookups
    find the stored value and no longer reach this descriptor.
    """

    def __init__(self, method, name=None):
        # keep the plain function, the attribute name the result is stored
        # under, and the function's docstring
        self.method = method
        self.name = method.__name__ if not name else name
        self.__doc__ = method.__doc__

    def __get__(self, inst, cls):
        if inst is not None:
            # accessed as `Foo().bar`: compute the value, then store it on
            # the instance so this descriptor is bypassed from now on
            value = self.method(inst)
            setattr(inst, self.name, value)
            return value
        # accessed as `Foo.bar` (on the class): return the descriptor itself
        return self
|
|
|
|
|
2018-07-22 14:52:32 +01:00
|
|
|
|
2018-07-20 16:45:42 +01:00
|
|
|
class MarkdownDoc(object):
    """
    Base class for documents stored as a Markdown file with front matter.

    Subclasses are expected to set ``self.fpath`` to the path of the file;
    everything else is derived lazily from it.
    """

    @cached_property
    def _parsed(self):
        """
        Read ``self.fpath`` and split it with ``frontmatter.parse``.

        Returns a ``(meta, txt)`` tuple; the result is cached on the instance.
        """
        with open(self.fpath, mode='rt') as f:
            settings.logger.debug('parsing YAML+MD file %s', self.fpath)
            meta, txt = frontmatter.parse(f.read())
        return (meta, txt)

    @property
    def meta(self):
        """The front matter part of the parsed file."""
        return self._parsed[0]

    @property
    def content(self):
        """The text part of the parsed file (everything but the front matter)."""
        return self._parsed[1]

    @cached_property
    def html_content(self):
        """
        The content converted by ``pandoc()``, cached on the instance.

        If the object has a non-empty ``images`` mapping, every key (the
        matched Markdown image text) is first replaced in the content with
        ``str()`` of its value.
        """
        c = str(self.content)
        # `images` only exists on some subclasses, hence the getattr
        images = getattr(self, 'images', None)
        if images:
            for match, img in images.items():
                c = c.replace(match, str(img))
        c = pandoc(c)
        # Move the class from <pre class="X"><code> onto the <code> element
        # as lang="X" class="language-X". The template has to be a raw
        # string: "\g" is not a valid escape sequence in a normal literal.
        c = RE_PRECODE.sub(
            r'<pre><code lang="\g<1>" class="language-\g<1>">',
            c
        )
        return c
|
2017-10-27 10:29:33 +01:00
|
|
|
|
2017-11-10 16:04:05 +00:00
|
|
|
|
2018-07-20 16:45:42 +01:00
|
|
|
class Comment(MarkdownDoc):
    """
    A comment or reaction on a post, read from a Markdown file with
    front matter.
    """

    def __init__(self, fpath):
        self.fpath = fpath

    @property
    def dt(self):
        """
        Arrow object built from the 'date' meta entry or, when that is
        missing or empty, from the modification time of the file.
        """
        stamp = self.meta.get('date')
        if not stamp:
            stamp = os.path.getmtime(self.fpath)
        return arrow.get(stamp)

    @property
    def target(self):
        """Last path segment of the URL in the 'target' meta entry."""
        parsed = urlparse(self.meta.get('target'))
        segments = parsed.path.strip('/').split('/')
        return segments[-1]

    @property
    def source(self):
        """The 'source' meta entry (None when absent)."""
        return self.meta.get('source')

    @property
    def author(self):
        """
        Dict with the 'name' and 'url' of the comment's author.

        Defaults to the hostname of the source URL and the source URL
        itself; an 'author' meta entry overrides the name with its own
        'name' or, failing that, with the hostname of its 'url'.
        """
        info = {
            'name': urlparse(self.source).hostname,
            'url': self.source
        }
        author = self.meta.get('author')
        if author:
            if 'name' in author:
                info['name'] = author.get('name')
            elif 'url' in author:
                info['name'] = urlparse(author.get('url')).hostname
        return info

    @property
    def type(self):
        """
        The kind of comment: the cleaned content itself when that is a
        single known emoji, otherwise the 'type' meta entry, defaulting
        to 'webmention'.
        """
        if self.content:
            stripped = clean(self.content, strip=True)
            if stripped in UNICODE_EMOJI:
                return stripped
        return self.meta.get('type', 'webmention')

    @property
    def tmplvars(self):
        """Dict of the values handed to the templates for this comment."""
        return dict(
            author=self.author,
            source=self.source,
            pubtime=self.dt.format(settings.dateformat.get('iso')),
            pubdate=self.dt.format(settings.dateformat.get('display')),
            html=self.html_content,
            type=self.type,
        )
|
|
|
|
|
2017-11-30 17:01:14 +00:00
|
|
|
|
2018-07-20 16:45:42 +01:00
|
|
|
class Gone(object):
    """
    Gone object for deleted entries
    """

    def __init__(self, fpath):
        # remember the marker file and its modification time
        self.fpath = fpath
        self.mtime = os.path.getmtime(fpath)

    @property
    def source(self):
        """File name of the marker file without directory and extension."""
        filename = os.path.basename(self.fpath)
        return os.path.splitext(filename)[0]
|
2017-11-30 17:01:14 +00:00
|
|
|
|
2018-04-30 20:44:04 +01:00
|
|
|
|
2018-07-25 13:24:31 +01:00
|
|
|
class Redirect(Gone):
    """
    Redirect object for entries that moved
    """

    @cached_property
    def target(self):
        """
        Where the entry moved to: the whitespace-stripped content of the
        file at ``self.fpath``. Read once, then cached on the instance.
        """
        with open(self.fpath, 'rt') as f:
            return f.read().strip()
|
2017-05-23 11:14:47 +01:00
|
|
|
|
2017-07-17 14:21:28 +01:00
|
|
|
|
2018-07-20 16:45:42 +01:00
|
|
|
class Singular(MarkdownDoc):
    """
    A Singular object: a complete representation of a post, including
    all its comments, files, images, etc.

    The entry name is the name of the directory holding the Markdown file
    and the category is the name of that directory's parent.
    """

    def __init__(self, fpath):
        self.fpath = fpath
        n = os.path.dirname(fpath)
        self.name = os.path.basename(n)
        self.category = os.path.basename(os.path.dirname(n))
        self.mtime = os.path.getmtime(self.fpath)

    @property
    def ctime(self):
        """
        Time of the last change: the file's mtime, or the largest comment
        key (truncated to int) when that is newer.
        """
        if self.comments:
            newest = int(max(self.comments))
            if newest > self.mtime:
                return newest
        return self.mtime

    @cached_property
    def files(self):
        """
        A list of files present at the same directory level as
        the Singular object, excluding hidden (starting with .) and markdown
        (ending with .md) files
        """
        entry_dir = os.path.dirname(self.fpath)
        return [
            k
            for k in glob.glob(os.path.join(entry_dir, '*.*'))
            # the hidden-file check must look at the file name only: the
            # glob result is a full path, which starts with '.' for any
            # relative './...' location
            if not k.endswith('.md')
            and not os.path.basename(k).startswith('.')
        ]

    @cached_property
    def comments(self):
        """
        A dict of Comment objects keyed by the timestamp of their date,
        populated from every .md file except index.md at the same directory
        level as the Singular object.
        """
        # NOTE(review): two comments with the same timestamp share a key, so
        # the later one replaces the earlier - confirm this is acceptable.
        comments = {}
        pattern = os.path.join(os.path.dirname(self.fpath), '*.md')
        for f in glob.glob(pattern):
            if os.path.basename(f) == 'index.md':
                continue
            c = Comment(f)
            comments[c.dt.timestamp] = c
        return comments

    @cached_property
    def images(self):
        """
        A dict of WebImage objects keyed by the matched Markdown text,
        populated by:
        - images that are present in the Markdown content
        - and have an actual image file at the same directory level as
        the Singular object
        """
        images = {}
        entry_dir = os.path.dirname(self.fpath)
        for match, alt, fname, title, css in RE_MDIMG.findall(self.content):
            mdimg = MarkdownImage(match, alt, fname, title, css)
            imgpath = os.path.join(entry_dir, fname)
            # imghdr.what() returns None for files it cannot identify as
            # an image, which skips them here
            if imgpath in self.files and imghdr.what(imgpath):
                images[match] = WebImage(imgpath, mdimg, self)
        return images

    @property
    def is_front(self):
        """
        Returns if the post should be displayed on the front
        """
        return self.category in settings.site.get('on_front')

    @property
    def is_photo(self):
        """
        True if the post has exactly one image and that image is the file
        named after the entry's directory - so, its slug - with a .jpg
        extension.
        """
        if len(self.images) != 1:
            return False
        photo = next(iter(self.images.values()))
        maybe = self.fpath.replace("index.md", "%s.jpg" % (self.name))
        return photo.fpath == maybe

    @property
    def photo(self):
        """The single image of a photo post, None for any other post."""
        if not self.is_photo:
            return None
        return next(iter(self.images.values()))

    @property
    def enclosure(self):
        """
        Dict with the 'mime', 'size' and 'url' of the photo of a photo
        post, None for any other post.
        """
        photo = self.photo
        if photo is None:
            return None
        return {
            'mime': photo.mime_type,
            'size': photo.mime_size,
            'url': photo.href
        }

    @property
    def summary(self):
        """The 'summary' meta entry, empty string when absent."""
        return self.meta.get('summary', '')

    @cached_property
    def html_summary(self):
        """The summary converted by pandoc(), cached on the instance."""
        return pandoc(self.summary)

    @property
    def title(self):
        """
        Title of the post: "RE: <target>" for replies, otherwise the
        'title' meta entry, otherwise the formatted publish date.
        """
        if self.is_reply:
            return "RE: %s" % self.is_reply
        if 'title' in self.meta:
            return self.meta['title']
        # only build the date fallback when it is actually needed
        return arrow.get(self.published).format(
            settings.dateformat.get('display')
        )

    @property
    def tags(self):
        """The 'tags' meta entry, empty list when absent."""
        return self.meta.get('tags', [])

    @property
    def syndicate(self):
        """
        The 'syndicate' meta entry; photo posts additionally get the
        brid.gy flickr publish URL.
        """
        urls = self.meta.get('syndicate', [])
        if self.is_photo:
            # build a new list: appending in place would modify the parsed
            # meta and add the URL again on every access
            urls = urls + ["https://brid.gy/publish/flickr"]
        return urls

    def baseN(self, num, b=36,
              numerals="0123456789abcdefghijklmnopqrstuvwxyz"):
        """
        Creates short, lowercase slug for a number (an epoch) passed

        num is converted with int() and written in base b using the given
        numerals. Raises ValueError for negative numbers.
        """
        num = int(num)
        if num < 0:
            raise ValueError("baseN() needs a non-negative number")
        if num == 0:
            return numerals[0]
        digits = []
        while num:
            num, remainder = divmod(num, b)
            digits.append(numerals[remainder])
        # digits were collected least significant first
        return ''.join(reversed(digits))

    @property
    def shortslug(self):
        """The publish timestamp encoded with baseN()."""
        return self.baseN(self.published.timestamp)

    @property
    def published(self):
        """Arrow object built from the 'published' meta entry."""
        return arrow.get(self.meta.get('published'))

    @property
    def is_reply(self):
        """The 'in-reply-to' meta entry, False when absent."""
        return self.meta.get('in-reply-to', False)

    @property
    def is_future(self):
        """True if the publish time is later than the current UTC time."""
        # NOTE(review): `.timestamp` is read as an attribute here and in
        # comments/shortslug; presumably this targets an arrow release
        # where it is a property, not a method - confirm.
        return self.published.timestamp > arrow.utcnow().timestamp

    @property
    def licence(self):
        """The licence set for the category, else the site-wide licence."""
        if self.category in settings.licence:
            return settings.licence[self.category]
        return settings.site.get('licence')

    @property
    def lang(self):
        """
        Language detected by langdetect from the title and the content;
        'en' when detection fails.
        """
        lang = 'en'
        try:
            lang = langdetect.detect("\n".join([
                self.meta.get('title', ''),
                self.content
            ]))
        except Exception:
            # best effort: keep the default, but do not swallow
            # KeyboardInterrupt/SystemExit the way BaseException would
            settings.logger.debug(
                'language detection failed for %s',
                self.fpath
            )
        return lang

    @property
    def url(self):
        """Public URL of the post: "<site url>/<name>/"."""
        return "%s/%s/" % (
            settings.site.get('url'),
            self.name
        )

    @property
    def replies(self):
        """
        OrderedDict of the template variables of the comments of type
        'webmention' or 'in-reply-to', under the same keys as comments.
        """
        r = OrderedDict()
        for mtime, c in self.comments.items():
            if c.type in ['webmention', 'in-reply-to']:
                r[mtime] = c.tmplvars
        return r

    @property
    def reactions(self):
        """
        OrderedDict of every comment that is not a reply, grouped by its
        type: {type: OrderedDict({comment key: template variables})}.
        """
        r = OrderedDict()
        for mtime, c in self.comments.items():
            if c.type in ['webmention', 'in-reply-to']:
                continue
            t = "%s" % (c.type)
            if t not in r:
                r[t] = OrderedDict()
            r[t][mtime] = c.tmplvars
        return r

    @property
    def has_code(self):
        """True if RE_CODE finds a fenced code block in the content."""
        return bool(RE_CODE.search(self.content))

    @cached_property
    def tmplvars(self):
        """
        Dict of the values handed to the templates for this post, cached
        on the instance; 'enclosure' is only present for photo posts.
        """
        v = {
            'title': self.title,
            'category': self.category,
            'lang': self.lang,
            'slug': self.name,
            'is_reply': self.is_reply,
            'summary': self.summary,
            'html_summary': self.html_summary,
            'html_content': self.html_content,
            'pubtime': self.published.format(settings.dateformat.get('iso')),
            'pubdate': self.published.format(settings.dateformat.get('display')),
            'year': int(self.published.format('YYYY')),
            'licence': self.licence,
            'replies': self.replies,
            'reactions': self.reactions,
            'syndicate': self.syndicate,
            'url': self.url,
            'review': self.meta.get('review', False),
            'has_code': self.has_code,
        }
        enclosure = self.enclosure
        if enclosure:
            v['enclosure'] = enclosure
        return v

    @property
    def template(self):
        """Template file name, derived from the class name."""
        return "%s.j2.html" % (self.__class__.__name__)

    @property
    def renderdir(self):
        """Output directory of the post inside the build directory."""
        return os.path.join(
            settings.paths.get('build'),
            self.name
        )

    @property
    def renderfile(self):
        """Path of the rendered index.html of the post."""
        return os.path.join(self.renderdir, 'index.html')

    @property
    def exists(self):
        """
        True if the rendered file is present and up to date.

        False when the 'force' argument is set, when the rendered file is
        missing, or when ctime is newer than the rendered file.
        """
        if settings.args.get('force'):
            return False
        if not os.path.exists(self.renderfile):
            return False
        return not self.ctime > os.path.getmtime(self.renderfile)

    @property
    def corpus(self):
        """Title, name, summary and content joined with newlines."""
        return "\n".join([
            self.title,
            self.name,
            self.summary,
            self.content,
        ])

    async def copyfiles(self):
        """Placeholder, currently does nothing."""
        # TODO: plain copy non-image files from entry directory to build/entry directory
        return

    async def render(self):
        """
        Render the post with its template into renderfile, creating
        renderdir if needed. Does nothing when `exists` is true.
        """
        if self.exists:
            return
        r = J2.get_template(self.template).render({
            'post': self.tmplvars,
            'site': settings.site,
            'author': settings.author,
            'meta': settings.meta,
            'licence': settings.licence,
            'tips': settings.tips,
            'labels': settings.labels
        })
        if not os.path.isdir(self.renderdir):
            settings.logger.info("creating directory: %s", self.renderdir)
            # exist_ok: do not fail if the directory appeared in between
            os.makedirs(self.renderdir, exist_ok=True)
        with open(self.renderfile, 'wt') as f:
            settings.logger.info("rendering to %s", self.renderfile)
            f.write(r)
|
2017-06-12 15:40:30 +01:00
|
|
|
|
|
|
|
|
2018-07-20 16:45:42 +01:00
|
|
|
class WebImage(object):
|
|
|
|
def __init__(self, fpath, mdimg, parent):
    """
    Set up the image at fpath.

    mdimg is the MarkdownImage the file was referenced by and parent is the
    object that owns the image. A Resized object is prepared for every
    configured size below the longer edge of the image; when there is none,
    a single one with the longer edge itself is used.
    """
    settings.logger.debug("loading image: %s", fpath)
    self.mdimg = mdimg
    self.fpath = fpath
    self.parent = parent
    self.mtime = os.path.getmtime(self.fpath)
    self.fname, self.fext = os.path.splitext(os.path.basename(fpath))

    longest = max(self.width, self.height)
    resized = []
    for size in settings.photo.get('sizes').keys():
        if size < longest:
            resized.append((size, self.Resized(self, size)))
    self.resized_images = resized
    if not self.resized_images:
        self.resized_images.append((longest, self.Resized(self, longest)))
|
2017-11-03 22:54:36 +00:00
|
|
|
|
2018-07-20 16:45:42 +01:00
|
|
|
def __str__(self):
    """
    Text that replaces the Markdown image in the content.

    Images carrying a css part are returned as their original Markdown
    text; everything else is rendered with the template named after the
    class.
    """
    if len(self.mdimg.css) > 0:
        return self.mdimg.match

    tmpl = J2.get_template("%s.j2.html" % (self.__class__.__name__))
    context = {
        'src': self.src,
        'href': self.href,
        'width': self.displayed.width,
        'height': self.displayed.height,
        'title': self.title,
        'caption': self.caption,
        'exif': self.exif,
        'is_photo': self.is_photo,
    }
    return tmpl.render(context)
|
2018-03-21 15:42:36 +00:00
|
|
|
|
Back To Pandoc
So, Python Markdown is a bottomless pit of horrors, including crippling parsing bugs,
random out of nowhere, lack of features. It's definitely much faster, than
Pandoc, but Pandoc doesn't go full retard where there's a regex in a fenced code block,
that happens to be a regex for markdown elements.
Also added some ugly post string replacements to make Pandoc fenced code output work
with Prism:
instead of the Pandoc <pre class="codelang"><code>, Prism wants
<pre><code class="language-codelang>, so I added a regex sub, because it's 00:32.
2018-08-04 00:28:55 +01:00
|
|
|
@cached_property
def meta(self):
    """Metadata of the image file as an exiftool.Exif object, built once."""
    exif_data = exiftool.Exif(self.fpath)
    return exif_data
|
2018-03-21 15:42:36 +00:00
|
|
|
|
2018-06-08 10:14:39 +01:00
|
|
|
@property
def caption(self):
    """Alt text from the Markdown, else the 'Description' metadata entry."""
    alt = self.mdimg.alt
    if len(alt) == 0:
        return self.meta.get('Description', '')
    return alt
|
2018-06-08 10:14:39 +01:00
|
|
|
|
2017-06-28 12:20:26 +01:00
|
|
|
@property
def title(self):
    """
    Title from the Markdown, else the 'Headline' metadata entry, else the
    file name without its extension.
    """
    md_title = self.mdimg.title
    if len(md_title) == 0:
        return self.meta.get('Headline', self.fname)
    return md_title
|
2017-06-02 11:19:55 +01:00
|
|
|
|
2018-07-20 16:45:42 +01:00
|
|
|
@property
def tags(self):
    """Unique values of the 'Subject' metadata entry, in no fixed order."""
    subjects = self.meta.get('Subject', [])
    unique = set(subjects)
    return list(unique)
|
2017-06-12 15:40:30 +01:00
|
|
|
|
2018-07-20 16:45:42 +01:00
|
|
|
@property
def published(self):
    """
    Arrow object built from the 'ReleaseDate' metadata entry, falling back
    to 'ModifyDate' when there is no release date.
    """
    fallback = self.meta.get('ModifyDate')
    stamp = self.meta.get('ReleaseDate', fallback)
    return arrow.get(stamp)
|
2017-06-02 11:19:55 +01:00
|
|
|
|
2017-10-27 10:29:33 +01:00
|
|
|
@property
def width(self):
    """Integer value of the 'ImageWidth' metadata entry."""
    raw = self.meta.get('ImageWidth')
    return int(raw)
|
2017-06-12 15:40:30 +01:00
|
|
|
|
2017-10-27 10:29:33 +01:00
|
|
|
@property
def height(self):
    """Integer value of the 'ImageHeight' metadata entry."""
    raw = self.meta.get('ImageHeight')
    return int(raw)
|
2017-06-02 11:19:55 +01:00
|
|
|
|
2018-04-30 20:44:04 +01:00
|
|
|
@property
def mime_type(self):
    """The 'MIMEType' metadata entry as str, 'image/jpeg' when absent."""
    found = self.meta.get('MIMEType', 'image/jpeg')
    return str(found)
|
|
|
|
|
|
|
|
@property
def mime_size(self):
    """Size in bytes of the file behind the linked image variant."""
    linked_path = self.linked.fpath
    return os.stat(linked_path).st_size
|
2018-04-30 20:44:04 +01:00
|
|
|
|
2017-10-27 10:29:33 +01:00
|
|
|
@property
def displayed(self):
    """Rendition shown inline.

    Starts from the first entry of ``self.resized_images`` and switches
    to the entry whose size equals ``settings.photo['default']``; when
    several entries match, the last one wins.
    """
    chosen = self.resized_images[0][1]
    for size, candidate in self.resized_images:
        if size != settings.photo.get('default'):
            continue
        chosen = candidate
    return chosen
|
2017-06-02 11:19:55 +01:00
|
|
|
|
2018-07-20 16:45:42 +01:00
|
|
|
@property
def linked(self):
    """Rendition with the largest size, used as the link target.

    Starts from the first entry of ``self.resized_images``; an entry
    replaces it only when its size is strictly larger than every size
    seen so far (initially 0).
    """
    best_size = 0
    best = self.resized_images[0][1]
    for size, candidate in self.resized_images:
        if size > best_size:
            best_size, best = size, candidate
    return best
|
2017-06-12 15:40:30 +01:00
|
|
|
|
2017-10-27 10:29:33 +01:00
|
|
|
@property
def src(self):
    """URL of the rendition returned by ``displayed``."""
    shown = self.displayed
    return shown.url
|
2017-05-23 11:14:47 +01:00
|
|
|
|
2017-10-27 10:29:33 +01:00
|
|
|
@property
def href(self):
    """URL of the rendition returned by ``linked``."""
    target = self.linked
    return target.url
|
2017-07-26 11:23:06 +01:00
|
|
|
|
|
|
|
@property
def is_photo(self):
    """Whether the image counts as one of the site author's own photos.

    True only when ``settings.photo['re_author']`` is set and matches
    either the ``Copyright`` or the ``Artist`` metadata field.
    """
    pattern = settings.photo.get('re_author', None)
    if not pattern:
        return False

    copyright_field = self.meta.get('Copyright', '')
    artist_field = self.meta.get('Artist', '')
    # both Artist and Copyright missing from EXIF
    if not (copyright_field or artist_field):
        return False

    # we have regex, Artist and Copyright, try matching them
    matched = pattern.search(copyright_field) or pattern.search(artist_field)
    return bool(matched)
|
2017-06-12 15:40:30 +01:00
|
|
|
|
2017-06-12 15:17:29 +01:00
|
|
|
@property
def exif(self):
    """Selected EXIF values for templates.

    Returns a dict with a fixed set of keys. Every value is an empty
    string unless the image is a photo (``self.is_photo``) and one of
    the candidate metadata tags for that key holds a truthy value; the
    first such tag wins. GPS coordinates are converted to floats
    rounded to 5 decimals.
    """
    fields = (
        'camera',
        'aperture',
        'shutter_speed',
        'focallength',
        'iso',
        'lens',
        'geo_latitude',
        'geo_longitude',
    )
    result = dict.fromkeys(fields, '')
    if not self.is_photo:
        return result

    # output key -> metadata tags to try, in order of preference
    # NOTE: an earlier variant tried 'FocalLengthIn35mmFormat' before
    # 'FocalLength' for 'focallength'; it is currently disabled.
    sources = {
        'camera': ['Model'],
        'aperture': ['FNumber', 'Aperture'],
        'shutter_speed': ['ExposureTime'],
        'focallength': ['FocalLength'],
        'iso': ['ISO'],
        'lens': ['LensID', 'LensSpec', 'Lens'],
        'geo_latitude': ['GPSLatitude'],
        'geo_longitude': ['GPSLongitude'],
    }

    for field, tag_names in sources.items():
        for tag_name in tag_names:
            value = self.meta.get(tag_name, None)
            if not value:
                continue
            result[field] = round(float(value), 5) if 'geo_' in field else value
            break
    return result
|
2017-06-12 15:40:30 +01:00
|
|
|
|
2017-10-27 10:29:33 +01:00
|
|
|
def _maybe_watermark(self, img):
    """Composite the watermark onto ``img`` when the image is a photo.

    ``img`` is returned untouched when the image is not a photo or the
    file at ``settings.paths['watermark']`` does not exist. Otherwise
    the watermark is scaled relative to the original image (20% of the
    width for landscape, 16% of the height otherwise, where it is also
    rotated by -90 degrees) and placed 1% away from the right and
    bottom edges.
    """
    if not self.is_photo:
        return img

    wmarkfile = settings.paths.get('watermark')
    if not os.path.exists(wmarkfile):
        return img

    with wand.image.Image(filename=wmarkfile) as wmark:
        landscape = self.width > self.height
        w = self.width * 0.2 if landscape else self.height * 0.16
        h = wmark.height * (w / wmark.width)
        if landscape:
            x = self.width - w - (self.width * 0.01)
            y = self.height - h - (self.height * 0.01)
        else:
            # the watermark gets rotated below, so its width and height
            # swap roles when computing the offsets
            x = self.width - h - (self.width * 0.01)
            y = self.height - w - (self.height * 0.01)

        w, h, x, y = (round(value) for value in (w, h, x, y))

        wmark.resize(w, h)
        if not landscape:
            wmark.rotate(-90)
        img.composite(image=wmark, left=x, top=y)
    return img
|
|
|
|
|
2017-10-27 10:29:33 +01:00
|
|
|
async def downsize(self):
    """Create every missing or outdated rendition of the image.

    A rendition is (re)made when its ``exists`` flag is false or when
    ``settings.args['regenerate']`` is set. The original is opened only
    if at least one rendition needs work; it is auto-oriented and
    passed through ``_maybe_watermark`` once, then reused for all of them.
    """
    def outdated(resized):
        return not resized.exists or settings.args.get('regenerate')

    if not any(outdated(resized) for _, resized in self.resized_images):
        return

    with wand.image.Image(filename=self.fpath) as img:
        img.auto_orient()
        img = self._maybe_watermark(img)
        for size, resized in self.resized_images:
            if not outdated(resized):
                continue
            settings.logger.info(
                "resizing image: %s to size %d",
                os.path.basename(self.fpath),
                size
            )
            await resized.make(img)
|
|
|
|
|
|
|
|
class Resized:
    """One downsized rendition of a parent image.

    ``parent`` is the image object (it provides ``fname``, ``fext``,
    ``width``, ``height``, ``mtime``, ``meta`` and its own ``parent``),
    ``size`` is the pixel limit of the rendition and ``crop`` selects
    the square, cropped variant.
    """

    def __init__(self, parent, size, crop=False):
        self.parent = parent
        self.size = size
        self.crop = crop

    @property
    def suffix(self):
        """File name suffix configured for this size ('' when unknown)."""
        sizes = settings.photo.get('sizes')
        return sizes.get(self.size, '')

    @property
    def fname(self):
        """File name: parent name + size suffix + parent extension."""
        parts = (self.parent.fname, self.suffix, self.parent.fext)
        return "%s%s%s" % parts

    @property
    def fpath(self):
        """Path of the rendition inside the owning post's render directory."""
        renderdir = self.parent.parent.renderdir
        return os.path.join(renderdir, self.fname)

    @property
    def url(self):
        """Absolute URL of the rendition under the owning post's name."""
        site_url = settings.site.get('url')
        post_name = self.parent.parent.name
        file_name = "%s%s%s" % (
            self.parent.fname,
            self.suffix,
            self.parent.fext
        )
        return "%s/%s/%s" % (site_url, post_name, file_name)

    @property
    def relpath(self):
        """Rendition path with the build directory prefix removed."""
        build = settings.paths.get('build')
        base = self.parent.parent.renderdir.replace(build, '')
        return "%s/%s" % (base, self.fname)

    @property
    def exists(self):
        """True when the file exists and is not older than the parent."""
        if not os.path.isfile(self.fpath):
            return False
        return os.path.getmtime(self.fpath) >= self.parent.mtime

    @property
    def width(self):
        """Width of the rendition in pixels."""
        return self.dimensions[0]

    @property
    def height(self):
        """Height of the rendition in pixels."""
        return self.dimensions[1]

    @property
    def dimensions(self):
        """``(width, height)`` of the rendition, keeping the aspect ratio.

        Without cropping the size limit applies to the longer side;
        with cropping it applies to the shorter side.
        """
        src_w = self.parent.width
        src_h = self.parent.height
        limit = self.size

        ratio = max(src_w, src_h) / min(src_w, src_h)
        landscape = (src_w / src_h) >= 1

        # panorama: reverse "landscape" because the limit should be on
        # the shorter side, not the longer, and make it a bit smaller than
        # the actual limit
        # 2.39 is the wide angle cinematic view: anything wider than that
        # is panorama land
        if ratio > 2.4 and not self.crop:
            limit = int(limit * 0.6)
            landscape = not landscape

        # the limit goes on the width when exactly one of "landscape"
        # and "crop" holds, otherwise on the height
        if landscape != bool(self.crop):
            return (limit, int(float(limit / src_w) * src_h))
        return (int(float(limit / src_h) * src_w), limit)

    async def make(self, original):
        """Write the rendition to ``self.fpath`` from the ``original`` image.

        ``original`` is cloned, so the caller's image stays untouched.
        JPEG sources (per the ``FileType`` metadata, defaulting to jpeg)
        are additionally compressed, sharpened and saved as progressive
        JPEG.
        """
        target_dir = os.path.dirname(self.fpath)
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)

        with original.clone() as thumb:
            thumb.resize(self.width, self.height)

            if self.crop:
                thumb.liquid_rescale(self.size, self.size, 1, 1)

            filetype = self.parent.meta.get('FileType', 'jpeg').lower()
            if filetype == 'jpeg':
                thumb.compression_quality = 88
                thumb.unsharp_mask(
                    radius=1,
                    sigma=0.5,
                    amount=0.7,
                    threshold=0.5
                )
                thumb.format = 'pjpeg'

            # this is to make sure pjpeg happens
            with open(self.fpath, 'wb') as f:
                settings.logger.info("writing %s", self.fpath)
                thumb.save(file=f)
|
2017-10-27 15:56:05 +01:00
|
|
|
|
|
|
|
|
2018-07-20 16:45:42 +01:00
|
|
|
class AsyncWorker(object):
    """Collects coroutines as tasks and runs them all on one event loop."""

    def __init__(self):
        self._tasks = []
        try:
            self._loop = asyncio.get_event_loop()
        except RuntimeError:
            # newer Python versions refuse to create a loop implicitly
            # when none is set for the current thread
            self._loop = asyncio.new_event_loop()
            asyncio.set_event_loop(self._loop)

    def add(self, job):
        """Schedule the coroutine ``job`` on the worker's loop."""
        task = self._loop.create_task(job)
        self._tasks.append(task)

    def run(self):
        """Block until every queued task has finished.

        Does nothing when no task was added: ``asyncio.wait`` raises
        ``ValueError`` on an empty collection.
        """
        if not self._tasks:
            return
        self._loop.run_until_complete(asyncio.wait(self._tasks))
|
2018-03-21 15:42:36 +00:00
|
|
|
|
|
|
|
|
2018-07-22 14:52:32 +01:00
|
|
|
class IndexPHP(object):
    """Collects "gone" and redirect rules and renders them to index.php."""

    def __init__(self):
        # uri -> True for every address registered as gone
        self.gone = {}
        # source uri -> absolute target URL
        self.redirect = {}

    @property
    def mtime(self):
        """Modification time of the rendered file, 0 when it is missing."""
        target = self.renderfile
        return os.path.getmtime(target) if os.path.exists(target) else 0

    def add_gone(self, uri):
        """Register ``uri`` as gone."""
        self.gone[uri] = True

    def add_redirect(self, source, target):
        """Register a redirect from ``source`` to ``target``.

        A redirect that points at a gone entry makes the source gone as
        well. Targets without a scheme are prefixed with the site URL.
        """
        if target in self.gone:
            self.add_gone(source)
            return
        if '://' not in target:
            target = "%s/%s" % (settings.site.get('url'), target)
        self.redirect[source] = target

    @property
    def renderfile(self):
        """Path of index.php inside the build directory."""
        build_dir = settings.paths.get('build')
        return os.path.join(build_dir, 'index.php')

    async def render(self):
        """Render the ``Index.j2.php`` template into ``renderfile``."""
        template = J2.get_template('Index.j2.php')
        rendered = template.render({
            'post': {},
            'site': settings.site,
            'gones': self.gone,
            'redirects': self.redirect
        })
        with open(self.renderfile, 'wt') as f:
            settings.logger.info("rendering to %s", self.renderfile)
            f.write(rendered)
|
2017-10-27 15:56:05 +01:00
|
|
|
|
|
|
|
|
2018-07-20 16:45:42 +01:00
|
|
|
class Category(dict):
    """A named collection of posts, keyed by a sortable value.

    Keys are sorted in reverse to get the display order (see
    ``sortedkeys``); values are post objects providing ``tmplvars``,
    ``published`` and ``fpath``. An empty name stands for the front
    page of the site.
    """

    def __init__(self, name=''):
        self.name = name
        self.page = 1

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``; refuse to overwrite an entry.

        Raises:
            LookupError: when ``key`` is already taken.
        """
        if key in self:
            raise LookupError(
                "key '%s' already exists, colliding posts are: %s vs %s" % (
                    key,
                    self[key].fpath,
                    value.fpath,
                )
            )
        dict.__setitem__(self, key, value)

    def get_posts(self, start=0, end=-1):
        """Template variables of the posts in ``sortedkeys[start:end]``.

        ``end=-1`` (the default) and ``end=None`` both mean "up to and
        including the last post". Using -1 directly as a slice bound
        would silently drop the final entry.
        """
        stop = None if end is None or end == -1 else end
        return [
            self[k].tmplvars
            for k in self.sortedkeys[start:stop]
        ]

    @property
    def sortedkeys(self):
        """Keys in descending order."""
        return sorted(self.keys(), reverse=True)

    @property
    def display(self):
        """Display mode configured for this category ('' when unset)."""
        return settings.categorydisplay.get(self.name, '')

    @property
    def title(self):
        """"<name> - <domain>" for named categories, else the site title."""
        if len(self.name):
            return "%s - %s" % (self.name, settings.site.get('domain'))
        return settings.site.get('title')

    @property
    def url(self):
        """URL of the category, with a trailing slash."""
        if len(self.name):
            return "%s/category/%s/" % (settings.site.get('url'), self.name)
        return '%s/' % (settings.site.get('url'))

    @property
    def feed(self):
        """URL of the category feed."""
        return "%sfeed/" % (self.url)

    @property
    def template(self):
        """Template file name, derived from the class name."""
        return "%s.j2.html" % (self.__class__.__name__)

    @property
    def renderdir(self):
        """Directory the category is rendered into."""
        if len(self.name):
            return os.path.join(
                settings.paths.get('build'),
                'category',
                self.name
            )
        return settings.paths.get('build')

    @property
    def tmplvars(self):
        """Variables describing the category for templates."""
        return {
            'name': self.name,
            'display': self.display,
            'url': self.url,
            'feed': "%s%s/" % (self.url, 'feed'),
            'jsonfeed': "%s%s/index.json" % (self.url, 'feed'),
            'title': self.title
        }

    @property
    def mtime(self):
        """Publication timestamp of the post with the highest key."""
        return arrow.get(self[self.sortedkeys[0]].published).timestamp

    @property
    def exists(self):
        """True when the rendered feed is present and up to date.

        Always False when ``settings.args['force']`` is set.
        """
        if settings.args.get('force'):
            return False
        feedfile = os.path.join(self.renderdir, 'feed', 'index.xml')
        if not os.path.exists(feedfile):
            return False
        return not self.mtime > os.path.getmtime(feedfile)

    def render_feed(self):
        """Write the ATOM feed of the first page of posts to feed/index.xml."""
        settings.logger.info('rendering category "%s" ATOM feed', self.name)
        start = 0
        end = int(settings.site.get('pagination'))

        dirname = os.path.join(self.renderdir, 'feed')
        if not os.path.isdir(dirname):
            os.makedirs(dirname)

        fg = FeedGenerator()
        fg.id(self.feed)
        fg.link(href=self.feed, rel='self')
        fg.link(href=settings.meta.get('hub'), rel='hub')
        fg.title(self.title)
        fg.author({
            'name': settings.author.get('name'),
            'email': settings.author.get('email')
        })
        fg.logo('%s/favicon.png' % settings.site.get('url'))
        fg.updated(arrow.get(self.mtime).to('utc').datetime)

        for post in self.get_posts(start, end):
            dt = arrow.get(post.get('pubtime'))
            fe = fg.add_entry()
            fe.author({
                'name': settings.author.get('name'),
                'email': settings.author.get('email')
            })
            fe.id(post.get('url'))
            fe.link(href=post.get('url'))
            fe.title(post.get('title'))
            fe.published(dt.datetime)
            fe.updated(dt.datetime)
            fe.content(
                post.get('html_content'),
            )
            fe.rights('%s %s %s' % (
                post.get('licence').upper(),
                settings.author.get('name'),
                dt.format('YYYY')
            ))
            if 'enclosure' in post:
                enc = post.get('enclosure')
                fe.enclosure(
                    enc.get('url'),
                    "%d" % enc.get('size'),
                    enc.get('mime')
                )

        atom = os.path.join(dirname, 'index.xml')
        with open(atom, 'wb') as f:
            settings.logger.info('writing file: %s', atom)
            f.write(fg.atom_str(pretty=True))

    def render_page(self, pagenum=1, pages=1):
        """Render one listing page to index.html.

        "flat" categories list every post on a single page; all others
        show ``settings.site['pagination']`` posts per page. Pages after
        the first go into ``page/<pagenum>/`` below the category directory.
        """
        if self.display == 'flat':
            start = 0
            end = -1  # get_posts() treats -1 as "through the last post"
        else:
            pagination = int(settings.site.get('pagination'))
            start = int((pagenum - 1) * pagination)
            end = int(start + pagination)

        posts = self.get_posts(start, end)
        r = J2.get_template(self.template).render({
            'site': settings.site,
            'author': settings.author,
            'meta': settings.meta,
            'licence': settings.licence,
            'tips': settings.tips,
            'labels': settings.labels,
            'category': self.tmplvars,
            'pages': {
                'current': pagenum,
                'total': pages,
            },
            'posts': posts,
        })
        if pagenum > 1:
            renderdir = os.path.join(self.renderdir, 'page', str(pagenum))
        else:
            renderdir = self.renderdir
        if not os.path.isdir(renderdir):
            os.makedirs(renderdir)
        renderfile = os.path.join(renderdir, 'index.html')
        with open(renderfile, 'wt') as f:
            f.write(r)

    async def render(self):
        """Render all listing pages and the feed unless already up to date."""
        if self.exists:
            return

        if self.display == 'flat':
            pagination = len(self)
        else:
            pagination = int(settings.site.get('pagination'))

        pages = ceil(len(self) / pagination)
        for page in range(1, pages + 1):
            self.render_page(page, pages)
        self.render_feed()
|
|
|
|
|
2018-07-22 14:52:32 +01:00
|
|
|
|
2018-07-22 08:48:47 +01:00
|
|
|
class Search(object):
    """Full-text search index in an SQLite FTS4 table, plus search.php.

    The database lives in ``search.sqlite`` inside the build directory.
    The object can be used as a context manager; leaving the block (or
    calling ``__exit__()`` by hand) commits pending changes and closes
    the connection.
    """

    def __init__(self):
        # set by append() once a row was written, checked on exit
        self.changed = False
        self.fpath = os.path.join(
            settings.paths.get('build'),
            'search.sqlite'
        )
        self.db = sqlite3.connect(self.fpath)
        self.db.execute('PRAGMA auto_vacuum = INCREMENTAL;')
        self.db.execute('PRAGMA journal_mode = MEMORY;')
        self.db.execute('PRAGMA temp_store = MEMORY;')
        self.db.execute('PRAGMA locking_mode = NORMAL;')
        self.db.execute('PRAGMA synchronous = FULL;')
        self.db.execute('PRAGMA encoding = "UTF-8";')
        self.db.execute('''
            CREATE VIRTUAL TABLE IF NOT EXISTS data USING fts4(
                url,
                mtime,
                name,
                title,
                category,
                content,
                notindexed=category,
                notindexed=url,
                notindexed=mtime,
                tokenize=porter
            )'''
        )

    def __enter__(self):
        """Return the index itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        """Commit pending changes, if any, and close the database.

        The arguments are optional so that existing direct calls of
        ``__exit__()`` without arguments keep working. Exceptions raised
        inside a ``with`` block are not suppressed.
        """
        if self.changed:
            self.db.commit()
            self.db.execute('PRAGMA auto_vacuum;')
        self.db.close()

    def exists(self, name):
        """Stored mtime of the entry called ``name``, 0 when there is none."""
        row = self.db.execute('''
            SELECT
                mtime
            FROM
                data
            WHERE
                name = ?
        ''', (name,)).fetchone()
        return int(row[0]) if row else 0

    def append(self, url, mtime, name, title, category, content):
        """Insert the entry, replacing an older one with the same name.

        An entry whose stored mtime is not older than ``mtime`` is left
        untouched.
        """
        mtime = int(mtime)
        exists = self.exists(name)
        if exists and exists < mtime:
            # stale entry: drop it so the fresh one gets inserted below
            self.db.execute('''
                DELETE
                FROM
                    data
                WHERE
                    name=?''', (name,))
            exists = False
        if not exists:
            self.db.execute('''
                INSERT INTO
                    data
                    (url, mtime, name, title, category, content)
                VALUES
                    (?,?,?,?,?,?);
            ''', (
                url,
                mtime,
                name,
                title,
                category,
                content
            ))
            self.changed = True

    async def render(self):
        """Render ``Search.j2.php`` to search.php unless the file exists."""
        target = os.path.join(
            settings.paths.get('build'),
            'search.php'
        )
        if os.path.exists(target):
            return
        r = J2.get_template('Search.j2.php').render({
            'post': {},
            'site': settings.site,
            'author': settings.author,
            'meta': settings.meta,
            'licence': settings.licence,
            'tips': settings.tips,
            'labels': settings.labels
        })
        with open(target, 'wt') as f:
            settings.logger.info("rendering to %s", target)
            f.write(r)
|
2018-07-22 08:48:47 +01:00
|
|
|
|
2018-07-23 11:04:48 +01:00
|
|
|
class Sitemap(dict):
    """URL -> modification time mapping rendered to sitemap.txt."""

    @property
    def mtime(self):
        """Modification time of the rendered file, 0 when it is missing."""
        target = self.renderfile
        return os.path.getmtime(target) if os.path.exists(target) else 0

    @property
    def renderfile(self):
        """Path of sitemap.txt inside the build directory."""
        return os.path.join(settings.paths.get('build'), 'sitemap.txt')

    async def render(self):
        """Write the sorted URLs, one per line, if any entry is newer.

        Nothing is written when the rendered file is at least as new as
        the newest entry. An empty sitemap counts as "nothing newer"
        instead of raising on the lookup of the newest value.
        """
        newest = max(self.values(), default=0)
        if self.mtime >= newest:
            return
        with open(self.renderfile, 'wt') as f:
            f.write("\n".join(sorted(self.keys())))
|
2018-07-22 08:48:47 +01:00
|
|
|
|
2018-07-22 17:59:26 +01:00
|
|
|
|
|
|
|
def mkcomment(webmention):
    """Save one webmention as a markdown file with front matter.

    ``webmention`` is a dict with ``source``, ``target``, ``activity``
    (holding ``type``) and ``data`` (holding ``published``, ``author``
    and ``content``). The file goes into the content directory named
    after the last path segment of the target URL and is called
    ``<publish timestamp>-<slug of the source URL>.md``.
    """
    data = webmention.get('data')
    source = webmention.get('source')
    target = webmention.get('target')

    dt = arrow.get(data.get('published'))
    fdir = os.path.join(
        settings.paths.get('content'),
        target.strip('/').split('/')[-1]
    )
    # the slug comes from the source URL of the mention; the name `url`
    # used here before was never defined in this function
    slug = slugify(
        re.sub(r"^https?://(?:www)?", "", source),
        only_ascii=True,
        lower=True
    )[:200]
    fpath = os.path.join(fdir, "%d-%s.md" % (dt.timestamp, slug))

    fm = frontmatter.loads('')
    fm.metadata = {
        'author': data.get('author'),
        'date': dt.format(settings.dateformat.get('iso')),
        'source': source,
        'target': target,
        'type': webmention.get('activity').get('type')
    }
    fm.content = data.get('content')
    with open(fpath, 'wt') as f:
        settings.logger.info("saving webmention to %s", fpath)
        f.write(frontmatter.dumps(fm))
|
|
|
|
|
|
|
|
|
2018-07-23 11:04:48 +01:00
|
|
|
def makecomments():
    """Fetch new webmentions from webmention.io and save them as comments.

    "New" means newer than the most recently modified ``*.md`` file
    (other than ``index.md``) two directory levels below the content
    directory. Every returned mention is handed to ``mkcomment``.
    Failures are logged instead of being dropped silently, and one bad
    mention does not stop the remaining ones from being saved.
    """
    newest = 0
    content = settings.paths.get('content')
    for e in glob.glob(os.path.join(content, '*', '*', '*.md')):
        if os.path.basename(e) == 'index.md':
            continue
        newest = max(newest, os.path.getmtime(e))
    newest = arrow.get(newest)

    wio_params = {
        'jsonp': '',
        'token': '%s' % (keys.webmentionio.get('token')),
        'since': '%s' % newest.format(settings.dateformat.get('iso')),
        'domain': '%s' % (keys.webmentionio.get('domain'))
    }
    wio_url = "https://webmention.io/api/mentions"
    response = requests.get(wio_url, params=wio_params)
    try:
        webmentions = response.json()
    except ValueError as err:
        settings.logger.error(
            "webmention.io response is not valid JSON: %s",
            err
        )
        return

    # 'links' may be absent: treat that as "no new mentions"
    for webmention in webmentions.get('links') or []:
        try:
            mkcomment(webmention)
        except ValueError as err:
            settings.logger.error(
                "failed to save webmention from %s: %s",
                webmention.get('source'),
                err
            )
|
|
|
|
|
2018-07-23 11:04:48 +01:00
|
|
|
|
|
|
|
def make():
    """Run one complete build and log how long it took.

    Steps, in order:

    1. ``makecomments()`` is called.
    2. Each ``*.ptr`` file is loaded as ``Gone`` and each ``*.lnk`` file as
       ``Redirect``; both are registered in an ``IndexPHP`` rules object.
       The rules are queued for rendering when ``rules.mtime`` is lower
       than the newest mtime seen among those files, or when the ``force``
       argument is set.
    3. Each ``index.md`` two directory levels below the content directory
       is loaded as ``Singular``; its file copy, image downsize and render
       jobs are queued. Posts not flagged ``is_future`` are additionally
       added to their category, to the ``'/'`` category when ``is_front``
       is set, to the sitemap and to the search index.
    4. Search, category and sitemap renders are queued and the worker is
       run.
    5. Top level content files and template ``*.js`` files are copied to
       the build directory unless the copy there is at least as new.
    """
    start = int(round(time.time() * 1000))
    last = 0

    makecomments()

    content = settings.paths.get('content')
    worker = AsyncWorker()
    rules = IndexPHP()

    for ptr_path in glob.glob(os.path.join(content, '*', '*.ptr')):
        gone = Gone(ptr_path)
        last = max(last, gone.mtime)
        rules.add_gone(gone.source)

    for lnk_path in glob.glob(os.path.join(content, '*', '*.lnk')):
        redirect = Redirect(lnk_path)
        last = max(last, redirect.mtime)
        rules.add_redirect(redirect.source, redirect.target)

    rules_outdated = rules.mtime < last or settings.args.get('force')
    if rules_outdated:
        worker.add(rules.render())

    sitemap = Sitemap()
    search = Search()
    categories = {'/': Category()}

    index_files = glob.glob(os.path.join(content, '*', '*', 'index.md'))
    for index_path in sorted(index_files):
        post = Singular(index_path)
        worker.add(post.copyfiles())
        for image in post.images.values():
            worker.add(image.downsize())
        worker.add(post.render())

        # posts flagged as future are rendered, but not listed anywhere
        if post.is_future:
            continue

        if post.category not in categories:
            categories[post.category] = Category(post.category)
        categories[post.category][post.published.timestamp] = post
        if post.is_front:
            categories['/'][post.published.timestamp] = post
        last = max(last, post.ctime)
        sitemap[post.url] = post.mtime
        search.append(
            url=post.url,
            mtime=post.mtime,
            name=post.name,
            title=post.title,
            category=post.category,
            content=post.content
        )

    search.__exit__()
    worker.add(search.render())
    for category in categories.values():
        worker.add(category.render())
    worker.add(sitemap.render())

    worker.run()
    settings.logger.info('worker finished')

    # copy static
    staticpaths = [
        os.path.join(content, '*.*'),
        os.path.join(settings.paths.get('tmpl'), '*.js')
    ]
    staticfiles = [
        found
        for pattern in staticpaths
        for found in glob.glob(pattern)
    ]
    for static in staticfiles:
        destination = os.path.join(
            settings.paths.get('build'),
            os.path.basename(static)
        )
        up_to_date = (
            os.path.exists(destination)
            and os.path.getmtime(static) <= os.path.getmtime(destination)
        )
        if not up_to_date:
            cp(static, destination)

    end = int(round(time.time() * 1000))
    settings.logger.info('process took %d ms' % (end - start))
|
2017-11-10 16:04:05 +00:00
|
|
|
|
2018-07-22 14:52:32 +01:00
|
|
|
|
2017-05-23 11:13:35 +01:00
|
|
|
# Script entry point: run the full build only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    make()
|