2017-05-23 11:14:47 +01:00
|
|
|
import configparser
|
|
|
|
import os
|
2017-05-26 10:14:24 +01:00
|
|
|
import re
|
|
|
|
import glob
|
|
|
|
import logging
|
|
|
|
import subprocess
|
2017-06-28 12:20:26 +01:00
|
|
|
import json
|
|
|
|
|
2017-05-23 11:14:47 +01:00
|
|
|
from whoosh import fields
|
|
|
|
from whoosh import analysis
|
2017-06-28 12:20:26 +01:00
|
|
|
from slugify import slugify
|
2017-06-12 15:40:30 +01:00
|
|
|
|
2017-07-05 22:09:06 +01:00
|
|
|
# Lower-case log level names mapped to their numeric severity; the numbers
# are exactly the standard ``logging`` module constants (50, 40, 30, 20, 10).
LLEVEL = dict(
    critical=logging.CRITICAL,
    error=logging.ERROR,
    warning=logging.WARNING,
    info=logging.INFO,
    debug=logging.DEBUG,
)
|
|
|
|
|
|
|
|
|
2017-05-23 11:14:47 +01:00
|
|
|
def __expandconfig(config):
    """ add the derived "...dir" entries to the config automatically

    Reads ``common.base`` (with ``~`` expanded) and stores it as
    ``common.basedir``. For every option ``X`` in the ``source`` and
    ``target`` sections an ``Xdir`` option is added, holding the option's
    value joined onto the base directory. Finally ``target.filesdir`` and
    ``target.commentsdir`` are set to sub-paths of ``target.builddir``.

    The parser passed in is modified in place and returned.
    """
    root = os.path.expanduser(config.get('common', 'base'))
    config.set('common', 'basedir', root)

    for section in ('source', 'target'):
        # options() hands back a list, so the entries added inside the loop
        # are not visited again
        for name in config.options(section):
            relative = config.get(section, name)
            config.set(section, "%sdir" % name, os.path.join(root, relative))

    # sub-directories of the build directory, named by options that live in
    # other sections
    derived = (
        ('filesdir', 'source', 'files'),
        ('commentsdir', 'site', 'commentspath'),
    )
    for key, from_section, from_option in derived:
        config.set('target', key, os.path.join(
            config.get('target', 'builddir'),
            config.get(from_section, from_option),
        ))

    return config
|
|
|
|
|
2017-06-12 15:40:30 +01:00
|
|
|
|
2017-06-12 15:17:29 +01:00
|
|
|
def baseN(num, b=36, numerals="0123456789abcdefghijklmnopqrstuvwxyz"):
    """ Used to create short, lowercase slug for a number (an epoch) passed

    :param num: the non-negative number to convert; anything ``int()``
        accepts (e.g. a numeric string) is fine
    :param b: the target base, at least 2 and at most ``len(numerals)``
    :param numerals: the digit characters, lowest value first
    :return: ``num`` written in base ``b`` as a string
    :raises ValueError: if ``num`` is negative or ``b`` is smaller than 2;
        these inputs used to recurse until the interpreter gave up
    """
    num = int(num)
    if num < 0:
        raise ValueError("baseN() cannot encode a negative number: %d" % num)
    if b < 2:
        raise ValueError("baseN() needs a base of at least 2, got %r" % (b,))

    if num == 0:
        return numerals[0]

    # collect the digits least significant first, then flip them around
    digits = []
    while num:
        num, remainder = divmod(num, b)
        digits.append(numerals[remainder])
    return "".join(reversed(digits))
|
|
|
|
|
2017-06-28 12:20:26 +01:00
|
|
|
def slugfname(url):
    """ build a slug from an URL, cut to at most 200 characters

    A leading ``http://`` or ``https://`` (plus a directly following
    ``www``) is removed before the rest is handed to slugify, which is asked
    for lowercase, ASCII-only output.
    """
    without_scheme = re.sub(r"^https?://(?:www)?", "", url)
    slug = slugify(without_scheme, only_ascii=True, lower=True)
    return "%s" % slug[:200]
|
2017-06-12 15:17:29 +01:00
|
|
|
|
2017-05-26 10:14:24 +01:00
|
|
|
# The same ISO 8601-style timestamp layout in two notations:
# token based -- presumably for the arrow library, going by the name (confirm)
ARROWISO = "YYYY-MM-DDTHH:mm:ssZ"
# strftime() / strptime() directives
STRFISO = "%Y-%m-%dT%H:%M:%S%z"
|
2017-05-26 10:14:24 +01:00
|
|
|
|
2017-05-23 11:14:47 +01:00
|
|
|
# An http(s) URL that is preceded by whitespace; the whitespace is part of
# the match. The URL has to contain at least one dot.
URLREGEX = re.compile(
    r'\s+https?\:\/\/?[a-zA-Z0-9\.\/\?\:@\-_=#]+'  # scheme and text up to a dot
    r'\.[a-zA-Z0-9\.\/\?\:@\-_=#]*'                # the dot and whatever follows
)

# A complete "YYYY:MM:DD HH:MM:SS" string.
# Named groups: year, month, day and time (the whole HH:MM:SS part).
EXIFREXEG = re.compile(
    r'^(?P<year>[0-9]{4}):(?P<month>[0-9]{2}):(?P<day>[0-9]{2})\s+'
    r'(?P<time>[0-9]{2}:[0-9]{2}:[0-9]{2})$'
)

# A markdown image pointing below /files or /cache, optionally inside a
# /YYYY/MM sub-directory, with a jpg/jpeg/png/gif extension (any letter case).
# Groups:
#   1 - the complete image markup
#   2 - the alt text
#   3 - the file name
#   4 - the optional title that follows the path
#   5 - the optional content of a trailing {...}
MDIMGREGEX = re.compile(
    r'(!\[(.*)\]\((?:\/(?:files|cache)'
    r'(?:\/[0-9]{4}\/[0-9]{2})?\/(.*\.(?:jpe?g|png|gif)))'
    r'(?:\s+[\'\"]?(.*?)[\'\"]?)?\)(?:\{(.*?)\})?)',
    flags=re.IGNORECASE
)
|
|
|
|
|
|
|
|
# Whoosh index schema.
# "slug", "reactions" (both 4-gram NGRAMWORDS) and "tags" (a comma separated,
# lower-cased keyword TEXT field) were sketched here as well, but are
# currently disabled.
schema = fields.Schema(
    url=fields.ID(stored=True, unique=True),
    category=fields.TEXT(stored=True),
    date=fields.DATETIME(stored=True, sortable=True),
    title=fields.TEXT(stored=True, analyzer=analysis.FancyAnalyzer()),
    weight=fields.NUMERIC(sortable=True),
    img=fields.TEXT(stored=True),
    content=fields.TEXT(stored=True, analyzer=analysis.FancyAnalyzer()),
    # indexed as 4 character n-grams
    fuzzy=fields.NGRAMWORDS(tokenizer=analysis.NgramTokenizer(4)),
    mtime=fields.NUMERIC(stored=True),
)
|
|
|
|
|
|
|
|
# Module-level configuration, built at import time: 'config.ini' is looked up
# relative to the current working directory (ConfigParser.read() skips files
# it cannot open), then the derived directory options are added.
config = configparser.ConfigParser(
    allow_no_value=True,
    interpolation=configparser.ExtendedInterpolation(),
)
config.read('config.ini')
config = __expandconfig(config)
|
2017-05-26 10:14:24 +01:00
|
|
|
|
|
|
|
class CMDLine(object):
    """ Base for command line tools that are driven through pipes

    Used as a context manager the executable is started once with
    ``-stay_open True -@ -``, argument lists are then sent to it with
    execute(), and it is told to quit when the context is left. The start-up
    arguments and the default sentinel follow exiftool's ``-stay_open``
    protocol.
    """

    # What the tool prints once it has finished answering an "-execute"
    # request. This is exiftool's marker; a subclass wrapping a tool with a
    # different marker should override it.
    sentinel = "{ready}\n"

    def __init__(self, executable):
        """ locate ``executable`` in PATH

        :raises OSError: if nothing of that name is found in PATH
        """
        self.executable = self._which(executable)
        if self.executable is None:
            raise OSError('No %s found in PATH!' % executable)
        # set by __enter__; stays None when the instance is not used as a
        # context manager
        self.process = None

    @staticmethod
    def _which(name):
        """ return the path of ``name`` from the first PATH directory that
        has it, or None if no directory does """
        # os.pathsep instead of a literal ':' so it works on every platform,
        # os.defpath so an unset PATH is not a KeyError
        for d in os.environ.get('PATH', os.defpath).split(os.pathsep):
            which = glob.glob(os.path.join(d, name), recursive=True)
            if which:
                return which.pop()
        return None

    def __enter__(self):
        """ start the process in stay-open mode, reading its arguments from
        standard input """
        self.process = subprocess.Popen(
            [self.executable, "-stay_open", "True", "-@", "-"],
            universal_newlines=True,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """ ask the process to quit and wait for it, so neither a zombie nor
        open pipes are left behind; exceptions from the with-body are not
        suppressed """
        process = getattr(self, 'process', None)
        if process is None:
            return
        try:
            process.stdin.write("-stay_open\nFalse\n")
            process.stdin.flush()
        except (OSError, ValueError):
            # the process is already gone or its stdin is closed; there is
            # nobody left to tell, but it still has to be reaped below
            pass
        try:
            # communicate() also drains stdout/stderr, so a full pipe cannot
            # keep the child from exiting
            process.communicate(timeout=10)
        except subprocess.TimeoutExpired:
            process.kill()
            process.communicate()

    def execute(self, *args):
        """ send ``args``, one per line, followed by "-execute" and return
        what the process printed up to (not including) the sentinel

        :raises OSError: if the process closes its output before the sentinel
            was seen (this used to loop forever)
        """
        args = args + ("-execute\n",)
        self.process.stdin.write(str.join("\n", args))
        self.process.stdin.flush()

        # Collect raw bytes and decode once at the end: decoding every 4096
        # byte chunk separately would silently drop a multi-byte character
        # that happens to be split across two reads.
        marker = self.sentinel.encode('utf-8')
        raw = bytearray()
        fd = self.process.stdout.fileno()
        while not raw.endswith(marker):
            chunk = os.read(fd, 4096)
            if not chunk:
                raise OSError(
                    '%s closed its output before sending the sentinel'
                    % self.executable
                )
            raw += chunk
        return raw[:-len(marker)].decode('utf-8', errors='ignore')
|
|
|
|
|
2017-06-12 15:40:30 +01:00
|
|
|
|
2017-05-26 10:14:24 +01:00
|
|
|
class Pandoc(CMDLine):
    """ Pandoc command line call with piped in- and output """

    def __init__(self, md2html=True):
        """ pick the pandoc input and output formats

        :param md2html: a value equal to True converts markdown to html5,
            the string 'plain' converts markdown to plain text, anything
            else converts html to markdown (with raw_html, native_divs and
            native_spans switched off)
        """
        super().__init__('pandoc')

        # markdown reader with the extensions shared by both markdown inputs
        markdown_in = "markdown+" + "+".join([
            'backtick_code_blocks',
            'auto_identifiers',
            'fenced_code_attributes',
            'definition_lists',
            'grid_tables',
            'pipe_tables',
            'strikeout',
            'superscript',
            'subscript',
            'markdown_in_html_blocks',
            'shortcut_reference_links',
            'autolink_bare_uris',
            'raw_html',
            'link_attributes',
            'header_attributes',
            'footnotes',
        ])

        # equality on purpose, not identity: the string 'plain' has to fall
        # through to the second branch
        if md2html == True:
            self.i, self.o = markdown_in, 'html5'
        elif md2html == 'plain':
            self.i, self.o = markdown_in, "plain"
        else:
            self.i = 'html'
            self.o = "markdown-" + "-".join([
                'raw_html',
                'native_divs',
                'native_spans',
            ])

    def convert(self, text):
        """ run ``text`` through pandoc and return the stripped result

        The text is sent to pandoc's standard input and the converted
        document is read from its standard output. Anything pandoc writes to
        standard error is logged as an error; the output is returned either
        way.
        """
        cmd = (
            self.executable,
            '-o-',
            '--from=%s' % self.i,
            '--to=%s' % self.o
        )
        logging.debug('converting string with Pandoc')

        pipe = subprocess.PIPE
        pandoc = subprocess.Popen(cmd, stdin=pipe, stdout=pipe, stderr=pipe)
        converted, complaints = pandoc.communicate(input=text.encode())

        if complaints:
            logging.error(
                "Error during pandoc covert:\n\t%s\n\t%s",
                cmd,
                complaints
            )
        return converted.decode('utf-8').strip()
|