nasg/shared.py

220 lines
5.7 KiB
Python
Raw Normal View History

2017-05-23 11:14:47 +01:00
import configparser
import os
import re
import glob
import logging
import subprocess
2017-06-28 12:20:26 +01:00
import json
2017-05-23 11:14:47 +01:00
from whoosh import fields
from whoosh import analysis
2017-06-28 12:20:26 +01:00
from slugify import slugify
2017-06-12 15:40:30 +01:00
2017-07-05 22:09:06 +01:00
# Maps a lowercase log-level name to its numeric value; the numbers are the
# standard library's own level constants.
LLEVEL = {
    'critical': logging.CRITICAL,
    'error': logging.ERROR,
    'warning': logging.WARNING,
    'info': logging.INFO,
    'debug': logging.DEBUG,
}
2017-05-23 11:14:47 +01:00
def __expandconfig(config):
    """Derive the ``*dir`` path options and store them in ``config``.

    * ``common.basedir`` becomes the user-expanded ``common.base``.
    * Every option ``X`` found in the ``source`` and ``target`` sections
      gets a sibling option ``Xdir`` holding ``basedir`` joined with the
      value of ``X``.
    * ``target.filesdir`` and ``target.commentsdir`` are then set to
      ``target.builddir`` joined with ``source.files`` and
      ``site.commentspath`` respectively.

    The parser is modified in place and also returned.
    """
    root = os.path.expanduser(config.get('common', 'base'))
    config.set('common', 'basedir', root)

    for section in ('source', 'target'):
        # options() hands back a list, so adding options while looping
        # over it does not disturb the iteration.
        for name in config.options(section):
            relative = config.get(section, name)
            config.set(section, name + "dir", os.path.join(root, relative))

    build_dir = config.get('target', 'builddir')
    config.set(
        'target',
        'filesdir',
        os.path.join(build_dir, config.get('source', 'files')),
    )
    config.set(
        'target',
        'commentsdir',
        os.path.join(build_dir, config.get('site', 'commentspath')),
    )
    return config
2017-06-12 15:40:30 +01:00
2017-06-12 15:17:29 +01:00
def baseN(num, b=36, numerals="0123456789abcdefghijklmnopqrstuvwxyz"):
    """Return ``num`` written in base ``b`` using the digits in ``numerals``.

    Used to create a short, lowercase slug for a number (an epoch).

    Args:
        num: the value to convert; anything ``int()`` accepts.
        b: the target base, at least 2.
        numerals: digit alphabet; ``numerals[i]`` is the digit for value
            ``i``. An ``IndexError`` is raised if a digit value falls
            outside this alphabet.

    Returns:
        The textual representation; negative numbers get a leading ``-``.

    Raises:
        ValueError: if ``b`` is smaller than 2.
    """
    num = int(num)
    # A base below 2 can never terminate (or divides by zero).
    if b < 2:
        raise ValueError("base must be at least 2, got %r" % (b,))
    if num == 0:
        return numerals[0]

    # Work on the magnitude: floor division of a negative number never
    # reaches zero, which previously caused unbounded recursion.
    sign = "-" if num < 0 else ""
    num = abs(num)

    digits = []
    while num:
        num, remainder = divmod(num, b)
        digits.append(numerals[remainder])
    return sign + "".join(reversed(digits))
2017-06-28 12:20:26 +01:00
def slugfname(url):
    """Turn ``url`` into a slug of at most 200 characters.

    A leading ``http://`` or ``https://`` is removed, together with a
    directly following ``www`` if there is one, before the rest is passed
    to ``slugify``.
    """
    # NOTE(review): the pattern drops a bare "www" without requiring the
    # following dot, so a host such as "www2.example.com" loses its first
    # three letters as well. Left as is to keep generated names stable.
    without_scheme = re.sub(r"^https?://(?:www)?", "", url)
    slug = slugify(without_scheme)
    return slug[:200]
2017-06-12 15:17:29 +01:00
# Two spellings of the same timestamp layout: date, a literal "T", time and
# a UTC offset.
# ARROWISO uses token syntax (YYYY, MM, DD, ...) - presumably for the
# `arrow` library, going by the name; confirm against the callers.
ARROWISO = 'YYYY-MM-DDTHH:mm:ssZ'
# STRFISO is the strftime/strptime-style format string.
STRFISO = '%Y-%m-%dT%H:%M:%S%z'
2017-05-23 11:14:47 +01:00
# Matches an http(s) URL that is preceded by whitespace. The leading
# whitespace is part of the match. After "http:/" or "https:/" the second
# slash is optional; the rest must contain at least one literal dot.
URLREGEX = re.compile(
    r'\s+https?\:\/\/?[a-zA-Z0-9\.\/\?\:@\-_=#]+'
    r'\.[a-zA-Z0-9\.\/\?\:@\-_=#]*'
)
# Matches a whole "YYYY:MM:DD HH:MM:SS" string and exposes the named groups
# `year`, `month`, `day` and `time`.
# NOTE(review): the name is spelled EXIFREXEG (not ...REGEX); it is left
# unchanged because code outside this view may refer to it.
EXIFREXEG = re.compile(
    r'^(?P<year>[0-9]{4}):(?P<month>[0-9]{2}):(?P<day>[0-9]{2})\s+'
    r'(?P<time>[0-9]{2}:[0-9]{2}:[0-9]{2})$'
)
# Matches a markdown image whose target lives under /files or /cache,
# optionally followed by a /YYYY/MM directory pair. Case-insensitive.
# Capture groups:
#   1 - the complete image construct, including any trailing {...}
#   2 - the alt text between the square brackets
#   3 - the remaining path, ending in .jpg/.jpeg/.png/.gif
#   4 - the optional title after the path (surrounding quotes excluded)
#   5 - the optional text between curly braces after the closing paren
MDIMGREGEX = re.compile(
    r'(!\[(.*)\]\((?:\/(?:files|cache)'
    r'(?:\/[0-9]{4}\/[0-9]{2})?\/(.*\.(?:jpe?g|png|gif)))'
    r'(?:\s+[\'\"]?(.*?)[\'\"]?)?\)(?:\{(.*?)\})?)'
    , re.IGNORECASE)
# Whoosh schema describing one indexed document.
schema = fields.Schema(
    # Unique identifier of the document.
    url=fields.ID(stored=True, unique=True),
    # Title and content are both stored and run through FancyAnalyzer.
    title=fields.TEXT(stored=True, analyzer=analysis.FancyAnalyzer()),
    date=fields.DATETIME(stored=True, sortable=True),
    content=fields.TEXT(stored=True, analyzer=analysis.FancyAnalyzer()),
    # N-gram field built with a tokenizer size of 4; not stored.
    fuzzy=fields.NGRAMWORDS(tokenizer=analysis.NgramTokenizer(4)),
    # Tags are analysed as lowercased keywords with comma handling on.
    tags=fields.TEXT(
        stored=True,
        analyzer=analysis.KeywordAnalyzer(lowercase=True, commas=True),
    ),
    # Sortable number; not stored.
    weight=fields.NUMERIC(sortable=True),
    img=fields.TEXT(stored=True),
    mtime=fields.NUMERIC(stored=True),
)
# Module-wide configuration, loaded when this module is imported.
# 'config.ini' is a relative path, so it is looked up in the current
# working directory. Options without a value are accepted and extended
# ("${section:option}") interpolation is enabled.
config = configparser.ConfigParser(
    allow_no_value=True,
    interpolation=configparser.ExtendedInterpolation(),
)
config.read('config.ini')
# Add the derived "*dir" options on top of what the file provides.
config = __expandconfig(config)
class CMDLine(object):
    """Wrapper around an external executable driven through pipes.

    Used as a context manager, it starts the executable as a long-running
    child process on entry and shuts it down on exit; ``execute`` sends one
    batch of arguments to that child and returns what it printed.

    ``execute`` reads until the output ends with ``self.sentinel``. This
    class does not define ``sentinel`` itself, so a subclass (or the
    caller) has to set it before ``execute`` is used.

    NOTE(review): the arguments used in ``__enter__`` look like ExifTool's
    stay-open batch mode - confirm that every executable run through the
    context manager understands them.
    """

    # Seconds the child gets to exit on its own before it is killed.
    _shutdown_timeout = 5

    def __init__(self, executable):
        """Look up ``executable`` on the search path.

        Raises:
            OSError: if no such executable can be found.
        """
        self.executable = self._which(executable)
        if self.executable is None:
            raise OSError('No %s found in PATH!' % executable)
        # Populated by __enter__.
        self.process = None

    @staticmethod
    def _which(name):
        """Return the full path of executable ``name``, or None."""
        # Imported here so the block does not depend on a file-level import.
        import shutil
        # shutil.which honours the platform's PATH separator, checks the
        # executable bit and does not treat ``name`` as a glob pattern.
        return shutil.which(name)

    def __enter__(self):
        """Start the child process with piped stdin/stdout/stderr."""
        self.process = subprocess.Popen(
            [self.executable, "-stay_open", "True", "-@", "-"],
            universal_newlines=True,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Ask the child to stop, then close its pipes and reap it."""
        proc = getattr(self, 'process', None)
        if proc is None:
            return
        try:
            proc.stdin.write("-stay_open\nFalse\n")
            proc.stdin.flush()
        except BrokenPipeError:
            # The child is already gone; it still has to be reaped below.
            pass
        try:
            # communicate() closes stdin, drains stdout/stderr and waits,
            # so no pipe or zombie process is left behind.
            proc.communicate(timeout=self._shutdown_timeout)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.communicate()
        self.process = None

    def execute(self, *args):
        """Send ``args`` to the child and return its reply.

        The arguments are written one per line, followed by ``-execute``.
        Output is read until it ends with ``self.sentinel``; the sentinel
        is removed from the returned text.

        Returns:
            The decoded output. If the child closes its output before the
            sentinel shows up, an error is logged and whatever was
            received so far is returned.

        Raises:
            AttributeError: if no ``sentinel`` has been defined.
        """
        sentinel = getattr(self, 'sentinel', None)
        if sentinel is None:
            raise AttributeError(
                "%s must define 'sentinel' before execute() is used"
                % type(self).__name__
            )

        args = args + ("-execute\n",)
        self.process.stdin.write(str.join("\n", args))
        self.process.stdin.flush()

        marker = sentinel.encode('utf-8')
        fd = self.process.stdout.fileno()
        # Collect raw bytes and decode once at the end: decoding every
        # chunk on its own loses multibyte characters that happen to be
        # split across two reads.
        received = bytearray()
        while not received.endswith(marker):
            chunk = os.read(fd, 4096)
            if not chunk:
                # EOF: the child exited, the sentinel will never arrive.
                logging.error(
                    "%s closed its output before sending the sentinel",
                    self.executable
                )
                break
            received += chunk

        output = received.decode('utf-8', errors='ignore')
        if output.endswith(sentinel):
            return output[:-len(sentinel)]
        return output
2017-06-12 15:40:30 +01:00
class Pandoc(CMDLine):
    """ Pandoc command line call with piped in- and output

    With ``md2html=True`` (the default) the converter reads markdown with
    the extensions listed below enabled and writes ``html5``; otherwise it
    reads ``html`` and writes markdown with the listed extensions disabled.
    """

    def __init__(self, md2html=True):
        """Find the ``pandoc`` executable and pick the in/out formats.

        Raises:
            OSError: if ``pandoc`` cannot be found.
        """
        super().__init__('pandoc')
        if md2html:
            # "+name" switches a pandoc extension on for the format.
            self.i = "markdown+" + "+".join([
                'backtick_code_blocks',
                'auto_identifiers',
                'fenced_code_attributes',
                'definition_lists',
                'grid_tables',
                'pipe_tables',
                'strikeout',
                'superscript',
                'subscript',
                'markdown_in_html_blocks',
                'shortcut_reference_links',
                'autolink_bare_uris',
                'raw_html',
                'link_attributes',
                'header_attributes',
                'footnotes',
            ])
            self.o = 'html5'
        else:
            # "-name" switches a pandoc extension off for the format.
            self.o = "markdown-" + "-".join([
                'raw_html',
                'native_divs',
                'native_spans',
            ])
            self.i = 'html'

    def convert(self, text):
        """Convert ``text`` with pandoc and return the stripped result.

        A new pandoc process is started for every call; ``text`` is fed to
        its stdin and the result is read from its stdout. Anything pandoc
        writes to stderr is logged as an error, the (possibly empty)
        output is returned regardless.
        """
        cmd = (
            self.executable,
            '-o-',
            '--from=%s' % self.i,
            '--to=%s' % self.o
        )
        logging.debug('converting string with Pandoc')
        result = subprocess.run(
            cmd,
            input=text.encode('utf-8'),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        if result.stderr:
            # Decode so the log shows readable text instead of a bytes repr.
            logging.error(
                "Error during pandoc convert:\n\t%s\n\t%s",
                cmd,
                result.stderr.decode('utf-8', errors='replace')
            )
        return result.stdout.decode('utf-8').strip()