import configparser import os import re import glob import logging import subprocess import json import requests from urllib.parse import urlparse, urlunparse from whoosh import fields from whoosh import analysis from slugify import slugify LLEVEL = { 'critical': 50, 'error': 40, 'warning': 30, 'info': 20, 'debug': 10 } def __expandconfig(config): """ add the dirs to the config automatically """ basepath = os.path.expanduser(config.get('common','base')) config.set('common', 'basedir', basepath) for section in ['source', 'target']: for option in config.options(section): opt = config.get(section, option) config.set(section, "%sdir" % option, os.path.join(basepath,opt)) config.set('target', 'filesdir', os.path.join( config.get('target', 'builddir'), config.get('source', 'files'), )) config.set('target', 'commentsdir', os.path.join( config.get('target', 'builddir'), config.get('site', 'commentspath'), )) return config def baseN(num, b=36, numerals="0123456789abcdefghijklmnopqrstuvwxyz"): """ Used to create short, lowercase slug for a number (an epoch) passed """ num = int(num) return ((num == 0) and numerals[0]) or ( baseN( num // b, b, numerals ).lstrip(numerals[0]) + numerals[num % b] ) def slugfname(url): return "%s" % slugify( re.sub(r"^https?://(?:www)?", "", url), only_ascii=True, lower=True )[:200] ARROWISO = 'YYYY-MM-DDTHH:mm:ssZ' STRFISO = '%Y-%m-%dT%H:%M:%S%z' URLREGEX = re.compile( r'\s+https?\:\/\/?[a-zA-Z0-9\.\/\?\:@\-_=#]+' r'\.[a-zA-Z0-9\.\/\?\:@\-_=#]*' ) EXIFREXEG = re.compile( r'^(?P[0-9]{4}):(?P[0-9]{2}):(?P[0-9]{2})\s+' r'(?P