updated
This commit is contained in:
parent
7c0daa0904
commit
70bd917de4
4 changed files with 386 additions and 67 deletions
303
nasg.py
303
nasg.py
|
@ -16,6 +16,9 @@ import math
|
||||||
import asyncio
|
import asyncio
|
||||||
import csv
|
import csv
|
||||||
import getpass
|
import getpass
|
||||||
|
import quopri
|
||||||
|
import base64
|
||||||
|
import mimetypes
|
||||||
|
|
||||||
import magic
|
import magic
|
||||||
import arrow
|
import arrow
|
||||||
|
@ -33,6 +36,7 @@ from webmentiontools.send import WebmentionSend
|
||||||
from bleach import clean
|
from bleach import clean
|
||||||
from emoji import UNICODE_EMOJI
|
from emoji import UNICODE_EMOJI
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from readability.readability import Document
|
||||||
import shared
|
import shared
|
||||||
|
|
||||||
def splitpath(path):
|
def splitpath(path):
|
||||||
|
@ -89,7 +93,8 @@ class BaseRenderable(object):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def writerendered(self, content):
|
def writerendered(self, content, mtime=None):
|
||||||
|
mtime = mtime or self.mtime
|
||||||
d = os.path.dirname(self.target)
|
d = os.path.dirname(self.target)
|
||||||
if not os.path.isdir(d):
|
if not os.path.isdir(d):
|
||||||
os.mkdir(d)
|
os.mkdir(d)
|
||||||
|
@ -98,7 +103,7 @@ class BaseRenderable(object):
|
||||||
logging.debug('writing %s', self.target)
|
logging.debug('writing %s', self.target)
|
||||||
html.write(content)
|
html.write(content)
|
||||||
html.close()
|
html.close()
|
||||||
os.utime(self.target, (self.mtime, self.mtime))
|
os.utime(self.target, (mtime, mtime))
|
||||||
|
|
||||||
|
|
||||||
class Indexer(object):
|
class Indexer(object):
|
||||||
|
@ -197,14 +202,25 @@ class Indexer(object):
|
||||||
self.writer.commit()
|
self.writer.commit()
|
||||||
|
|
||||||
|
|
||||||
class OfflineCopy(object):
|
class OfflineArchive(object):
|
||||||
def __init__(self, url):
|
# keep in mind that these are frontmattered HTML files with full HTML and embedded images
|
||||||
|
# they can get VERY large
|
||||||
|
def __init__(self, url, content=None, decode_email=False):
|
||||||
self.url = url
|
self.url = url
|
||||||
self.fname = "%s.md" % slugify(re.sub(r"^https?://", "", url))[:200]
|
self.parsed = urllib.parse.urlparse(url)
|
||||||
|
self.fbase = shared.slugfname(url)
|
||||||
|
self.fname = "%s.md" % self.fbase
|
||||||
self.target = os.path.join(
|
self.target = os.path.join(
|
||||||
shared.config.get('source', 'offlinecopiesdir'),
|
shared.config.get('source', 'offlinecopiesdir'),
|
||||||
self.fname
|
self.fname
|
||||||
)
|
)
|
||||||
|
self.targetd = os.path.join(
|
||||||
|
shared.config.get('source', 'offlinecopiesdir'),
|
||||||
|
self.fbase
|
||||||
|
)
|
||||||
|
if not os.path.isdir(self.targetd):
|
||||||
|
os.mkdir(self.targetd)
|
||||||
|
|
||||||
self.fm = frontmatter.loads('')
|
self.fm = frontmatter.loads('')
|
||||||
self.fm.metadata = {
|
self.fm.metadata = {
|
||||||
'url': self.url,
|
'url': self.url,
|
||||||
|
@ -215,36 +231,152 @@ class OfflineCopy(object):
|
||||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
|
||||||
})
|
})
|
||||||
|
|
||||||
def __repr__(self):
|
self.skip_fetch = False
|
||||||
return self.fm.content
|
if content:
|
||||||
|
self.skip_fetch = True
|
||||||
|
if decode_email:
|
||||||
|
content = quopri.decodestring(content)
|
||||||
|
content = str(content, 'utf-8', errors='replace')
|
||||||
|
self.fm.content = content
|
||||||
|
#self.tmp = tempfile.mkdtemp(
|
||||||
|
#'offlinearchive_',
|
||||||
|
#dir=tempfile.gettempdir()
|
||||||
|
#)
|
||||||
|
#atexit.register(
|
||||||
|
#shutil.rmtree,
|
||||||
|
#os.path.abspath(self.tmp)
|
||||||
|
#)
|
||||||
|
#self.images = []
|
||||||
|
|
||||||
def write(self):
|
self.exists = os.path.isfile(self.target)
|
||||||
|
|
||||||
|
def _getimage(self, src):
    """Download one image into the archive directory and relink the content.

    The image is stored in ``self.targetd`` under a slugified version of
    its basename (extension kept). On success every occurrence of ``src``
    in ``self.fm.content`` is replaced with ``<fbase>/<saved file name>``.

    ``src`` is matched verbatim, so the rewrite only takes effect when the
    content contains exactly this URL string.

    Any failure (network error, HTTP error status, write error) is logged
    and the content is left untouched; nothing is raised to the caller.
    """
    imgname, imgext = os.path.splitext(os.path.basename(src))
    # name of the file as it is actually written to disk; the link placed
    # into the content must use this same name, including the extension
    localname = "%s%s" % (slugify(imgname, only_ascii=True, lower=True), imgext)
    imgtarget = os.path.join(self.targetd, localname)
    try:
        logging.debug('donwloading image %s', src)
        r = requests.get(
            src,
            allow_redirects=True,
            timeout=60,
            stream=True
        )
        # do not store an HTML error page under an image file name
        r.raise_for_status()
        with open(imgtarget, 'wb') as f:
            # iter_content() defaults to 1-byte chunks; read in larger blocks
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)

        self.fm.content = self.fm.content.replace(
            src,
            '%s/%s' % (self.fbase, localname)
        )
    except Exception as e:
        logging.error('pulling image %s failed: %s', src, e)
        return
|
||||||
|
|
||||||
|
def _get_images(self):
    """Fetch every non-embedded <img> referenced by the archived content.

    Parses ``self.fm.content`` as HTML, skips images without a ``src`` and
    images that are already embedded as ``data:`` URIs, resolves each
    remaining ``src`` against the archived page URL and hands the absolute
    URL to ``_getimage``.
    """
    logging.debug("trying to save images")
    soup = BeautifulSoup(self.fm.content, 'lxml')

    for img in soup.find_all('img'):
        src = img.get('src')
        if not src:
            continue
        # already inlined into the document, nothing to download
        if src.startswith('data:'):
            continue

        # urljoin handles absolute, protocol-relative, root-relative and
        # page-relative references; patching only scheme and netloc would
        # resolve "pics/a.png" against the site root instead of the page
        self._getimage(urllib.parse.urljoin(self.url, src))
|
||||||
|
|
||||||
|
|
||||||
|
#def _getimage(self, src):
|
||||||
|
#tmp = os.path.join(self.tmp, "%s" % slugify(os.path.basename(src))[:200])
|
||||||
|
#try:
|
||||||
|
#r = requests.get(
|
||||||
|
#src,
|
||||||
|
#allow_redirects=True,
|
||||||
|
#timeout=60,
|
||||||
|
#stream=True
|
||||||
|
#)
|
||||||
|
#with open(tmp, 'wb') as f:
|
||||||
|
#for chunk in r.iter_content():
|
||||||
|
#if chunk:
|
||||||
|
#f.write(chunk)
|
||||||
|
|
||||||
|
#logging.debug('trying to embed %s', src)
|
||||||
|
#with open(tmp, 'rb') as imgdata:
|
||||||
|
#data = str(base64.b64encode(imgdata.read()), 'ascii')
|
||||||
|
#mimetype, encoding = mimetypes.guess_type(tmp)
|
||||||
|
#self.fm.content = self.fm.content.replace(
|
||||||
|
#src,
|
||||||
|
#"data:%s;base64,%s" % (mimetype, data)
|
||||||
|
#)
|
||||||
|
#except Exception as e:
|
||||||
|
#logging.error('pulling image %s failed: %s', src, e)
|
||||||
|
#return
|
||||||
|
|
||||||
|
#def _embed_images(self):
|
||||||
|
#logging.debug("trying to embed images")
|
||||||
|
#soup = BeautifulSoup(self.fm.content, 'lxml')
|
||||||
|
|
||||||
|
#embedded = re.compile(r'^data:.*')
|
||||||
|
#for img in soup.find_all('img'):
|
||||||
|
#src = img.get('src')
|
||||||
|
#if not src:
|
||||||
|
#continue
|
||||||
|
#if embedded.match(src):
|
||||||
|
#continue
|
||||||
|
|
||||||
|
#im = urllib.parse.urlparse(src)
|
||||||
|
#if not im.scheme:
|
||||||
|
#im = im._replace(scheme=self.parsed.scheme)
|
||||||
|
#if not im.netloc:
|
||||||
|
#im = im._replace(netloc=self.parsed.netloc)
|
||||||
|
|
||||||
|
#self._getimage(im.geturl())
|
||||||
|
|
||||||
|
|
||||||
|
def save(self):
|
||||||
logging.info(
|
logging.info(
|
||||||
"savig offline copy of\n\t%s to:\n\t%s",
|
"savig offline copy of\n\t%s to:\n\t%s",
|
||||||
self.url,
|
self.url,
|
||||||
self.target
|
self.target
|
||||||
)
|
)
|
||||||
|
|
||||||
with open(self.target, 'wt') as f:
|
with open(self.target, 'wt') as f:
|
||||||
f.write(frontmatter.dumps(self.fm))
|
f.write(frontmatter.dumps(self.fm))
|
||||||
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def archiveorgurl(self):
|
def archiveorgurl(self):
|
||||||
|
logging.debug("trying archive.org for %s", self.url)
|
||||||
a = self.fetch(
|
a = self.fetch(
|
||||||
"http://archive.org/wayback/available?url=%s" % self.url,
|
"http://archive.org/wayback/available?url=%s" % self.url,
|
||||||
)
|
)
|
||||||
if not a:
|
if not a:
|
||||||
|
logging.debug("no entry for %s on archive.org", self.url)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
a = json.loads(a.text)
|
a = json.loads(a.text)
|
||||||
return a.get(
|
aurl = a.get(
|
||||||
'archived_snapshots', {}
|
'archived_snapshots', {}
|
||||||
).get(
|
).get(
|
||||||
'closest', {}
|
'closest', {}
|
||||||
).get(
|
).get(
|
||||||
'url', None
|
'url', None
|
||||||
)
|
)
|
||||||
|
logging.debug("found %s in archive.org for %s", aurl, self.url)
|
||||||
|
self.updateurl(aurl)
|
||||||
|
return self.fetch(aurl)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error("archive.org parsing failed: %s", e)
|
logging.error("archive.org parsing failed: %s", e)
|
||||||
return None
|
return None
|
||||||
|
@ -264,24 +396,40 @@ class OfflineCopy(object):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def run(self):
|
def read():
|
||||||
if os.path.isfile(self.target):
|
if os.path.isfile(self.target):
|
||||||
with open(self.target) as f:
|
with open(self.target) as f:
|
||||||
self.fm = frontmatter.loads(f.read())
|
self.fm = frontmatter.loads(f.read())
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
if self.exists:
|
||||||
|
logging.info("offline archive for %s already exists", self.url)
|
||||||
|
return
|
||||||
|
|
||||||
logging.info("prepairing offline copy of %s", self.url)
|
logging.info("prepairing offline copy of %s", self.url)
|
||||||
|
|
||||||
|
if not self.skip_fetch:
|
||||||
r = self.fetch(self.url)
|
r = self.fetch(self.url)
|
||||||
|
|
||||||
|
# in case it's not, try to look for an archive.org url:
|
||||||
if not r:
|
if not r:
|
||||||
|
logging.warning("couldn't get live version of %s, trying archive.org", self.url)
|
||||||
r = self.fetch(self.archiveorgurl)
|
r = self.fetch(self.archiveorgurl)
|
||||||
|
|
||||||
if r:
|
# no live and no archive.org entry :((
|
||||||
if r.url != self.url:
|
# however, by miracle, I may already have a copy, so skip if it's there already
|
||||||
self.fm.metadata['realurl'] = r.url
|
if not r:
|
||||||
|
logging.error("no live or archive version of %s found :((", self.url)
|
||||||
|
if not self.exists:
|
||||||
|
self.save()
|
||||||
|
return
|
||||||
|
|
||||||
self.fm.content = r.text
|
self.fm.content = r.text
|
||||||
|
|
||||||
self.write()
|
self._get_images()
|
||||||
return
|
self.save()
|
||||||
|
|
||||||
|
|
||||||
class Renderer(object):
|
class Renderer(object):
|
||||||
|
@ -302,9 +450,10 @@ class Renderer(object):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def jinja_filter_date(d, form='%Y-%m-%d %H:%m:%S'):
|
def jinja_filter_date(d, form='%Y-%m-%d %H:%m:%S'):
|
||||||
if d == 'now':
|
if d == 'now':
|
||||||
return arrow.now().strftime(form)
|
d = arrow.now().datetime
|
||||||
if form == 'c':
|
if form == 'c':
|
||||||
form = '%Y-%m-%dT%H:%M:%S%z'
|
return d.isoformat()
|
||||||
|
#form = '%Y-%m-%dT%H:%M:%S%z'
|
||||||
return d.strftime(form)
|
return d.strftime(form)
|
||||||
|
|
||||||
|
|
||||||
|
@ -422,7 +571,7 @@ class Comment(BaseRenderable):
|
||||||
'content': self.content,
|
'content': self.content,
|
||||||
'html': self.html,
|
'html': self.html,
|
||||||
'source': self.source,
|
'source': self.source,
|
||||||
'target': self.target,
|
'target': self.targeturl,
|
||||||
'type': self.meta.get('type', 'webmention'),
|
'type': self.meta.get('type', 'webmention'),
|
||||||
'reacji': self.reacji,
|
'reacji': self.reacji,
|
||||||
'fname': self.fname
|
'fname': self.fname
|
||||||
|
@ -456,34 +605,43 @@ class Comment(BaseRenderable):
|
||||||
return self._source
|
return self._source
|
||||||
|
|
||||||
|
|
||||||
|
@property
def targeturl(self):
    """Path component of the URL this comment targets, without outer slashes.

    The URL is read from the ``target`` key of ``self.meta``, with the
    configured site URL as the default. The result is cached on the
    instance as ``_targeturl`` after the first access.
    """
    try:
        return self._targeturl
    except AttributeError:
        pass

    url = self.meta.get('target', shared.config.get('site', 'url'))
    parsed = urllib.parse.urlparse(url)
    self._targeturl = '{p.path}'.format(p=parsed).strip('/')
    return self._targeturl
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def target(self):
|
def target(self):
|
||||||
if hasattr(self, '_target'):
|
if hasattr(self, '_target'):
|
||||||
return self._target
|
return self._target
|
||||||
t = self.meta.get('target', shared.config.get('site', 'url'))
|
|
||||||
self._target = '{p.path}'.format(p=urllib.parse.urlparse(t)).strip('/')
|
|
||||||
return self._target
|
|
||||||
|
|
||||||
|
|
||||||
async def render(self, renderer):
|
|
||||||
logging.info("rendering and saving comment %s", self.fname)
|
|
||||||
targetdir = os.path.abspath(os.path.join(
|
targetdir = os.path.abspath(os.path.join(
|
||||||
shared.config.get('target', 'builddir'),
|
shared.config.get('target', 'builddir'),
|
||||||
shared.config.get('site', 'commentspath'),
|
shared.config.get('site', 'commentspath'),
|
||||||
self.fname
|
self.fname
|
||||||
))
|
))
|
||||||
target = os.path.join(targetdir, 'index.html')
|
|
||||||
|
|
||||||
if not shared.config.getboolean('params', 'force') and os.path.isfile(target):
|
self._target = os.path.join(targetdir, 'index.html')
|
||||||
ttime = int(os.path.getmtime(target))
|
return self._target
|
||||||
|
|
||||||
|
|
||||||
|
async def render(self, renderer):
|
||||||
|
logging.info("rendering and saving comment %s", self.fname)
|
||||||
|
|
||||||
|
if not shared.config.getboolean('params', 'force') and os.path.isfile(self.target):
|
||||||
|
ttime = int(os.path.getmtime(self.target))
|
||||||
logging.debug('ttime is %d mtime is %d', ttime, self.mtime)
|
logging.debug('ttime is %d mtime is %d', ttime, self.mtime)
|
||||||
if ttime == self.mtime:
|
if ttime == self.mtime:
|
||||||
logging.debug('%s exists and up-to-date (lastmod: %d)', target, ttime)
|
logging.debug(
|
||||||
|
'%s exists and up-to-date (lastmod: %d)',
|
||||||
|
self.target,
|
||||||
|
ttime
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
#if not os.path.isdir(targetdir):
|
|
||||||
#os.mkdir(targetdir)
|
|
||||||
|
|
||||||
tmplvars = {
|
tmplvars = {
|
||||||
'reply': self.tmplvars,
|
'reply': self.tmplvars,
|
||||||
'site': renderer.sitevars,
|
'site': renderer.sitevars,
|
||||||
|
@ -719,7 +877,8 @@ class WebImage(object):
|
||||||
self._rssenclosure = {
|
self._rssenclosure = {
|
||||||
'mime': magic.Magic(mime=True).from_file(target['fpath']),
|
'mime': magic.Magic(mime=True).from_file(target['fpath']),
|
||||||
'url': target['url'],
|
'url': target['url'],
|
||||||
'size': os.path.getsize(target['fpath'])
|
'size': os.path.getsize(target['fpath']),
|
||||||
|
'fname': self.fname
|
||||||
}
|
}
|
||||||
return self._rssenclosure
|
return self._rssenclosure
|
||||||
|
|
||||||
|
@ -976,8 +1135,8 @@ class Taxonomy(BaseIter):
|
||||||
|
|
||||||
|
|
||||||
async def render(self, renderer):
|
async def render(self, renderer):
|
||||||
if not self.slug or self.slug is 'None':
|
#if not self.slug or self.slug is 'None':
|
||||||
return
|
#return
|
||||||
|
|
||||||
self.__mkdirs()
|
self.__mkdirs()
|
||||||
page = 1
|
page = 1
|
||||||
|
@ -1031,24 +1190,20 @@ class Taxonomy(BaseIter):
|
||||||
os.utime(target, (self.mtime, self.mtime))
|
os.utime(target, (self.mtime, self.mtime))
|
||||||
|
|
||||||
if 1 == page:
|
if 1 == page:
|
||||||
target = os.path.join(self.feedp, 'index.rss')
|
#target = os.path.join(self.feedp, 'index.rss')
|
||||||
logging.info("rendering RSS feed to %s", target)
|
#logging.info("rendering RSS feed to %s", target)
|
||||||
r = renderer.j2.get_template('rss.html').render(tmplvars)
|
#r = renderer.j2.get_template('rss.html').render(tmplvars)
|
||||||
|
#with open(target, "wt") as html:
|
||||||
|
#html.write(r)
|
||||||
|
#os.utime(target, (self.mtime, self.mtime))
|
||||||
|
|
||||||
|
target = os.path.join(self.feedp, 'index.atom')
|
||||||
|
logging.info("rendering Atom feed to %s", target)
|
||||||
|
r = renderer.j2.get_template('atom.html').render(tmplvars)
|
||||||
with open(target, "wt") as html:
|
with open(target, "wt") as html:
|
||||||
html.write(r)
|
html.write(r)
|
||||||
os.utime(target, (self.mtime, self.mtime))
|
os.utime(target, (self.mtime, self.mtime))
|
||||||
|
|
||||||
if not self.taxonomy or self.taxonomy == 'category':
|
|
||||||
t = shared.config.get('site', 'websuburl')
|
|
||||||
data = {
|
|
||||||
'hub.mode': 'publish',
|
|
||||||
'hub.url': "%s%s" % (
|
|
||||||
shared.config.get('site', 'url'), self.baseurl
|
|
||||||
)
|
|
||||||
}
|
|
||||||
logging.info("pinging %s with data %s", t, data)
|
|
||||||
requests.post(t, data=data)
|
|
||||||
|
|
||||||
# ---
|
# ---
|
||||||
# this is a joke
|
# this is a joke
|
||||||
# see http://indieweb.org/YAMLFeed
|
# see http://indieweb.org/YAMLFeed
|
||||||
|
@ -1081,6 +1236,18 @@ class Taxonomy(BaseIter):
|
||||||
os.utime(target, (self.mtime, self.mtime))
|
os.utime(target, (self.mtime, self.mtime))
|
||||||
# ---
|
# ---
|
||||||
|
|
||||||
|
if 1 == page:
|
||||||
|
if not self.taxonomy or self.taxonomy == 'category':
|
||||||
|
t = shared.config.get('site', 'websuburl')
|
||||||
|
data = {
|
||||||
|
'hub.mode': 'publish',
|
||||||
|
'hub.url': "%s%s" % (
|
||||||
|
shared.config.get('site', 'url'), self.baseurl
|
||||||
|
)
|
||||||
|
}
|
||||||
|
logging.info("pinging %s with data %s", t, data)
|
||||||
|
requests.post(t, data=data)
|
||||||
|
|
||||||
|
|
||||||
class Content(BaseIter):
|
class Content(BaseIter):
|
||||||
def __init__(self, images, comments, extensions=['md']):
|
def __init__(self, images, comments, extensions=['md']):
|
||||||
|
@ -1557,7 +1724,7 @@ class Singular(BaseRenderable):
|
||||||
if not isinstance(maybe, list):
|
if not isinstance(maybe, list):
|
||||||
maybe = [maybe]
|
maybe = [maybe]
|
||||||
for url in maybe:
|
for url in maybe:
|
||||||
copies[url] = OfflineCopy(url)
|
copies[url] = OfflineArchive(url)
|
||||||
copies[url].run()
|
copies[url].run()
|
||||||
|
|
||||||
self.copies = copies
|
self.copies = copies
|
||||||
|
@ -1601,7 +1768,8 @@ class Singular(BaseRenderable):
|
||||||
'slug': self.fname,
|
'slug': self.fname,
|
||||||
'shortslug': self.shortslug,
|
'shortslug': self.shortslug,
|
||||||
'rssenclosure': self.rssenclosure,
|
'rssenclosure': self.rssenclosure,
|
||||||
'copies': self.offlinecopies,
|
#'copies': self.offlinecopies,
|
||||||
|
'copies': [],
|
||||||
'comments': self.comments,
|
'comments': self.comments,
|
||||||
'replies': self.replies,
|
'replies': self.replies,
|
||||||
'reacjis': self.reacjis,
|
'reacjis': self.reacjis,
|
||||||
|
@ -1617,6 +1785,15 @@ class Singular(BaseRenderable):
|
||||||
return self._shortslug
|
return self._shortslug
|
||||||
|
|
||||||
|
|
||||||
|
@property
def target(self):
    """Absolute path of the rendered ``index.html`` for this entry.

    The file lives in a directory named after ``self.fname`` below the
    configured build directory.
    """
    builddir = shared.config.get('target', 'builddir')
    pagedir = os.path.abspath(os.path.join(builddir, self.fname))
    return os.path.join(pagedir, 'index.html')
|
||||||
|
|
||||||
|
|
||||||
async def rendercomments(self, renderer):
|
async def rendercomments(self, renderer):
|
||||||
for comment in self.comments:
|
for comment in self.comments:
|
||||||
await comment.render(renderer)
|
await comment.render(renderer)
|
||||||
|
@ -1638,17 +1815,15 @@ class Singular(BaseRenderable):
|
||||||
mtime = lctime
|
mtime = lctime
|
||||||
|
|
||||||
logging.info("rendering and saving %s", self.fname)
|
logging.info("rendering and saving %s", self.fname)
|
||||||
targetdir = os.path.abspath(os.path.join(
|
if not shared.config.getboolean('params', 'force') and os.path.isfile(self.target):
|
||||||
shared.config.get('target', 'builddir'),
|
ttime = int(os.path.getmtime(self.target))
|
||||||
self.fname
|
|
||||||
))
|
|
||||||
target = os.path.join(targetdir, 'index.html')
|
|
||||||
|
|
||||||
if not shared.config.getboolean('params', 'force') and os.path.isfile(target):
|
|
||||||
ttime = int(os.path.getmtime(target))
|
|
||||||
logging.debug('ttime is %d mtime is %d', ttime, mtime)
|
logging.debug('ttime is %d mtime is %d', ttime, mtime)
|
||||||
if ttime == mtime:
|
if ttime == mtime:
|
||||||
logging.debug('%s exists and up-to-date (lastmod: %d)', target, ttime)
|
logging.debug(
|
||||||
|
'%s exists and up-to-date (lastmod: %d)',
|
||||||
|
self.target,
|
||||||
|
ttime
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
tmplvars = {
|
tmplvars = {
|
||||||
|
@ -1657,7 +1832,7 @@ class Singular(BaseRenderable):
|
||||||
'taxonomy': {},
|
'taxonomy': {},
|
||||||
}
|
}
|
||||||
r = renderer.j2.get_template(self.tmplfile).render(tmplvars)
|
r = renderer.j2.get_template(self.tmplfile).render(tmplvars)
|
||||||
self.writerendered(target, r, mtime)
|
self.writerendered(r, mtime)
|
||||||
|
|
||||||
|
|
||||||
async def ping(self, pinger):
|
async def ping(self, pinger):
|
||||||
|
@ -1746,6 +1921,12 @@ class NASG(object):
|
||||||
default=False,
|
default=False,
|
||||||
help='skip rendering'
|
help='skip rendering'
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'--refetch',
|
||||||
|
action='store_true',
|
||||||
|
default=False,
|
||||||
|
help='force re-fetching offline archives'
|
||||||
|
)
|
||||||
|
|
||||||
params = vars(parser.parse_args())
|
params = vars(parser.parse_args())
|
||||||
shared.config.add_section('params')
|
shared.config.add_section('params')
|
||||||
|
|
2
new.py
Normal file → Executable file
2
new.py
Normal file → Executable file
|
@ -119,7 +119,7 @@ if __name__ == '__main__':
|
||||||
doc.content = content
|
doc.content = content
|
||||||
|
|
||||||
tmpsave = os.path.join(tempfile.gettempdir(), "%s.md" % slug)
|
tmpsave = os.path.join(tempfile.gettempdir(), "%s.md" % slug)
|
||||||
saveto = input('Save to: [%s]: ' % categories) or tmpsave
|
saveto = input('Save to: [%s]: ' % categories) or 'bookmark'
|
||||||
|
|
||||||
if tmpsave != saveto:
|
if tmpsave != saveto:
|
||||||
saveto = os.path.join(shared.config.get('source', 'contentdir'), saveto, "%s.md" % slug)
|
saveto = os.path.join(shared.config.get('source', 'contentdir'), saveto, "%s.md" % slug)
|
||||||
|
|
75
pesos.py
75
pesos.py
|
@ -14,6 +14,22 @@ from slugify import slugify
|
||||||
|
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
|
|
||||||
|
""" TODO
|
||||||
|
|
||||||
|
- following from:
|
||||||
|
- tumblr
|
||||||
|
- deviantart
|
||||||
|
- flickr
|
||||||
|
- wordpress.com
|
||||||
|
- twitter
|
||||||
|
- 500px
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Bookmark(object):
|
class Bookmark(object):
|
||||||
def __init__(self, title, url, fname=None):
|
def __init__(self, title, url, fname=None):
|
||||||
self.fm = frontmatter.loads('')
|
self.fm = frontmatter.loads('')
|
||||||
|
@ -126,6 +142,37 @@ class Fav(object):
|
||||||
os.utime(self.target, (self.arrow.timestamp, self.arrow.timestamp))
|
os.utime(self.target, (self.arrow.timestamp, self.arrow.timestamp))
|
||||||
|
|
||||||
|
|
||||||
|
class PinterestFav(Fav):
    """A favourite built from a single Pinterest pin page."""

    def __init__(self, url):
        """Remember the pin URL and derive the markdown file name from it.

        The file name is ``pinterest-<last non-empty path segment>.md``.
        """
        super().__init__()
        self.url = url
        segments = [part for part in url.split('/') if part]
        self.fname = "pinterest-%s.md" % (segments[-1])

    def run(self):
        """Fetch the pin page and fill the front matter from its JSON-LD block.

        The image named in the JSON-LD data is stored via ``saveimg``; the
        article body is converted with Pandoc and used as the content.
        Any failure is logged and swallowed.
        """
        try:
            response = requests.get(self.url)
            page = bs4.BeautifulSoup(response.text, 'lxml')
            ldscript = page.find('script', type='application/ld+json')
            ld = json.loads(ldscript.text)
            self.saveimg(ld.get('image'))

            # falls back to "now" when the pin carries no publish date
            published = arrow.get(
                ld.get('datePublished', arrow.utcnow().timestamp)
            ).format(shared.ARROWISO)
            self.fm.metadata = {
                'published': published,
                'title': ld.get('headline', self.url),
                'favorite-of': self.url,
                'image': self.imgname
            }

            body = ld.get('articleBody', '')
            self.fm.content = shared.Pandoc(False).convert(body)

        except Exception as e:
            logging.error('saving pinterest fav %s failed: %s', self.url, e)
            return
|
||||||
|
|
||||||
|
|
||||||
class FlickrFav(Fav):
|
class FlickrFav(Fav):
|
||||||
def __init__(self, photo):
|
def __init__(self, photo):
|
||||||
super(FlickrFav, self).__init__()
|
super(FlickrFav, self).__init__()
|
||||||
|
@ -280,6 +327,31 @@ class FivehpxFavs(Favs):
|
||||||
fav.write()
|
fav.write()
|
||||||
|
|
||||||
|
|
||||||
|
#class Following(object):
|
||||||
|
#def __init__(self, confgroup):
|
||||||
|
#self.confgroup = confgroup
|
||||||
|
#self.url = shared.config.get(confgroup, 'fav_api')
|
||||||
|
|
||||||
|
|
||||||
|
#class FlickrFollowing(Following):
|
||||||
|
#def __init__(self):
|
||||||
|
#super(FlickrFollowing, self).__init__('flickr')
|
||||||
|
#self.params = {
|
||||||
|
#'method': 'flickr.contacts.getList',
|
||||||
|
#'api_key': shared.config.get('flickr', 'api_key'),
|
||||||
|
#'format': 'json',
|
||||||
|
#'nojsoncallback': '1',
|
||||||
|
#}
|
||||||
|
|
||||||
|
#def run(self):
|
||||||
|
#r = requests.get(self.url,params=self.params)
|
||||||
|
#js = json.loads(r.text)
|
||||||
|
#pprint(js)
|
||||||
|
#for contact in js.get('contacts', {}).get('contact', []):
|
||||||
|
#pprint(contact)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
while len(logging.root.handlers) > 0:
|
while len(logging.root.handlers) > 0:
|
||||||
logging.root.removeHandler(logging.root.handlers[-1])
|
logging.root.removeHandler(logging.root.handlers[-1])
|
||||||
|
@ -297,3 +369,6 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
fivehpx = FivehpxFavs()
|
fivehpx = FivehpxFavs()
|
||||||
fivehpx.run()
|
fivehpx.run()
|
||||||
|
|
||||||
|
#flickrfollow = FlickrFollowing()
|
||||||
|
#flickrfollow.run()
|
||||||
|
|
65
shared.py
65
shared.py
|
@ -4,9 +4,11 @@ import re
|
||||||
import glob
|
import glob
|
||||||
import logging
|
import logging
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import json
|
||||||
|
|
||||||
from whoosh import fields
|
from whoosh import fields
|
||||||
from whoosh import analysis
|
from whoosh import analysis
|
||||||
|
from slugify import slugify
|
||||||
|
|
||||||
def __expandconfig(config):
|
def __expandconfig(config):
|
||||||
""" add the dirs to the config automatically """
|
""" add the dirs to the config automatically """
|
||||||
|
@ -38,6 +40,8 @@ def baseN(num, b=36, numerals="0123456789abcdefghijklmnopqrstuvwxyz"):
|
||||||
).lstrip(numerals[0]) + numerals[num % b]
|
).lstrip(numerals[0]) + numerals[num % b]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def slugfname(url):
    """Return a file-name-safe slug for a URL, at most 200 characters long.

    The leading ``http://`` or ``https://`` and an optional ``www.`` host
    prefix are removed before slugifying. The dot is part of the prefix so
    that hosts which merely start with "www" (e.g. ``wwwhatever.com``) keep
    their full name.
    """
    return "%s" % slugify(re.sub(r"^https?://(?:www\.)?", "", url))[:200]
|
||||||
|
|
||||||
ARROWISO = 'YYYY-MM-DDTHH:mm:ssZ'
|
ARROWISO = 'YYYY-MM-DDTHH:mm:ssZ'
|
||||||
STRFISO = '%Y-%m-%dT%H:%M:%S%z'
|
STRFISO = '%Y-%m-%dT%H:%M:%S%z'
|
||||||
|
@ -104,6 +108,65 @@ config.read('config.ini')
|
||||||
config = __expandconfig(config)
|
config = __expandconfig(config)
|
||||||
|
|
||||||
|
|
||||||
|
class TokenDB(object):
    """In-memory token store backed by a ``tokens.json`` file.

    The file lives in the configured ``common``/``basedir`` directory.
    The ``set_*`` methods only change the in-memory mapping; nothing is
    written to disk until ``save()`` is called.
    """

    def __init__(self):
        """Locate the JSON file and load it if it is already there."""
        basedir = config.get('common', 'basedir')
        self.db = os.path.abspath(os.path.join(basedir, 'tokens.json'))
        self.tokens = {}
        self.refresh()

    def refresh(self):
        """Replace the in-memory tokens with the file's content, if it exists."""
        if not os.path.isfile(self.db):
            return
        with open(self.db, 'rt') as f:
            self.tokens = json.load(f)

    def save(self):
        """Write the tokens to disk as sorted, indented JSON, then re-read them."""
        with open(self.db, 'wt') as f:
            json.dump(self.tokens, f, indent=4, sort_keys=True)
        # NOTE(review): assumes the re-read happens after the file is closed;
        # the original nesting is not recoverable from the diff view
        self.refresh()

    def get_token(self, token):
        """Return the entry stored under ``token``, or None."""
        return self.tokens.get(token, None)

    def get_service(self, service):
        """Return the token entry a service name points at.

        The value stored under ``service`` is itself used as a key for a
        second lookup. A missing or empty value is returned as it is.
        """
        pointer = self.tokens.get(service, None)
        if not pointer:
            return pointer
        return self.get_token(pointer)

    def set_service(self, service, token):
        """Point ``service`` at ``token`` (in memory only)."""
        self.tokens[service] = token

    def set_token(self, token, secret):
        """Store an OAuth token/secret pair under ``token`` (in memory only)."""
        self.tokens[token] = {
            'oauth_token': token,
            'oauth_token_secret': secret
        }

    def set_verifier(self, token, verifier):
        """Attach ``verifier`` to the existing entry for ``token`` (in memory only)."""
        entry = self.tokens.get(token)
        entry.update(verifier=verifier)
        self.tokens[token] = entry
|
||||||
|
|
||||||
|
tokendb = TokenDB()
|
||||||
|
|
||||||
class CMDLine(object):
|
class CMDLine(object):
|
||||||
def __init__(self, executable):
|
def __init__(self, executable):
|
||||||
self.executable = self._which(executable)
|
self.executable = self._which(executable)
|
||||||
|
|
Loading…
Reference in a new issue