import os
import glob
import imghdr
import re
import logging
import shutil
import subprocess
import json
from io import BytesIO
import lxml.etree as etree
from slugify import slugify
import requests
from requests.auth import HTTPBasicAuth
import arrow
import settings
import keys
import yaml
from pprint import pprint

TMPFEXT = ".xyz"
MDFEXT = ".md"


def utfyamldump(data):
    """ dump YAML with actual UTF-8 chars """
    return yaml.dump(data, default_flow_style=False, indent=4, allow_unicode=True)


def slugfname(url):
    """ turn a URL into a short, slug-safe filename fragment """
    return slugify(re.sub(r"^https?://(?:www)?", "", url), only_ascii=True, lower=True)[
        :200
    ]


class cached_property(object):
    """ extremely simple cached_property decorator:
    whenever something is called as @cached_property, on first run, the
    result is calculated, then the class method is overwritten to be
    a property, containing the result from the method
    """

    def __init__(self, method, name=None):
        self.method = method
        self.name = name or method.__name__

    def __get__(self, inst, cls):
        if inst is None:
            return self
        result = self.method(inst)
        setattr(inst, self.name, result)
        return result
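
# A minimal usage sketch (hypothetical, not part of the original module): the
# first attribute access runs the decorated method, then the stored result
# shadows the descriptor on the instance, so later accesses are plain lookups.
#
#   class Example(object):
#       @cached_property
#       def expensive(self):
#           return sum(range(10 ** 6))
#
#   e = Example()
#   e.expensive  # computed once
#   e.expensive  # served from the instance attribute, no recomputation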


class Follows(dict):
    """ silo name -> list of OPML-style feed entries, kept in sync with miniflux """

    def __init__(self):
        self.auth = HTTPBasicAuth(
            keys.miniflux.get("username"), keys.miniflux.get("token")
        )

    @property
    def subscriptions(self):
        """ the current subscriptions, as reported by the miniflux JSON-RPC API """
        params = {"jsonrpc": "2.0", "method": "getFeeds", "id": keys.miniflux.get("id")}
        r = requests.post(
            keys.miniflux.get("url"), data=json.dumps(params), auth=self.auth
        )
        return r.json().get("result", [])

    def sync(self):
        """ subscribe miniflux to any feed in this mapping that isn't subscribed yet """
        current = []
        for feed in self.subscriptions:
            try:
                current.append(feed["feed_url"])
            except Exception as e:
                logging.error("problem with feed entry %s: %s", feed, e)
        for silo, feeds in self.items():
            for feed in feeds:
                xmlurl = feed.get("xmlUrl")
                if xmlurl and xmlurl not in current:
                    logging.info("creating subscription for: %s", feed)
                    params = {
                        "jsonrpc": "2.0",
                        "method": "createFeed",
                        "id": keys.miniflux.get("id"),
                        "params": {"url": xmlurl, "group_name": silo},
                    }
                    r = requests.post(
                        keys.miniflux.get("url"),
                        data=json.dumps(params),
                        auth=self.auth,
                    )

    def export(self):
        """ write the subscriptions into following.opml under the content path """
        opml = etree.Element("opml", version="1.0")
        xmldoc = etree.ElementTree(opml)
        opml.addprevious(
            etree.ProcessingInstruction(
                "xml-stylesheet",
                'type="text/xsl" href="%s"' % (settings.opml.get("xsl")),
            )
        )
        head = etree.SubElement(opml, "head")
        etree.SubElement(head, "title").text = settings.opml.get("title")
        etree.SubElement(head, "dateCreated").text = arrow.utcnow().format(
            "ddd, DD MMM YYYY HH:mm:ss UTC"
        )
        etree.SubElement(head, "ownerName").text = settings.opml.get("owner")
        etree.SubElement(head, "ownerEmail").text = settings.opml.get("email")

        body = etree.SubElement(opml, "body")
        groups = {}
        for feed in self.subscriptions:
            # contains sensitive data, skip it
            if "sessionid" in feed.get("feed_url") or "sessionid" in feed.get(
                "site_url"
            ):
                continue

            fgroup = feed.get("groups", None)
            if not fgroup:
                fgroup = [{"title": "Unknown", "id": -1}]
            fgroup = fgroup.pop()
            # some groups need to be skipped
            if fgroup["title"].lower() in ["private"]:
                continue
            if fgroup["title"] not in groups:
                groups[fgroup["title"]] = etree.SubElement(
                    body, "outline", text=fgroup["title"]
                )
            etree.SubElement(
                groups[fgroup["title"]],
                "outline",
                type="rss",
                text=feed.get("title"),
                xmlUrl=feed.get("feed_url"),
                htmlUrl=feed.get("site_url"),
            )

        opmlfile = os.path.join(settings.paths.get("content"), "following.opml")

        with open(opmlfile, "wb") as f:
            f.write(
                etree.tostring(
                    xmldoc, encoding="utf-8", xml_declaration=True, pretty_print=True
                )
            )
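
# Hypothetical usage sketch (assumes the silo -> feeds mapping is populated
# elsewhere, e.g. from a YAML or OPML source; not part of the original module):
#
#   follows = Follows()
#   follows.update({"flickr": [{"xmlUrl": "https://example.com/feed.xml"}]})
#   follows.sync()    # subscribe to anything missing in miniflux
#   follows.export()  # write following.opml into the content directory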


class Favs(object):
    def __init__(self, silo):
        self.silo = silo

    @property
    def feeds(self):
        return []

    @property
    def since(self):
        """ mtime of the newest archived favorite for this silo, or 0 if none exist """
        d = os.path.join(settings.paths.get("archive"), "favorite", "%s*" % self.silo)
        files = glob.glob(d)
        if len(files):
            mtime = max([int(os.path.getmtime(f)) for f in files])
        else:
            mtime = 0
        return mtime


class ImgFav(object):
    def __init__(self):
        return

    def run(self):
        if not self.exists:
            self.fetch_images()
            self.save_txt()

    @property
    def exists(self):
        maybe = glob.glob("%s*" % self.targetprefix)
        if len(maybe):
            return True
        return False

    def save_txt(self):
        """ write the favorite's metadata as YAML front matter into a markdown file """
        attachments = [
            os.path.basename(fn)
            for fn in glob.glob("%s*" % self.targetprefix)
            if not os.path.basename(fn).endswith(".md")
        ]
        meta = {
            "title": self.title,
            "favorite-of": self.url,
            "date": str(self.published),
            "sources": list(self.images.values()),
            "attachments": attachments,
            "author": self.author,
        }
        r = "---\n%s\n---\n\n" % (utfyamldump(meta))
        with open("%s%s" % (self.targetprefix, MDFEXT), "wt") as fpath:
            fpath.write(r)

    def fetch_images(self):
        for fpath, url in self.images.items():
            self.fetch_image(fpath, url)

    def fetch_image(self, fpath, url):
        logging.info("pulling image %s to %s", url, fpath)
        r = requests.get(url, stream=True)
        if r.status_code != 200:
            logging.error("failed to fetch %s: HTTP %s", url, r.status_code)
            return
        with open(fpath, "wb") as f:
            r.raw.decode_content = True
            shutil.copyfileobj(r.raw, f)

        # drop the download if it turns out not to be an image
        imgtype = imghdr.what(fpath)
        if not imgtype:
            os.remove(fpath)
            return
        if imgtype in ["jpg", "jpeg", "png"]:
            self.write_exif(fpath)
        os.rename(fpath, fpath.replace(TMPFEXT, ".%s" % (imgtype)))

    def write_exif(self, fpath):
        """ fill XMP metadata (copyright, source, date, title, tags, GPS) via exiftool """
        logging.info("populating EXIF data of %s", fpath)

        geo_lat = False
        geo_lon = False

        if hasattr(self, "geo") and self.geo is not None:
            lat, lon = self.geo
            if lat and lon and "null" != lat and "null" != lon:
                geo_lat = lat
                geo_lon = lon

        params = [
            "exiftool",
            "-overwrite_original",
            "-XMP:Copyright=Copyright %s %s (%s)"
            % (
                self.published.to("utc").format("YYYY"),
                self.author.get("name"),
                self.author.get("url"),
            ),
            "-XMP:Source=%s" % self.url,
            "-XMP:ReleaseDate=%s"
            % self.published.to("utc").format("YYYY:MM:DD HH:mm:ss"),
            "-XMP:Headline=%s" % self.title,
            "-XMP:Description=%s" % self.content,
        ]

        for t in self.tags:
            params.append("-XMP:HierarchicalSubject+=%s" % t)
            params.append("-XMP:Subject+=%s" % t)

        if geo_lat and geo_lon:
            geo_lat = round(float(geo_lat), 6)
            geo_lon = round(float(geo_lon), 6)

            if geo_lat < 0:
                GPSLatitudeRef = "S"
            else:
                GPSLatitudeRef = "N"

            if geo_lon < 0:
                GPSLongitudeRef = "W"
            else:
                GPSLongitudeRef = "E"

            params.append("-GPSLongitude=%s" % abs(geo_lon))
            params.append("-GPSLatitude=%s" % abs(geo_lat))
            params.append("-GPSLongitudeRef=%s" % GPSLongitudeRef)
            params.append("-GPSLatitudeRef=%s" % GPSLatitudeRef)

        params.append(fpath)

        p = subprocess.Popen(
            params,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        stdout, stderr = p.communicate()

        # exiftool may still leave a backup copy around; remove it if it did
        _original = "%s_original" % fpath
        if os.path.exists(_original):
            os.unlink(_original)
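
# Hypothetical subclass sketch (not part of the original module): ImgFav expects
# the subclass to provide targetprefix, images (local path -> remote URL), title,
# url, author, published, content and tags; the field names of `like` below are
# illustrative only.
#
#   class FlickrFav(ImgFav):
#       def __init__(self, like):
#           self.url = like["url"]
#           self.title = like["title"]
#           self.author = {"name": like["owner"], "url": like["owner_url"]}
#           self.published = arrow.get(like["date_faved"])
#           self.content = like.get("description", "")
#           self.tags = like.get("tags", [])
#           self.targetprefix = os.path.join(
#               settings.paths.get("archive"), "favorite", slugfname(self.url)
#           )
#           self.images = {"%s%s" % (self.targetprefix, TMPFEXT): like["img_url"]}
#
#   FlickrFav(like_dict).run()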