I lost track of changes; this is a commit that has to be followed by a large cleanup

Peter Molnar 2020-05-06 13:27:49 +01:00
parent 6a3b127245
commit d5665d15e7
11 changed files with 401 additions and 342 deletions

.gitignore

@@ -1,7 +1,6 @@
 keys.py
-.venv
-__pycache
-keys.py
+__pycache__
 lib
-__pycache
+__pycache__
 _scratch
+.venv

Artstation.py

@@ -7,34 +7,51 @@ import requests
 import keys
 import common
 import settings
+from time import sleep
 from math import ceil
+import random
 from pprint import pprint


 class ASFavs(common.Favs):
-    headers = {
-        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0",
-        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
-        "Accept-Language": "en-US,en;q=0.5",
-        "Accept-Encoding": "gzip, deflate, br",
-        "DNT": "1",
-        "Connection": "keep-alive",
-        "Upgrade-Insecure-Requests": "1",
-        "Pragma": "no-cache",
-        "Cache-Control": "no-cache",
-    }
-
     def __init__(self):
         super().__init__("artstation")
         self.user = keys.artstation.get("username")
         self.session = requests.Session()
+        self.session.headers.update({
+            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0",
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+            "Accept-Language": "en-US,en;q=0.5",
+            #"DNT": "1",
+            "Connection": "keep-alive",
+            "Upgrade-Insecure-Requests": "1",
+            "Pragma": "no-cache",
+            "Cache-Control": "max-age=0, no-cache",
+        })
+        session.headers.update({
+            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0",
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+            "Accept-Language": "en-US,en;q=0.5",
+            "DNT": "1",
+            "Connection": "keep-alive",
+            "Upgrade-Insecure-Requests": "1",
+            "Pragma": "no-cache",
+            "Cache-Control": "max-age=0, no-cache",
+        })

     def paged_likes(self, page=1):
         url = "https://www.artstation.com/users/%s/likes.json?page=%s" % (
             self.user,
             page,
         )
-        js = self.session.get(url, headers=self.headers)
+        js = self.session.get(url)
+        while js.status_code != requests.codes.ok:
+            # FU cloudflare
+            pprint(self.session.cookies)
+            sleep(round(random.uniform(0.7,3.5), 2))
+            js = self.session.get(url)
         try:
             js = js.json()
             if "data" not in js:
@@ -46,18 +63,13 @@ class ASFavs(common.Favs):
     @property
     def likes(self):
-        # init Session and it's cookies before doing anything
-        # FU cloudflare, I'm trying to access my own likes and followings!
-        url = "https://www.artstation.com/"
-        self.session.get(url, headers=self.headers)
-        # now do the real work
-        js = self.paged_likes()
+        js = self.paged_likes(1)
         if not js:
             return []
         likes = js.get("data", [])
         pages = ceil(js.get("total_count", 1) / 50)
         while pages > 1:
-            extras = self.paged_likes()
+            extras = self.paged_likes(pages)
             if not extras:
                 continue
             likes = likes + extras.get("data", [])
@@ -88,6 +100,7 @@ class ASFavs(common.Favs):
         return feeds

     def run(self):
+        # FU cloudflare
         for like in self.likes:
             like = ASLike(like, self.session, self.headers)
             like.run()
@@ -138,14 +151,14 @@ class ASLike(common.ImgFav):
         if not len(title):
             title = self.like.get("slug")
         if not len(title):
-            title = common.slugfname(self.url)
+            title = common.url2slug(self.url)
         return title

     @property
     def slug(self):
         maybe = self.like.get("slug")
         if not len(maybe):
-            maybe = common.slugfname(self.url)
+            maybe = common.url2slug(self.url)
         return maybe

     @property
@@ -155,7 +168,7 @@ class ASLike(common.ImgFav):
             "favorite",
             "artstation_%s_%s_%s"
             % (
-                common.slugfname("%s" % self.like.get("user").get("username")),
+                common.url2slug("%s" % self.like.get("user").get("username")),
                 self.like.get("hash_id"),
                 self.slug,
             ),
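
Note: the retry loop added to paged_likes is the heart of the Cloudflare workaround here: re-request with a randomized delay until a 200 comes back. A standalone sketch of the same pattern, assuming a bounded retry count (the committed loop has no cap and can spin forever):

    import random
    from time import sleep

    import requests

    def get_with_retry(session, url, max_tries=10):
        # re-request with a random 0.7-3.5s pause until HTTP 200;
        # max_tries is an assumption added for safety, not in the commit
        r = session.get(url)
        tries = 1
        while r.status_code != requests.codes.ok and tries < max_tries:
            sleep(round(random.uniform(0.7, 3.5), 2))
            r = session.get(url)
            tries += 1
        return r

Two leftovers for the promised cleanup: the second headers block calls session.headers.update on a bare session name, which is unbound inside __init__, and run.py disables Artstation entirely in this same commit.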

DeviantArt.py

@@ -122,7 +122,7 @@ class DAFav(common.ImgFav):
     def title(self):
         title = self.deviation.title
         if not len(title):
-            title = common.slugfname(self.url)
+            title = common.url2slug(self.url)
         return clean(title.strip())

     @property
@@ -132,15 +132,15 @@ class DAFav(common.ImgFav):
             "favorite",
             "deviantart_%s_%s_%s"
             % (
-                common.slugfname("%s" % self.deviation.author),
+                common.url2slug("%s" % self.deviation.author),
                 self.id,
-                common.slugfname("%s" % self.title),
+                common.url2slug("%s" % self.title),
             ),
         )

     @property
     def published(self):
-        return arrow.get(self.deviation.published_time)
+        return arrow.get(int(self.deviation.published_time))

     @property
     def tags(self):
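
The published fix casts before parsing because DeviantArt delivers published_time as a string epoch; presumably newer arrow releases stopped auto-parsing digit strings as timestamps (the deleted lockfile below pinned arrow 0.14.2), so the int() makes the intent explicit. A minimal illustration with a made-up timestamp:

    import arrow

    published_time = "1588766869"  # epoch seconds, but delivered as a string
    published = arrow.get(int(published_time))  # int -> parsed as a Unix timestamp
    print(published.isoformat())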

Flickr.py

@@ -10,9 +10,6 @@ import settings
 from pprint import pprint
 import logging

-# class FlickrFollows(common.Follows):
-
-
 class FlickrFavs(common.Favs):
     def __init__(self):
         super().__init__("flickr")
@@ -109,12 +106,16 @@ class FlickrFav(common.ImgFav):
         return os.path.join(
             settings.paths.get("archive"),
             "favorite",
-            "flickr_%s_%s" % (common.slugfname("%s" % self.owner.id), self.id),
+            "flickr_%s_%s" % (common.url2slug("%s" % self.owner.id), self.id),
         )

     @property
     def published(self):
-        return arrow.get(self.info.get("dateuploaded"))
+        x = self.info.get("dateuploaded")
+        if x.isnumeric():
+            return arrow.get(int(x))
+        else:
+            return arrow.get(x)

     @property
     def tags(self):
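
Same epoch-as-string problem as the DeviantArt fix, but Flickr's dateuploaded can also arrive as an already-formatted date, hence the isnumeric() branch. The same logic as a free function, with made-up field values:

    import arrow

    def parse_flickr_date(value):
        # "dateuploaded" usually arrives as an epoch string;
        # fall back to arrow's normal string parsing otherwise
        if value.isnumeric():
            return arrow.get(int(value))
        return arrow.get(value)

    print(parse_flickr_date("1588766869"))
    print(parse_flickr_date("2020-05-06T13:27:49+01:00"))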

LastFM.py

@@ -10,31 +10,12 @@ import settings
 import keys
 from pprint import pprint
 from math import floor
+from common import cached_property

 Track = namedtuple(
     "Track", ["timestamp", "artist", "album", "title", "artistid", "albumid", "img"]
 )

-class cached_property(object):
-    """ extermely simple cached_property decorator:
-        whenever something is called as @cached_property, on first run, the
-        result is calculated, then the class method is overwritten to be
-        a property, contaning the result from the method
-    """
-
-    def __init__(self, method, name=None):
-        self.method = method
-        self.name = name or method.__name__
-
-    def __get__(self, inst, cls):
-        if inst is None:
-            return self
-        result = self.method(inst)
-        setattr(inst, self.name, result)
-        return result
-

 class LastFM(object):
     url = "http://ws.audioscrobbler.com/2.0/"
@@ -46,9 +27,9 @@ class LastFM(object):
             "format": "json",
             "limit": "200",
         }
-        if os.path.isfile(self.target):
-            mtime = os.path.getmtime(self.target)
-            self.params.update({"from": mtime})
+        # if os.path.isfile(self.target):
+        #     mtime = os.path.getmtime(self.target)
+        #     self.params.update({"from": mtime})

     @property
     def target(self):
@@ -57,6 +38,7 @@ class LastFM(object):
     @cached_property
     def existing(self):
         timestamps = []
+        if os.path.isfile(self.target):
             with open(self.target, "r") as f:
                 r = csv.reader(f)
                 for row in r:
@@ -98,8 +80,11 @@ class LastFM(object):
         return json.loads(r.text).get("recenttracks")

     def run(self):
-        startpage = floor(len(self.existing) / int(self.params.get("limit")))
-        self.params.update({"page": startpage})
+        if len(self.existing):
+            self.params.update({"from": sorted(self.existing)[-1]})
+        #startpage = max(1, floor(len(self.existing) / int(self.params.get("limit"))))
+        #startpage = 1
+        self.params.update({"page": 1})
         try:
             data = self.fetch()
             tracks = self.extracttracks(data)
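
The run() change switches from page-skipping to delta fetching: instead of deriving a start page from how many rows are already on disk, it sends the newest stored timestamp as the last.fm from parameter, so the API only returns scrobbles newer than what the CSV already holds. A sketch of just the parameter handling, with assumed timestamp values:

    existing = [1588000000, 1588600000, 1588766869]  # timestamps already in the CSV

    params = {"format": "json", "limit": "200"}
    if len(existing):
        # user.getRecentTracks honours "from": everything older is skipped
        params.update({"from": sorted(existing)[-1]})
    params.update({"page": 1})
    print(params)  # {'format': 'json', 'limit': '200', 'from': 1588766869, 'page': 1}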

Pipfile (deleted)

@@ -1,20 +0,0 @@
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true
[dev-packages]
[packages]
requests = "*"
arrow = "*"
unicode-slugify = "*"
lxml = "*"
bleach = "*"
deviantart = "*"
flickr-api = "*"
pytumblr = "*"
pyyaml = "*"
[requires]
python_version = "3.7"

Pipfile.lock (generated, deleted)

@@ -1,219 +0,0 @@
{
"_meta": {
"hash": {
"sha256": "654f2f42d6d9e3dd3aaf13b371369e3943573472fc93786661eff68d965dcb8b"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.7"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"arrow": {
"hashes": [
"sha256:03404b624e89ac5e4fc19c52045fa0f3203419fd4dd64f6e8958c522580a574a",
"sha256:41be7ea4c53c2cf57bf30f2d614f60c411160133f7a0a8c49111c30fb7e725b5"
],
"index": "pypi",
"version": "==0.14.2"
},
"bleach": {
"hashes": [
"sha256:213336e49e102af26d9cde77dd2d0397afabc5a6bf2fed985dc35b5d1e285a16",
"sha256:3fdf7f77adcf649c9911387df51254b813185e32b2c6619f690b593a617e19fa"
],
"index": "pypi",
"version": "==3.1.0"
},
"certifi": {
"hashes": [
"sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939",
"sha256:945e3ba63a0b9f577b1395204e13c3a231f9bc0223888be653286534e5873695"
],
"version": "==2019.6.16"
},
"chardet": {
"hashes": [
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
],
"version": "==3.0.4"
},
"deviantart": {
"hashes": [
"sha256:6100cc73f162e8c945f8304109d72a8eb94c6df8348d7319b3c86ea1bcc511b6"
],
"index": "pypi",
"version": "==0.1.5"
},
"flickr-api": {
"hashes": [
"sha256:2ff036ce4ca6f9be71a90310be80916b44feaeb95df5c1a9e5f57d49b64032c9",
"sha256:b9782c06315946b395d7f1b1e051fa2ff6aab4b21c5e82b1d95c04d7295f5f24"
],
"index": "pypi",
"version": "==0.7.3"
},
"future": {
"hashes": [
"sha256:67045236dcfd6816dc439556d009594abf643e5eb48992e36beac09c2ca659b8"
],
"version": "==0.17.1"
},
"httplib2": {
"hashes": [
"sha256:158fbd0ffbba536829d664bf3f32c4f45df41f8f791663665162dfaf21ffd075",
"sha256:d1146939d270f1f1eb8cbf8f5aa72ff37d897faccca448582bb1e180aeb4c6b2"
],
"version": "==0.13.0"
},
"idna": {
"hashes": [
"sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407",
"sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c"
],
"version": "==2.8"
},
"lxml": {
"hashes": [
"sha256:06c7616601430aa140a69f97e3116308fffe0848f543b639a5ec2e8920ae72fd",
"sha256:177202792f9842374a8077735c69c41a4282183f7851443d2beb8ee310720819",
"sha256:19317ad721ceb9e39847d11131903931e2794e447d4751ebb0d9236f1b349ff2",
"sha256:36d206e62f3e5dbaafd4ec692b67157e271f5da7fd925fda8515da675eace50d",
"sha256:387115b066c797c85f9861a9613abf50046a15aac16759bc92d04f94acfad082",
"sha256:3ce1c49d4b4a7bc75fb12acb3a6247bb7a91fe420542e6d671ba9187d12a12c2",
"sha256:4d2a5a7d6b0dbb8c37dab66a8ce09a8761409c044017721c21718659fa3365a1",
"sha256:58d0a1b33364d1253a88d18df6c0b2676a1746d27c969dc9e32d143a3701dda5",
"sha256:62a651c618b846b88fdcae0533ec23f185bb322d6c1845733f3123e8980c1d1b",
"sha256:69ff21064e7debc9b1b1e2eee8c2d686d042d4257186d70b338206a80c5bc5ea",
"sha256:7060453eba9ba59d821625c6af6a266bd68277dce6577f754d1eb9116c094266",
"sha256:7d26b36a9c4bce53b9cfe42e67849ae3c5c23558bc08363e53ffd6d94f4ff4d2",
"sha256:83b427ad2bfa0b9705e02a83d8d607d2c2f01889eb138168e462a3a052c42368",
"sha256:923d03c84534078386cf50193057aae98fa94cace8ea7580b74754493fda73ad",
"sha256:b773715609649a1a180025213f67ffdeb5a4878c784293ada300ee95a1f3257b",
"sha256:baff149c174e9108d4a2fee192c496711be85534eab63adb122f93e70aa35431",
"sha256:bca9d118b1014b4c2d19319b10a3ebed508ff649396ce1855e1c96528d9b2fa9",
"sha256:ce580c28845581535dc6000fc7c35fdadf8bea7ccb57d6321b044508e9ba0685",
"sha256:d34923a569e70224d88e6682490e24c842907ba2c948c5fd26185413cbe0cd96",
"sha256:dd9f0e531a049d8b35ec5e6c68a37f1ba6ec3a591415e6804cbdf652793d15d7",
"sha256:ecb805cbfe9102f3fd3d2ef16dfe5ae9e2d7a7dfbba92f4ff1e16ac9784dbfb0",
"sha256:ede9aad2197a0202caff35d417b671f5f91a3631477441076082a17c94edd846",
"sha256:ef2d1fc370400e0aa755aab0b20cf4f1d0e934e7fd5244f3dd4869078e4942b9",
"sha256:f2fec194a49bfaef42a548ee657362af5c7a640da757f6f452a35da7dd9f923c"
],
"index": "pypi",
"version": "==4.3.4"
},
"oauth2": {
"hashes": [
"sha256:15b5c42301f46dd63113f1214b0d81a8b16254f65a86d3c32a1b52297f3266e6",
"sha256:c006a85e7c60107c7cc6da1b184b5c719f6dd7202098196dfa6e55df669b59bf"
],
"version": "==1.9.0.post1"
},
"oauthlib": {
"hashes": [
"sha256:40a63637707e9163eda62d0f5345120c65e001a790480b8256448543c1f78f66",
"sha256:b4d99ae8ccfb7d33ba9591b59355c64eef5241534aa3da2e4c0435346b84bc8e"
],
"version": "==3.0.2"
},
"python-dateutil": {
"hashes": [
"sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb",
"sha256:c89805f6f4d64db21ed966fda138f8a5ed7a4fdbc1a8ee329ce1b74e3c74da9e"
],
"version": "==2.8.0"
},
"pytumblr": {
"hashes": [
"sha256:a3774d3978bcff2db98f36a2e5d17bb8496ac21157b1b518089adad86d0dca72",
"sha256:eaa4d98217df7ab6392fa5d8801f4a2bdcba35bf0fd49328aa3c98e3b231b6f2"
],
"index": "pypi",
"version": "==0.1.0"
},
"pyyaml": {
"hashes": [
"sha256:57acc1d8533cbe51f6662a55434f0dbecfa2b9eaf115bede8f6fd00115a0c0d3",
"sha256:588c94b3d16b76cfed8e0be54932e5729cc185caffaa5a451e7ad2f7ed8b4043",
"sha256:68c8dd247f29f9a0d09375c9c6b8fdc64b60810ebf07ba4cdd64ceee3a58c7b7",
"sha256:70d9818f1c9cd5c48bb87804f2efc8692f1023dac7f1a1a5c61d454043c1d265",
"sha256:86a93cccd50f8c125286e637328ff4eef108400dd7089b46a7be3445eecfa391",
"sha256:a0f329125a926876f647c9fa0ef32801587a12328b4a3c741270464e3e4fa778",
"sha256:a3c252ab0fa1bb0d5a3f6449a4826732f3eb6c0270925548cac342bc9b22c225",
"sha256:b4bb4d3f5e232425e25dda21c070ce05168a786ac9eda43768ab7f3ac2770955",
"sha256:cd0618c5ba5bda5f4039b9398bb7fb6a317bb8298218c3de25c47c4740e4b95e",
"sha256:ceacb9e5f8474dcf45b940578591c7f3d960e82f926c707788a570b51ba59190",
"sha256:fe6a88094b64132c4bb3b631412e90032e8cfe9745a58370462240b8cb7553cd"
],
"index": "pypi",
"version": "==5.1.1"
},
"requests": {
"hashes": [
"sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4",
"sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31"
],
"index": "pypi",
"version": "==2.22.0"
},
"requests-oauthlib": {
"hashes": [
"sha256:bd6533330e8748e94bf0b214775fed487d309b8b8fe823dc45641ebcd9a32f57",
"sha256:d3ed0c8f2e3bbc6b344fa63d6f933745ab394469da38db16bdddb461c7e25140",
"sha256:dd5a0499abfefd087c6dd96693cbd5bfd28aa009719a7f85ab3fabe3956ef19a"
],
"version": "==1.2.0"
},
"sanction": {
"hashes": [
"sha256:3e41b24e28590a0dfed68eddd10e44fa01feb81812ffb49085ca764e51aea9fe"
],
"version": "==0.4.1"
},
"six": {
"hashes": [
"sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c",
"sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73"
],
"version": "==1.12.0"
},
"unicode-slugify": {
"hashes": [
"sha256:34cf3afefa6480efe705a4fc0eaeeaf7f49754aec322ba3e8b2f27dc1cbcf650"
],
"index": "pypi",
"version": "==0.1.3"
},
"unidecode": {
"hashes": [
"sha256:1d7a042116536098d05d599ef2b8616759f02985c85b4fef50c78a5aaf10822a",
"sha256:2b6aab710c2a1647e928e36d69c21e76b453cd455f4e2621000e54b2a9b8cce8"
],
"version": "==1.1.1"
},
"urllib3": {
"hashes": [
"sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1",
"sha256:dbe59173209418ae49d485b87d1681aefa36252ee85884c31346debd19463232"
],
"version": "==1.25.3"
},
"webencodings": {
"hashes": [
"sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78",
"sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"
],
"version": "==0.5.1"
}
},
"develop": {}
}

Tumblr.py

@@ -39,7 +39,8 @@ class TumblrFavs(common.Favs):
             feeds.append(
                 {
                     "text": u.get("name"),
-                    "xmlUrl": "%srss" % u.get("url"),
+                    "xmlUrl": "https://cloud.petermolnar.net/rss-bridge/index.php?action=display&bridge=Tumblr&searchUsername=%s&format=Atom" % u.get("name"),
+                    #"xmlUrl": "%srss" % u.get("url"),
                     "htmlUrl": u.get("url"),
                 }
             )
@@ -95,7 +96,7 @@ class TumblrFav(common.ImgFav):
         if not len(title):
             title = self.data.get("slug", "")
         if not len(title):
-            title = common.slugfname(self.url)
+            title = common.url2slug(self.url)
         return clean(title.strip())

     @property
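
The feed URL swap routes Tumblr follows through a self-hosted rss-bridge instance instead of the blogs' native "%srss" feeds; only the blog name is interpolated, everything else in the URL is fixed. Rebuilding the substitution in isolation, with an example blog name:

    bridge = "https://cloud.petermolnar.net/rss-bridge/index.php"
    name = "staff"  # example blog name
    xml_url = (
        "%s?action=display&bridge=Tumblr&searchUsername=%s&format=Atom"
        % (bridge, name)
    )
    print(xml_url)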

common.py

@@ -16,20 +16,36 @@ import settings
 import keys
 import yaml
 from pprint import pprint
+import feedparser

 TMPFEXT = ".xyz"
 MDFEXT = ".md"

+TMPSUBDIR = "nasg"
+SHM = "/dev/shm"
+
+if os.path.isdir(SHM) and os.access(SHM, os.W_OK):
+    TMPDIR = f"{SHM}/{TMPSUBDIR}"
+else:
+    TMPDIR = os.path.join(gettempdir(), TMPSUBDIR)
+
+if not os.path.isdir(TMPDIR):
+    os.makedirs(TMPDIR)

 def utfyamldump(data):
     """ dump YAML with actual UTF-8 chars """
-    return yaml.dump(data, default_flow_style=False, indent=4, allow_unicode=True)
+    return yaml.dump(
+        data, default_flow_style=False, indent=4, allow_unicode=True
+    )

-def slugfname(url):
-    return slugify(re.sub(r"^https?://(?:www)?", "", url), only_ascii=True, lower=True)[
-        :200
-    ]
+def url2slug(url):
+    return slugify(
+        re.sub(r"^https?://(?:www)?", "", url),
+        only_ascii=True,
+        lower=True,
+    )[:200]
@@ -51,7 +67,178 @@ class cached_property(object):
         return result


-class Follows(dict):
+class Aperture(object):
+    def __init__(self):
+        self.session = requests.Session()
+        self.session.headers.update(
+            {
+                "Authorization": "Bearer %s"
+                % (keys.aperture["access_token"])
+            }
+        )
+        self.url = keys.aperture["url"]
+
+    @cached_property
+    def channels(self):
+        channels = self.session.get(f"{self.url}?action=channels")
+        if channels.status_code != requests.codes.ok:
+            logging.error(
+                "failed to get channels from aperture: ", channels.text
+            )
+            return None
+        try:
+            channels = channels.json()
+        except ValueError as e:
+            logging.error("failed to parse channels from aperture: ", e)
+            return None
+        if "channels" not in channels:
+            logging.error("no channels found in aperture: ")
+            return None
+        return channels["channels"]
+
+    def channelid(self, channelname):
+        for channel in self.channels:
+            if channel["name"].lower() == channelname.lower():
+                return channel["uid"]
+        return None
+
+    def feedmeta(self, url):
+        cfile = os.path.join(
+            TMPDIR,
+            "%s.%s.json" % (url2slug(url), self.__class__.__name__)
+        )
+        if os.path.exists(cfile):
+            with open(cfile, 'rt') as cache:
+                return json.loads(cache.read())
+        r = {
+            'title': url,
+            'feed': url,
+            'link': url,
+            'type': 'rss'
+        }
+        try:
+            feed = feedparser.parse(url)
+            if 'feed' in feed:
+                for maybe in ['title', 'link']:
+                    if maybe in feed['feed']:
+                        r[maybe] = feed['feed'][maybe]
+        except Exception as e:
+            logging.error("feedparser failed on %s: %s" % (url, e))
+            r['type']: 'hfeed'
+            pass
+        with open(cfile, 'wt') as cache:
+            cache.write(json.dumps(r))
+        return r
+
+    def channelfollows(self, channelid):
+        follows = self.session.get(
+            f"{self.url}?action=follow&channel={channelid}"
+        )
+        if follows.status_code != requests.codes.ok:
+            logging.error(
+                "failed to get follows from aperture: ", follows.text
+            )
+            return
+        try:
+            follows = follows.json()
+        except ValueError as e:
+            logging.error("failed to parse follows from aperture: ", e)
+            return
+        if "items" not in follows:
+            logging.error(
+                f"no follows found in aperture for channel {channelid}"
+            )
+            return
+        existing = {}
+        for follow in follows["items"]:
+            meta = self.feedmeta(follow["url"])
+            existing.update({follow["url"]: meta})
+        return existing
+
+    @cached_property
+    def follows(self):
+        follows = {}
+        for channel in self.channels:
+            follows[channel["name"]] = self.channelfollows(
+                channel["uid"]
+            )
+        return follows
+
+    def export(self):
+        opml = etree.Element("opml", version="1.0")
+        xmldoc = etree.ElementTree(opml)
+        opml.addprevious(
+            etree.ProcessingInstruction(
+                "xml-stylesheet",
+                'type="text/xsl" href="%s"'
+                % (settings.opml.get("xsl")),
+            )
+        )
+        head = etree.SubElement(opml, "head")
+        title = etree.SubElement(
+            head, "title"
+        ).text = settings.opml.get("title")
+        dt = etree.SubElement(
+            head, "dateCreated"
+        ).text = arrow.utcnow().format("ddd, DD MMM YYYY HH:mm:ss UTC")
+        owner = etree.SubElement(
+            head, "ownerName"
+        ).text = settings.opml.get("owner")
+        email = etree.SubElement(
+            head, "ownerEmail"
+        ).text = settings.opml.get("email")
+        body = etree.SubElement(opml, "body")
+        groups = {}
+        for group, feeds in self.follows.items():
+            if (
+                "private" in group.lower()
+                or "nsfw" in group.lower()
+            ):
+                continue
+            if group not in groups.keys():
+                groups[group] = etree.SubElement(
+                    body, "outline", text=group
+                )
+            for url, meta in feeds.items():
+                entry = etree.SubElement(
+                    groups[group],
+                    "outline",
+                    type="rss",
+                    text=meta['title'],
+                    xmlUrl=meta['feed'],
+                    htmlUrl=meta['link']
+                )
+        etree.tostring(
+            xmldoc,
+            encoding="utf-8",
+            xml_declaration=True,
+            pretty_print=True,
+        )
+        opmlfile = os.path.join(
+            settings.paths.get("content"), "following.opml"
+        )
+        with open(opmlfile, "wb") as f:
+            f.write(
+                etree.tostring(
+                    xmldoc,
+                    encoding="utf-8",
+                    xml_declaration=True,
+                    pretty_print=True,
+                )
+            )
+
+
+class MinifluxFollows(dict):
     def __init__(self):
         self.auth = HTTPBasicAuth(
             keys.miniflux.get("username"), keys.miniflux.get("token")
@@ -60,9 +247,15 @@ class MinifluxFollows(dict):
     @property
     def subscriptions(self):
         feeds = []
-        params = {"jsonrpc": "2.0", "method": "getFeeds", "id": keys.miniflux.get("id")}
+        params = {
+            "jsonrpc": "2.0",
+            "method": "getFeeds",
+            "id": keys.miniflux.get("id"),
+        }
         r = requests.post(
-            keys.miniflux.get("url"), data=json.dumps(params), auth=self.auth
+            keys.miniflux.get("url"),
+            data=json.dumps(params),
+            auth=self.auth,
         )
         return r.json().get("result", [])
@@ -96,24 +289,31 @@ class MinifluxFollows(dict):
         opml.addprevious(
             etree.ProcessingInstruction(
                 "xml-stylesheet",
-                'type="text/xsl" href="%s"' % (settings.opml.get("xsl")),
+                'type="text/xsl" href="%s"'
+                % (settings.opml.get("xsl")),
             )
         )
         head = etree.SubElement(opml, "head")
-        title = etree.SubElement(head, "title").text = settings.opml.get("title")
-        dt = etree.SubElement(head, "dateCreated").text = arrow.utcnow().format(
-            "ddd, DD MMM YYYY HH:mm:ss UTC"
-        )
-        owner = etree.SubElement(head, "ownerName").text = settings.opml.get("owner")
-        email = etree.SubElement(head, "ownerEmail").text = settings.opml.get("email")
+        title = etree.SubElement(
+            head, "title"
+        ).text = settings.opml.get("title")
+        dt = etree.SubElement(
+            head, "dateCreated"
+        ).text = arrow.utcnow().format("ddd, DD MMM YYYY HH:mm:ss UTC")
+        owner = etree.SubElement(
+            head, "ownerName"
+        ).text = settings.opml.get("owner")
+        email = etree.SubElement(
+            head, "ownerEmail"
+        ).text = settings.opml.get("email")
         body = etree.SubElement(opml, "body")
         groups = {}
         for feed in self.subscriptions:
             # contains sensitive data, skip it
-            if "sessionid" in feed.get("feed_url") or "sessionid" in feed.get(
-                "site_url"
-            ):
+            if "sessionid" in feed.get(
+                "feed_url"
+            ) or "sessionid" in feed.get("site_url"):
                 continue

             fgroup = feed.get("groups", None)
@@ -136,12 +336,17 @@ class MinifluxFollows(dict):
                     htmlUrl=feed.get("site_url"),
                 )

-        opmlfile = os.path.join(settings.paths.get("content"), "following.opml")
+        opmlfile = os.path.join(
+            settings.paths.get("content"), "following.opml"
+        )
         with open(opmlfile, "wb") as f:
             f.write(
                 etree.tostring(
-                    xmldoc, encoding="utf-8", xml_declaration=True, pretty_print=True
+                    xmldoc,
+                    encoding="utf-8",
+                    xml_declaration=True,
+                    pretty_print=True,
                 )
             )
@@ -149,6 +354,11 @@
 class Favs(object):
     def __init__(self, silo):
         self.silo = silo
+        self.aperture_auth = {
+            "Authorization": "Bearer %s"
+            % (keys.aperture["access_token"])
+        }
+        self.aperture_chid = 0

     @property
     def feeds(self):
@@ -156,7 +366,9 @@ class Favs(object):
     @property
     def since(self):
-        d = os.path.join(settings.paths.get("archive"), "favorite", "%s*" % self.silo)
+        d = os.path.join(
+            settings.paths.get("archive"), "favorite", "%s*" % self.silo
+        )
         files = glob.glob(d)
         if len(files):
             mtime = max([int(os.path.getmtime(f)) for f in files])
@@ -164,6 +376,81 @@ class Favs(object):
             mtime = 0
         return mtime

+    def sync_with_aperture(self):
+        channels = requests.get(
+            "%s?action=channels" % (keys.aperture["url"]),
+            headers=self.aperture_auth,
+        )
+        if channels.status_code != requests.codes.ok:
+            logging.error(
+                "failed to get channels from aperture: ", channels.text
+            )
+            return
+        try:
+            channels = channels.json()
+        except ValueError as e:
+            logging.error("failed to parse channels from aperture: ", e)
+            return
+        if "channels" not in channels:
+            logging.error("no channels found in aperture: ")
+            return
+
+        for channel in channels["channels"]:
+            if channel["name"].lower() == self.silo.lower():
+                self.aperture_chid = channel["uid"]
+                break
+        if not self.aperture_chid:
+            logging.error("no channels found for silo ", self.silo)
+            return
+
+        follows = requests.get(
+            "%s?action=follow&channel=%s"
+            % (keys.aperture["url"], self.aperture_chid),
+            headers=self.aperture_auth,
+        )
+        if follows.status_code != requests.codes.ok:
+            logging.error(
+                "failed to get follows from aperture: ", follows.text
+            )
+            return
+        try:
+            follows = follows.json()
+        except ValueError as e:
+            logging.error("failed to parse follows from aperture: ", e)
+            return
+        if "items" not in follows:
+            logging.error(
+                "no follows found in aperture for channel %s (%s)"
+                % (self.silo, self.aperture_chid)
+            )
+            return
+
+        existing = []
+        for follow in follows["items"]:
+            existing.append(follow["url"])
+        existing = list(set(existing))
+
+        for feed in self.feeds:
+            if feed["xmlUrl"] not in existing:
+                subscribe_to = {
+                    "action": "follow",
+                    "channel": self.aperture_chid,
+                    "url": feed["xmlUrl"],
+                }
+                logging.info(
+                    "subscribing to %s into %s (%s)"
+                    % (feed, self.silo, self.aperture_chid)
+                )
+                subscribe = requests.post(
+                    keys.aperture["url"],
+                    headers=self.aperture_auth,
+                    data=subscribe_to,
+                )
+                logging.debug(subscribe.text)

 class ImgFav(object):
     def __init__(self):
@@ -182,8 +469,11 @@ class ImgFav(object):
         return False

     def save_txt(self):
-        attachments = [os.path.basename(fn) for fn in glob.glob("%s*" % self.targetprefix)
-                       if not os.path.basename(fn).endswith('.md')]
+        attachments = [
+            os.path.basename(fn)
+            for fn in glob.glob("%s*" % self.targetprefix)
+            if not os.path.basename(fn).endswith(".md")
+        ]
         meta = {
             "title": self.title,
             "favorite-of": self.url,

run.py

@@ -4,24 +4,27 @@ import Tumblr
 import LastFM
 import DeviantArt
 import Flickr
-import Artstation
+#import Artstation
 from pprint import pprint

 lfm = LastFM.LastFM()
 lfm.run()

-opml = common.Follows()
+#opml = common.Follows()

 silos = [
     DeviantArt.DAFavs(),
     Flickr.FlickrFavs(),
     Tumblr.TumblrFavs(),
-    Artstation.ASFavs(),
+    # Artstation.ASFavs(),
 ]

 for silo in silos:
     silo.run()
-    opml.update({silo.silo: silo.feeds})
+    #silo.sync_with_aperture()
+    #opml.update({silo.silo: silo.feeds})

-opml.sync()
+#opml.sync()
+#opml.export()
+opml = common.Aperture()
 opml.export()

settings.py

@@ -3,19 +3,25 @@ import re
 import argparse
 import logging

+class nameddict(dict):
+    __getattr__ = dict.get
+    __setattr__ = dict.__setitem__
+    __delattr__ = dict.__delitem__

 base = os.path.abspath(os.path.expanduser("~/Projects/petermolnar.net"))

-opml = {
+opml = nameddict({
     "owner": "Peter Molnar",
     "email": "mail@petermolnar.net",
     "title": "feeds followed by petermolnar.net",
     "xsl": "https://petermolnar.net/following.xsl",
-}
+})

-paths = {
+paths = nameddict({
     "archive": os.path.join(base, "archive"),
     "content": os.path.join(base, "content"),
-}
+    "bookmarks": os.path.join(base, "archive", "bookmarks")
+})

 loglevels = {"critical": 50, "error": 40, "warning": 30, "info": 20, "debug": 10}
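
nameddict simply aliases attribute access onto the dict protocol, so settings.opml.title and settings.opml["title"] are interchangeable; because __getattr__ is dict.get, a missing key yields None instead of raising AttributeError. For example:

    class nameddict(dict):
        __getattr__ = dict.get
        __setattr__ = dict.__setitem__
        __delattr__ = dict.__delitem__

    paths = nameddict({"archive": "/tmp/archive"})
    print(paths.archive)    # "/tmp/archive", same as paths["archive"]
    print(paths.nonsense)   # None rather than AttributeError
    paths.content = "/tmp/content"  # assigns the "content" key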