all repos — silo.pasta @ 83bb380bf2bb1f4c194cb4fd7f73c76b79a99749

initial release with LastFM, Flickr, Tumblr, DeviantArt support
Peter Molnar hello@petermolnar.eu
Mon, 15 Oct 2018 14:16:10 +0100
commit

83bb380bf2bb1f4c194cb4fd7f73c76b79a99749

9 files changed, 767 insertions(+), 0 deletions(-)

jump to
A .gitignore

@@ -0,0 +1,5 @@

keys.py
.venv
__pycache__
A DeviantArt.py

@@ -0,0 +1,137 @@

import os
import glob
import deviantart
from bleach import clean
import arrow
import keys
import common
import settings
from pprint import pprint
import logging


class DAFavs(common.Favs):
    """Archive the 'Featured' favourites collection of a DeviantArt user."""

    def __init__(self):
        super().__init__('deviantart')
        # OAuth client for the DeviantArt API, credentials from keys.py
        self.client = deviantart.Api(
            keys.deviantart.get('key'),
            keys.deviantart.get('secret'),
            scope='user'
        )
        # folder UUID of the 'Featured' collection, filled in by run()
        self.favfolder = None

    def _find_favfolder(self):
        """Page through the user's collection folders until 'Featured' is found."""
        offset = 0
        while not self.favfolder:
            try:
                folders = self.client.get_collections(
                    username=keys.deviantart.get('username'),
                    offset=offset
                )
            except deviantart.api.DeviantartError as e:
                # was print(e); keep going is pointless after an API error
                logging.error('fetching collection folders failed: %s', e)
                break
            offset = folders.get('next_offset')
            for r in folders.get('results'):
                if r.get('name') == 'Featured':
                    self.favfolder = r.get('folderid')
            if folders.get('has_more') == False:
                break

    def run(self):
        """Fetch every deviation in the 'Featured' folder and archive it."""
        self._find_favfolder()
        if not self.favfolder:
            # original code would have called get_collection(None) here
            logging.error("no 'Featured' collection folder was found")
            return

        offset = 0
        has_more = True
        while has_more:
            try:
                fetched = self.client.get_collection(
                    self.favfolder,
                    username=keys.deviantart.get('username'),
                    offset=offset,
                )
            except deviantart.api.DeviantartError as e:
                logging.error('fetching collection page failed: %s', e)
                break
            for r in fetched.get('results'):
                fav = DAFav(r)
                fav.run()
            offset = fetched.get('next_offset')
            has_more = fetched.get('has_more')
            if has_more == False:
                break


class DAFav(common.ImgFav):
    """One favourited deviation, saved as image file(s) with XMP metadata."""

    def __init__(self, deviation):
        # deviation is a deviantart.deviation.Deviation object
        self.deviation = deviation

    def __str__(self):
        return "fav-of %s" % (self.deviation.url)

    @property
    def author(self):
        """Author name and profile URL, used for the XMP Copyright tag."""
        return {
            'name': self.deviation.author,
            'url': 'http://%s.deviantart.com' % self.deviation.author
        }

    @property
    def id(self):
        return self.deviation.deviationid

    @property
    def url(self):
        return self.deviation.url

    @property
    def content(self):
        if self.deviation.excerpt:
            return "%s" % self.deviation.excerpt
        return ''

    @property
    def title(self):
        # fall back to a slug of the URL when the deviation has no title
        title = self.deviation.title
        if not len(title):
            title = common.slugfname(self.url)
        return clean(title.strip())

    @property
    def targetprefix(self):
        """Archive path prefix (extension is added after download)."""
        return os.path.join(
            settings.paths.get('archive'),
            'favorite',
            "deviantart_%s_%s_%s" % (
                common.slugfname('%s' % self.deviation.author),
                self.id,
                common.slugfname('%s' % self.title)
            )
        )

    @property
    def exists(self):
        # any file matching the prefix means this fav was already archived
        return bool(glob.glob("%s*" % self.targetprefix))

    @property
    def published(self):
        return arrow.get(self.deviation.published_time)

    @property
    def tags(self):
        return [self.deviation.category]

    @property
    def images(self):
        """Mapping of {temporary file path: source image URL}."""
        f = "%s%s" % (self.targetprefix, common.TMPFEXT)
        return {
            f: self.deviation.content.get('src')
        }

    def run(self):
        if not self.exists:
            self.fetch_images()


if __name__ == '__main__':
    t = DAFavs()
    t.run()
A Flickr.py

@@ -0,0 +1,225 @@

import os
import glob
import flickr_api
from bleach import clean
import arrow
import keys
import common
import settings
from pprint import pprint
import logging


class FlickrFavs(common.Favs):
    """Archive the authenticated user's Flickr favourites."""

    def __init__(self):
        super().__init__('flickr')
        flickr_api.set_keys(
            api_key=keys.flickr.get('key'),
            api_secret=keys.flickr.get('secret')
        )
        self.user = flickr_api.Person.findByUserName(
            keys.flickr.get('username')
        )

    def run(self):
        """Walk every page of favourites and archive each photo."""
        # the first response tells us the real page count via fetched.info
        pages = 1
        page = 1
        while page <= pages:
            fetched = self.user.getFavorites(
                user_id=self.user.id,
                page=page
            )
            for p in fetched:
                photo = FlickrFav(p)
                photo.run()
            pages = fetched.info.pages
            page = page + 1


class FlickrFav(common.ImgFav):
    """A single favourited Flickr photo."""

    def __init__(self, flickrphoto):
        self.flickrphoto = flickrphoto
        self.info = flickrphoto.getInfo()
        self.owner = self.info.get('owner')

    def __str__(self):
        return "fav-of %s" % (self.url)

    @property
    def author(self):
        return {
            'name': "%s" % self.owner.username,
            'url': "%s" % self.owner.getProfileUrl(),
        }

    @property
    def id(self):
        return "%s" % self.info.get('id')

    @property
    def url(self):
        return "https://www.flickr.com/photos/%s/%s/" % (
            self.owner.id,
            self.id
        )

    @property
    def content(self):
        return "%s" % self.info.get('description')

    @property
    def geo(self):
        """(lat, lon) tuple, or None when the photo carries no location."""
        if 'location' not in self.info:
            return None

        lat = self.info.get('location').get('latitude', None)
        lon = self.info.get('location').get('longitude', None)
        return (lat, lon)

    @property
    def title(self):
        # BUG FIX: the original called ''.strip(title), which strips
        # characters *from the empty string* and therefore always
        # returned '' — strip the title itself instead
        return clean(("%s" % self.info.get('title')).strip())

    @property
    def targetprefix(self):
        return os.path.join(
            settings.paths.get('archive'),
            'favorite',
            "flickr_%s_%s" % (
                common.slugfname('%s' % self.owner.id),
                self.id,
            )
        )

    @property
    def exists(self):
        return bool(glob.glob("%s*" % self.targetprefix))

    @property
    def published(self):
        return arrow.get(self.info.get('dateuploaded'))

    @property
    def tags(self):
        return ["%s" % t.text for t in self.info.get('tags')]

    @property
    def images(self):
        """Pick the largest available rendition; {} when none matches."""
        sizes = self.flickrphoto.getSizes()
        for maybe in ['Original', 'Large 2048', 'Large 1600', 'Large']:
            if maybe in sizes:
                f = "%s%s" % (self.targetprefix, common.TMPFEXT)
                return {
                    f: sizes.get(maybe).get('source')
                }
        # previously fell through returning None, which crashed
        # fetch_images(); an empty dict is a safe no-op
        return {}

    def run(self):
        if not self.exists:
            self.fetch_images()


if __name__ == '__main__':
    t = FlickrFavs()
    t.run()
A LastFM.py

@@ -0,0 +1,102 @@

import os
import csv
import json
import logging
from operator import attrgetter
from collections import namedtuple
import requests
import arrow
import settings
import keys
from pprint import pprint

# one scrobble, in the column order of the CSV archive
Track = namedtuple(
    'Track',
    ['timestamp', 'artist', 'album', 'title', 'artistid', 'albumid', 'img']
)


class LastFM(object):
    """Append the user's recent last.fm scrobbles to a CSV archive."""

    url = 'http://ws.audioscrobbler.com/2.0/'

    def __init__(self):
        self.params = {
            'method': 'user.getrecenttracks',
            'user': keys.lastfm.get('username'),
            'api_key': keys.lastfm.get('key'),
            'format': 'json',
            'limit': '200'
        }
        if os.path.isfile(self.target):
            # incremental pull: only request scrobbles newer than the
            # archive file; the API expects an integer UNIX timestamp,
            # getmtime returns a float
            mtime = int(os.path.getmtime(self.target))
            self.params.update({'from': mtime})

    @property
    def target(self):
        """Path of the CSV archive file."""
        return os.path.join(
            settings.paths.get('archive'),
            'lastfm.csv'
        )

    @property
    def exists(self):
        return os.path.isfile(self.target)

    def extracttracks(self, data):
        """Convert one 'recenttracks' API page into a list of Track tuples."""
        tracks = []
        if not data:
            return tracks
        for track in data.get('track', []):
            # entries without a date are the 'now playing' track - skip
            if 'date' not in track:
                continue
            images = track.get('image', [])
            entry = Track(
                arrow.get(
                    int(track.get('date').get('uts'))
                ).format('YYYY-MM-DDTHH:mm:ssZ'),
                track.get('artist').get('#text', ''),
                track.get('album').get('#text', ''),
                track.get('name', ''),
                track.get('artist').get('mbid', ''),
                track.get('album').get('mbid', ''),
                # last image in the list is the largest; guard against
                # an empty list, which used to raise IndexError
                images[-1].get('#text', '') if images else '',
            )
            tracks.append(entry)
        return tracks

    def fetch(self):
        """Return the 'recenttracks' payload of one API call (or None)."""
        r = requests.get(self.url, params=self.params)
        return json.loads(r.text).get('recenttracks')

    def run(self):
        """Fetch all pages of new scrobbles and append them to the CSV."""
        data = self.fetch()
        if not data:
            # used to crash with AttributeError further down
            logging.error('last.fm returned no recenttracks payload')
            return
        tracks = self.extracttracks(data)
        attr = data.get('@attr', {})
        total = int(attr.get('totalPages', 1))
        current = int(attr.get('page', 1))
        cntr = total - current

        if not len(tracks):
            return

        while cntr > 0:
            current = current + 1
            cntr = total - current
            logging.info('requesting page #%d of paginated results', current)
            self.params.update({
                'page': current
            })
            data = self.fetch()
            tracks = tracks + self.extracttracks(data)

        if not self.exists:
            # brand-new archive: write the CSV header row first
            with open(self.target, 'w') as f:
                writer = csv.DictWriter(f, fieldnames=Track._fields)
                writer.writeheader()

        if len(tracks):
            with open(self.target, 'a') as f:
                writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
                writer.writerows(sorted(tracks, key=attrgetter('timestamp')))


if __name__ == '__main__':
    lfm = LastFM()
    lfm.run()
A Tumblr.py

@@ -0,0 +1,120 @@

import os
import glob
import pytumblr
import arrow
import keys
import common
import settings
from bleach import clean
from pprint import pprint


class TumblrFavs(common.Favs):
    """Walks the authenticated user's Tumblr likes and archives each one."""

    def __init__(self):
        super().__init__('tumblr')
        # OAuth1 client; all four credentials come from keys.py
        self.client = pytumblr.TumblrRestClient(
            keys.tumblr.get('key'),
            keys.tumblr.get('secret'),
            keys.tumblr.get('oauth_token'),
            keys.tumblr.get('oauth_secret')
        )

    def run(self):
        """Fetch likes newer than the last sync and archive them."""
        response = self.client.likes(after=self.since)
        if 'liked_posts' not in response:
            return
        for entry in response.get('liked_posts'):
            TumblrFav(entry).run()


class TumblrFav(common.ImgFav):
    """A single liked Tumblr post, archived as image file(s) + metadata."""

    def __init__(self, data):
        # data is one entry of the 'liked_posts' API response
        self.data = data

    def __str__(self):
        return "like-of %s from blog %s" % (self.url, self.blogname)

    @property
    def blogname(self):
        return self.data.get('blog_name')

    @property
    def id(self):
        return self.data.get('id')

    @property
    def url(self):
        return self.data.get('post_url')

    @property
    def content(self):
        return "%s" % self.data.get('caption', '')

    @property
    def title(self):
        """First non-empty of summary, slug, or a slug of the post URL."""
        for candidate in (
            self.data.get('summary', ''),
            self.data.get('slug', ''),
        ):
            if len(candidate):
                return clean(candidate.strip())
        return clean(common.slugfname(self.url).strip())

    @property
    def targetprefix(self):
        return os.path.join(
            settings.paths.get('archive'),
            'favorite',
            "tumblr_%s_%s" % (self.blogname, self.id)
        )

    @property
    def exists(self):
        # any file matching the prefix means this like was archived already
        return len(glob.glob("%s*" % self.targetprefix)) > 0

    @property
    def published(self):
        # prefer the like timestamp, then the post date, then "now"
        ts = (
            self.data.get('liked_timestamp', False)
            or self.data.get('date', False)
            or arrow.utcnow().timestamp
        )
        return arrow.get(ts)

    @property
    def tags(self):
        return self.data.get('tags', [])

    @property
    def author(self):
        return {
            'name': self.blogname,
            'url': 'http://%s.tumblr.com' % self.blogname
        }

    @property
    def images(self):
        """Mapping of {temporary file path: original-size image URL}."""
        found = {}
        for index, photo in enumerate(self.data.get('photos', [])):
            target = "%s-%d%s" % (self.targetprefix, index, common.TMPFEXT)
            found[target] = photo.get('original_size').get('url')
        return found

    def run(self):
        if self.exists:
            return
        self.fetch_images()


if __name__ == '__main__':
    t = TumblrFavs()
    t.run()
A common.py

@@ -0,0 +1,133 @@

import os
import glob
import imghdr
import re
import logging
import shutil
import subprocess
from slugify import slugify
import requests
import arrow
import settings
from pprint import pprint

# temporary extension for freshly downloaded files; replaced with the real
# image extension once the file type has been detected
TMPFEXT = '.xyz'


def slugfname(url):
    """Return a filesystem-safe ASCII slug (max 200 chars) for a URL."""
    return slugify(
        re.sub(r"^https?://(?:www)?", "", url),
        only_ascii=True,
        lower=True
    )[:200]


class Favs(object):
    """Base class for a silo sync: knows when it was last synced."""

    def __init__(self, silo):
        # silo name, used as the archive filename prefix
        self.silo = silo

    @property
    def since(self):
        """UNIX timestamp of the newest archived favourite for this silo.

        Returns 0 when nothing has been archived yet.
        """
        mtime = 0
        d = os.path.join(
            settings.paths.get('archive'),
            'favorite',
            "%s-*" % self.silo
        )
        files = glob.glob(d)

        if len(files):
            for f in files:
                ftime = int(os.path.getmtime(f))
                if ftime > mtime:
                    mtime = ftime
            # offset by one second so 'after'/'since' API queries exclude
            # the newest already-archived item (kept from the original,
            # which marked it "TODO why is this here?")
            mtime = mtime + 1
        return mtime


class ImgFav(object):
    """Mixin that downloads images and embeds XMP/EXIF metadata.

    Subclasses must provide: images, url, title, content, author,
    published, tags (and optionally geo).
    """

    def __init__(self):
        return

    def fetch_images(self):
        """Download every image in self.images ({target path: URL})."""
        for fpath, url in self.images.items():
            self.fetch_image(fpath, url)

    def fetch_image(self, fpath, url):
        """Stream one image to disk, tag it, and fix its extension."""
        logging.info("pulling image %s to %s", url, fpath)
        r = requests.get(url, stream=True)
        if r.status_code != 200:
            # previously failed silently; log so missing files are traceable
            logging.error("download of %s failed with HTTP %s", url, r.status_code)
            return
        with open(fpath, 'wb') as f:
            r.raw.decode_content = True
            shutil.copyfileobj(r.raw, f)

        imgtype = imghdr.what(fpath)
        if not imgtype:
            # not a recognisable image: drop the temporary file
            os.remove(fpath)
            return
        # NOTE: imghdr reports 'jpeg', never 'jpg'; 'jpg' kept for safety
        if imgtype in ['jpg', 'jpeg', 'png']:
            self.write_exif(fpath)
        os.rename(fpath, fpath.replace(TMPFEXT, ".%s" % (imgtype)))

    def write_exif(self, fpath):
        """Embed author/date/title/tags (and GPS if known) via exiftool."""
        # lazy %-args instead of eager string interpolation
        logging.info('populating EXIF data of %s', fpath)

        geo_lat = False
        geo_lon = False

        # geo is optional on subclasses; only use it when both coordinates
        # are present and not the literal string 'null'
        if hasattr(self, 'geo') and self.geo is not None:
            lat, lon = self.geo
            if lat and lon and 'null' != lat and 'null' != lon:
                geo_lat = lat
                geo_lon = lon

        params = [
            'exiftool',
            '-overwrite_original',
            '-XMP:Copyright=Copyright %s %s (%s)' % (
                self.published.to('utc').format('YYYY'),
                self.author.get('name'),
                self.author.get('url'),
            ),
            '-XMP:Source=%s' % self.url,
            '-XMP:ReleaseDate=%s' % self.published.to('utc').format('YYYY:MM:DD HH:mm:ss'),
            '-XMP:Headline=%s' % self.title,
            '-XMP:Description=%s' % self.content,
        ]

        for t in self.tags:
            params.append('-XMP:HierarchicalSubject+=%s' % t)
            params.append('-XMP:Subject+=%s' % t)

        if geo_lat and geo_lon:
            geo_lat = round(float(geo_lat), 6)
            geo_lon = round(float(geo_lon), 6)

            # exiftool wants unsigned coordinates plus a hemisphere ref
            GPSLatitudeRef = 'S' if geo_lat < 0 else 'N'
            GPSLongitudeRef = 'W' if geo_lon < 0 else 'E'

            params.append('-GPSLongitude=%s' % abs(geo_lon))
            params.append('-GPSLatitude=%s' % abs(geo_lat))
            params.append('-GPSLongitudeRef=%s' % GPSLongitudeRef)
            params.append('-GPSLatitudeRef=%s' % GPSLatitudeRef)

        params.append(fpath)

        p = subprocess.Popen(
            params,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout, stderr = p.communicate()
        if p.returncode:
            # previously ignored; a failed exiftool run left files untagged
            logging.error('exiftool failed on %s: %s', fpath, stderr)

        # exiftool can leave a backup file despite -overwrite_original
        _original = '%s_original' % fpath
        if os.path.exists(_original):
            os.unlink(_original)
A requirements.txt

@@ -0,0 +1,5 @@

deviantart==0.1.5
flickr-api==0.6.1
PyTumblr==0.0.8
arrow==0.12.1
requests==2.19.1
bleach
unicode-slugify
A run

@@ -0,0 +1,9 @@

+#!/usr/bin/env bash + +set -euo pipefail +IFS=$'\n\t' + +python3 Tumblr.py +python3 LastFM.py +python3 DeviantArt.py +python3 Flickr.py
A settings.py

@@ -0,0 +1,31 @@

+import os +import re +import argparse +import logging + +base = os.path.abspath(os.path.expanduser('~/Projects/petermolnar.net')) + +paths = { + 'archive': os.path.join(base, 'archive'), +} + +loglevels = { + 'critical': 50, + 'error': 40, + 'warning': 30, + 'info': 20, + 'debug': 10 +} + +_parser = argparse.ArgumentParser(description='Parameters for silo.pasta') +_parser.add_argument( + '--loglevel', + default='info', + help='change loglevel' +) + +args = vars(_parser.parse_args()) +logging.basicConfig( + level=loglevels[args.get('loglevel')], + format='%(asctime)s - %(levelname)s - %(message)s' +)