From 83bb380bf2bb1f4c194cb4fd7f73c76b79a99749 Mon Sep 17 00:00:00 2001
From: Peter Molnar
Date: Mon, 15 Oct 2018 14:16:10 +0100
Subject: [PATCH] initial release with LastFM, Flickr, Tumblr, DeviantArt support

---
 .gitignore       |   5 ++
 DeviantArt.py    | 137 +++++++++++++++++++++++++++++
 Flickr.py        | 225 +++++++++++++++++++++++++++++++++++++++++++++++
 LastFM.py        | 102 +++++++++++++++++++++
 Tumblr.py        | 120 +++++++++++++++++++++++++
 common.py        | 133 ++++++++++++++++++++++++++++
 requirements.txt |   5 ++
 run              |   9 ++
 settings.py      |  31 +++++++
 9 files changed, 767 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 DeviantArt.py
 create mode 100644 Flickr.py
 create mode 100644 LastFM.py
 create mode 100644 Tumblr.py
 create mode 100644 common.py
 create mode 100644 requirements.txt
 create mode 100755 run
 create mode 100644 settings.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f1af34a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+keys.py
+.venv
+__pycache
+keys.py
+__pycache__
diff --git a/DeviantArt.py b/DeviantArt.py
new file mode 100644
index 0000000..54993ad
--- /dev/null
+++ b/DeviantArt.py
@@ -0,0 +1,137 @@
+import os
+import glob
+import deviantart
+from bleach import clean
+import arrow
+import keys
+import common
+import settings
+from pprint import pprint
+import logging
+
+class DAFavs(common.Favs):
+    def __init__(self):
+        super().__init__('deviantart')
+        self.client = deviantart.Api(
+            keys.deviantart.get('key'),
+            keys.deviantart.get('secret'),
+            scope='user'
+        )
+        self.favfolder = None
+
+    def run(self):
+        offset = 0
+        while not self.favfolder:
+            try:
+                folders = self.client.get_collections(
+                    username=keys.deviantart.get('username'),
+                    offset=offset
+                )
+                offset = folders.get('next_offset')
+                for r in folders.get('results'):
+                    if r.get('name') == 'Featured':
+                        self.favfolder = r.get('folderid')
+                if (folders.get('has_more') == False):
+                    break
+            except deviantart.api.DeviantartError as e:
+                print(e)
+                break
+
+        offset = 0
+        has_more = True
+        while has_more:
+            try:
+                fetched = self.client.get_collection(
+                    self.favfolder,
+                    username=keys.deviantart.get('username'),
+                    offset=offset,
+                )
+                for r in fetched.get('results'):
+                    fav = DAFav(r)
+                    fav.run()
+                offset = fetched.get('next_offset')
+                has_more = fetched.get('has_more')
+                if (has_more == False):
+                    break
+            except deviantart.api.DeviantartError as e:
+                print(e)
+                break
+
+
+class DAFav(common.ImgFav):
+    def __init__(self, deviation, ):
+        self.deviation = deviation
+
+    def __str__(self):
+        return "fav-of %s" % (self.deviation.url)
+
+    @property
+    def author(self):
+        return {
+            'name': self.deviation.author,
+            'url': 'http://%s.deviantart.com' % self.deviation.author
+        }
+
+    @property
+    def id(self):
+        return self.deviation.deviationid
+
+    @property
+    def url(self):
+        return self.deviation.url
+
+    @property
+    def content(self):
+        if self.deviation.excerpt:
+            return "%s" % self.deviation.excerpt
+        return ''
+
+    @property
+    def title(self):
+        title = self.deviation.title
+        if not len(title):
+            title = common.slugfname(self.url)
+        return clean(title.strip())
+
+    @property
+    def targetprefix(self):
+        return os.path.join(
+            settings.paths.get('archive'),
+            'favorite',
+            "deviantart_%s_%s_%s" % (
+                common.slugfname('%s' % self.deviation.author),
+                self.id,
+                common.slugfname('%s' % self.title)
+            )
+        )
+
+    @property
+    def exists(self):
+        maybe = glob.glob("%s*" % self.targetprefix)
+        if len(maybe):
+            return True
+        return False
+
+    @property
+    def published(self):
+        return arrow.get(self.deviation.published_time)
+
+    @property
+    def tags(self):
+        return [self.deviation.category]
+
+    @property
+    def images(self):
+        f = "%s%s" % (self.targetprefix, common.TMPFEXT)
+        return {
+            f: self.deviation.content.get('src')
+        }
+
+    def run(self):
+        if not self.exists:
+            self.fetch_images()
+
+
+if __name__ == '__main__':
+    t = DAFavs()
+    t.run()
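
Note on DeviantArt.py: DAFavs.run() walks the collection folders and then the
"Featured" folder with two offset/has_more loops. A minimal sketch of that
pagination pattern as a reusable generator, assuming only the response shape
used above (results, has_more, next_offset); the helper is illustrative and
not part of the patch:

    def paginate(endpoint, **kwargs):
        # generic offset pagination over DeviantArt API responses that carry
        # 'results', 'has_more' and 'next_offset'; endpoint is the bound API
        # call, e.g. client.get_collections
        offset = 0
        while True:
            page = endpoint(offset=offset, **kwargs)
            for result in page.get('results', []):
                yield result
            if not page.get('has_more'):
                break
            offset = page.get('next_offset')

With such a helper both loops in DAFavs.run() would reduce to plain for loops;
the inline version in the patch behaves the same way.
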
diff --git a/Flickr.py b/Flickr.py
new file mode 100644
index 0000000..7368126
--- /dev/null
+++ b/Flickr.py
@@ -0,0 +1,225 @@
+import os
+import glob
+import flickr_api
+from bleach import clean
+import arrow
+import keys
+import common
+import settings
+from pprint import pprint
+import logging
+
+class FlickrFavs(common.Favs):
+    def __init__(self):
+        super().__init__('flickr')
+        flickr_api.set_keys(
+            api_key = keys.flickr.get('key'),
+            api_secret = keys.flickr.get('secret')
+        )
+        self.user = flickr_api.Person.findByUserName(
+            keys.flickr.get('username')
+        )
+
+    def run(self):
+        pages = 1
+        page = 1
+        while page <= pages:
+            #try:
+            fetched = self.user.getFavorites(
+                user_id=self.user.id,
+                #extras=','.join([
+                    #'description',
+                    #'geo',
+                    #'tags',
+                    #'owner_name',
+                    #'date_upload',
+                    #'url_o',
+                    #'url_k',
+                    #'url_h',
+                    #'url_b',
+                    #'url_c',
+                    #'url_z',
+                #]),
+                #'min_fave_date': self.lastpulled
+                page=page
+            )
+            for p in fetched:
+                photo = FlickrFav(p)
+                photo.run()
+            pages = fetched.info.pages
+            page = page + 1
+
+
+class FlickrFav(common.ImgFav):
+    def __init__(self, flickrphoto):
+        self.flickrphoto = flickrphoto
+        self.info = flickrphoto.getInfo()
+        self.owner = self.info.get('owner')
+
+    def __str__(self):
+        return "fav-of %s" % (self.url)
+
+    @property
+    def author(self):
+        return {
+            'name': "%s" % self.owner.username,
+            'url': "%s" % self.owner.getProfileUrl(),
+        }
+
+    @property
+    def id(self):
+        return "%s" % self.info.get('id')
+
+    @property
+    def url(self):
+        return "https://www.flickr.com/photos/%s/%s/" % (
+            self.owner.id,
+            self.id
+        )
+
+    @property
+    def content(self):
+        return "%s" % self.info.get('description')
+
+    @property
+    def geo(self):
+        if 'location' not in self.info:
+            return None
+
+        lat = self.info.get('location').get('latitude', None)
+        lon = self.info.get('location').get('longitude', None)
+        return (lat, lon)
+
+    @property
+    def title(self):
+        return clean(''.strip("%s" % self.info.get('title')))
+
+    @property
+    def targetprefix(self):
+        return os.path.join(
+            settings.paths.get('archive'),
+            'favorite',
+            "flickr_%s_%s" % (
+                common.slugfname('%s' % self.owner.id),
+                self.id,
+            )
+        )
+
+    @property
+    def exists(self):
+        maybe = glob.glob("%s*" % self.targetprefix)
+        if len(maybe):
+            return True
+        return False
+
+    @property
+    def published(self):
+        return arrow.get(self.info.get('dateuploaded'))
+
+    @property
+    def tags(self):
+        tags = []
+        for t in self.info.get('tags'):
+            tags.append("%s" % t.text)
+        return tags
+
+    @property
+    def images(self):
+        sizes = self.flickrphoto.getSizes()
+        for maybe in ['Original', 'Large 2048', 'Large 1600', 'Large']:
+            if maybe in sizes:
+                f = "%s%s" % (self.targetprefix, common.TMPFEXT)
+                return {
+                    f: sizes.get(maybe).get('source')
+                }
+
+    def run(self):
+        if not self.exists:
+            self.fetch_images()
+
+
+if __name__ == '__main__':
+    t = FlickrFavs()
+    t.run()
+
+#https://api.flickr.com/services/rest/?method=flickr.favorites.getPublicList&api_key=80a5c2e7fdad3ed1304298850caab99d&user_id=36003160%40N08&per_page=500&format=json&nojsoncallback=1
+
+
+#class FlickrFavs(Favs):
+    #url = 'https://api.flickr.com/services/rest/'
+
+    #def __init__(self):
+        #super().__init__('flickr')
+        #self.get_uid()
+        #self.params = {
+            #'method': 'flickr.favorites.getList',
+            #'api_key': shared.config.get('api_flickr', 'api_key'),
+            #'user_id': self.uid,
+            #'extras': ','.join([
+                #'description',
+                #'geo',
+                #'tags',
+                #'owner_name',
+                #'date_upload',
+                #'url_o',
+                #'url_k',
+                #'url_h',
+                #'url_b',
+                #'url_c',
+                #'url_z',
+            #]),
+            #'per_page': 500, # maximim
+            #'format': 'json',
+            #'nojsoncallback': '1',
+            #'min_fave_date': self.lastpulled
+        #}
+
+    #def get_uid(self):
+        #params = {
+            #'method': 'flickr.people.findByUsername',
+            #'api_key': shared.config.get('api_flickr', 'api_key'),
+            #'format': 'json',
+            #'nojsoncallback': '1',
+            #'username': shared.config.get('api_flickr', 'username'),
+        #}
+        #r = requests.get(
+            #self.url,
+            #params=params
+        #)
+        #parsed = json.loads(r.text)
+        #self.uid = parsed.get('user', {}).get('id')

+    #def getpaged(self, offset):
+        #logging.info('requesting page #%d of paginated results', offset)
+        #self.params.update({
+            #'page': offset
+        #})
+        #r = requests.get(
+            #self.url,
+            #params=self.params
+        #)
+        #parsed = json.loads(r.text)
+        #return parsed.get('photos', {}).get('photo', [])
+
+    #def run(self):
+        #r = requests.get(self.url, params=self.params)
+        #js = json.loads(r.text)
+        #js = js.get('photos', {})
+
+        #photos = js.get('photo', [])
+
+        #total = int(js.get('pages', 1))
+        #current = int(js.get('page', 1))
+        #cntr = total - current
+
+        #while cntr > 0:
+            #current = current + 1
+            #paged = self.getpaged(current)
+            #photos = photos + paged
+            #cntr = total - current
+
+        #for photo in photos:
+            #fav = FlickrFav(photo)
+            #if not fav.exists:
+                #fav.run()
+                ## fav.fix_extension()
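
Note on Flickr.py: the title property calls clean(''.strip("%s" % self.info.get('title'))),
and str.strip() on an empty string strips *from* the empty string, so the result
is always ''. A possible correction, keeping the bleach.clean() call used by the
other silos (a sketch, not part of the patch):

    @property
    def title(self):
        # strip whitespace from the title itself, then sanitise it;
        # ''.strip(title) always evaluates to an empty string
        return clean(("%s" % self.info.get('title', '')).strip())

The large commented-out, requests-based FlickrFavs block at the bottom of the
file is superseded by the flickr_api version above and could be dropped in a
follow-up.
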
diff --git a/LastFM.py b/LastFM.py
new file mode 100644
index 0000000..fe53832
--- /dev/null
+++ b/LastFM.py
@@ -0,0 +1,102 @@
+import os
+import csv
+import json
+import logging
+from operator import attrgetter
+from collections import namedtuple
+import requests
+import arrow
+import settings
+import keys
+from pprint import pprint
+
+Track = namedtuple(
+    'Track',
+    ['timestamp', 'artist', 'album', 'title', 'artistid', 'albumid', 'img']
+)
+
+class LastFM(object):
+    url = 'http://ws.audioscrobbler.com/2.0/'
+
+    def __init__(self):
+        self.params = {
+            'method': 'user.getrecenttracks',
+            'user': keys.lastfm.get('username'),
+            'api_key': keys.lastfm.get('key'),
+            'format': 'json',
+            'limit': '200'
+        }
+        if os.path.isfile(self.target):
+            mtime = os.path.getmtime(self.target)
+            self.params.update({'from': mtime})
+
+    @property
+    def target(self):
+        return os.path.join(
+            settings.paths.get('archive'),
+            'lastfm.csv'
+        )
+
+    @property
+    def exists(self):
+        return os.path.isfile(self.target)
+
+
+    def extracttracks(self, data):
+        tracks = []
+        for track in data.get('track', []):
+            if 'date' not in track:
+                continue
+            entry = Track(
+                arrow.get(
+                    int(track.get('date').get('uts'))
+                ).format('YYYY-MM-DDTHH:mm:ssZ'),
+                track.get('artist').get('#text', ''),
+                track.get('album').get('#text', ''),
+                track.get('name', ''),
+                track.get('artist').get('mbid', ''),
+                track.get('album').get('mbid', ''),
+                track.get('image', [])[-1].get('#text', ''),
+            )
+            tracks.append(entry)
+        return tracks
+
+    def fetch(self):
+        r = requests.get(self.url, params=self.params)
+        return json.loads(r.text).get('recenttracks')
+
+
+    def run(self):
+        data = self.fetch()
+        tracks = self.extracttracks(data)
+        total = int(data.get('@attr').get('totalPages'))
+        current = int(data.get('@attr').get('page'))
+        cntr = total - current
+
+        if not len(tracks):
+            return
+
+        while cntr > 0:
+            current = current + 1
+            cntr = total - current
+            logging.info('requesting page #%d of paginated results', current)
+            self.params.update({
+                'page': current
+            })
+            data = self.fetch()
+            tracks = tracks + self.extracttracks(data)
+
+        if not self.exists:
+            with open(self.target, 'w') as f:
+                writer = csv.DictWriter(f, fieldnames=Track._fields)
+                writer.writeheader()
+
+        if len(tracks):
+            with open(self.target, 'a') as f:
+                writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
+                writer.writerows(sorted(tracks, key=attrgetter('timestamp')))
+
+
+if __name__ == '__main__':
+    lfm = LastFM()
+    lfm.run()
diff --git a/Tumblr.py b/Tumblr.py
new file mode 100644
index 0000000..8c4611b
--- /dev/null
+++ b/Tumblr.py
@@ -0,0 +1,120 @@
+import os
+import glob
+import pytumblr
+import arrow
+import keys
+import common
+import settings
+from bleach import clean
+from pprint import pprint
+
+
+class TumblrFavs(common.Favs):
+    def __init__(self):
+        super().__init__('tumblr')
+        self.client = pytumblr.TumblrRestClient(
+            keys.tumblr.get('key'),
+            keys.tumblr.get('secret'),
+            keys.tumblr.get('oauth_token'),
+            keys.tumblr.get('oauth_secret')
+        )
+
+    def run(self):
+        likes = self.client.likes(after=self.since)
+        if 'liked_posts' not in likes:
+            return
+
+        for like in likes.get('liked_posts'):
+            fav = TumblrFav(like)
+
+            fav.run()
+
+
+class TumblrFav(common.ImgFav):
+    def __init__(self, data):
+        self.data = data
+
+    def __str__(self):
+        return "like-of %s from blog %s" % (self.url, self.blogname)
+
+    @property
+    def blogname(self):
+        return self.data.get('blog_name')
+
+    @property
+    def id(self):
+        return self.data.get('id')
+
+    @property
+    def url(self):
+        return self.data.get('post_url')
+
+    @property
+    def content(self):
+        return "%s" % self.data.get('caption', '')
+
+    @property
+    def title(self):
+        title = self.data.get('summary', '')
+        if not len(title):
+            title = self.data.get('slug', '')
+        if not len(title):
+            title = common.slugfname(self.url)
+        return clean(title.strip())
+
+    @property
+    def targetprefix(self):
+        return os.path.join(
+            settings.paths.get('archive'),
+            'favorite',
+            "tumblr_%s_%s" % (self.blogname, self.id)
+        )
+
+    @property
+    def exists(self):
+        maybe = glob.glob("%s*" % self.targetprefix)
+        if len(maybe):
+            return True
+        return False
+
+    @property
+    def published(self):
+        maybe = self.data.get('liked_timestamp', False)
+        if not maybe:
+            maybe = self.data.get('date', False)
+        if not maybe:
+            maybe = arrow.utcnow().timestamp
+        return arrow.get(maybe)
+
+    @property
+    def tags(self):
+        return self.data.get('tags', [])
+
+    @property
+    def author(self):
+        return {
+            'name': self.blogname,
+            'url': 'http://%s.tumblr.com' % self.blogname
+        }
+
+    @property
+    def images(self):
+        r = {}
+        cntr = 0
+        for p in self.data.get('photos', []):
+            f = "%s-%d%s" % (self.targetprefix, cntr, common.TMPFEXT)
+            r.update({
+                f: p.get('original_size').get('url')
+            })
+            cntr = cntr + 1
+        return r
+
+
+    def run(self):
+        if not self.exists:
+            self.fetch_images()
+
+
+if __name__ == '__main__':
+    t = TumblrFavs()
+    t.run()
diff --git a/common.py b/common.py
new file mode 100644
index 0000000..903e1da
--- /dev/null
+++ b/common.py
@@ -0,0 +1,133 @@
+import os
+import glob
+import imghdr
+import re
+import logging
+import shutil
+import subprocess
+from slugify import slugify
+import requests
+import arrow
+import settings
+from pprint import pprint
+
+TMPFEXT = '.xyz'
+
+def slugfname(url):
+    return slugify(
+        re.sub(r"^https?://(?:www)?", "", url),
+        only_ascii=True,
+        lower=True
+    )[:200]
+
+
+class Favs(object):
+    def __init__(self, silo):
+        self.silo = silo
+
+    @property
+    def since(self):
+        mtime = 0
+        d = os.path.join(
+            settings.paths.get('archive'),
+            'favorite',
+            "%s-*" % self.silo
+        )
+        files = glob.glob(d)
+
+        if (len(files)):
+            for f in files:
+                ftime = int(os.path.getmtime(f))
+                if ftime > mtime:
+                    mtime = ftime
+            # TODO why is this here?
+            mtime = mtime + 1
+        return mtime
+
+
+class ImgFav(object):
+    def __init__(self):
+        return
+
+    def fetch_images(self):
+        for fpath, url in self.images.items():
+            self.fetch_image(fpath, url)
+
+    def fetch_image(self, fpath, url):
+        logging.info("pulling image %s to %s", url, fpath)
+        r = requests.get(url, stream=True)
+        if r.status_code == 200:
+            with open(fpath, 'wb') as f:
+                r.raw.decode_content = True
+                shutil.copyfileobj(r.raw, f)
+
+            imgtype = imghdr.what(fpath)
+            if not imgtype:
+                os.remove(fpath)
+                return
+            if imgtype in ['jpg', 'jpeg', 'png']:
+                self.write_exif(fpath)
+            os.rename(fpath, fpath.replace(TMPFEXT, ".%s" % (imgtype)))
+
+    def write_exif(self, fpath):
+        logging.info('populating EXIF data of %s' % fpath)
+
+        geo_lat = False
+        geo_lon = False
+
+        if hasattr(self, 'geo') and self.geo != None:
+            lat, lon = self.geo
+            if lat and lon and 'null' != lat and 'null' != lon:
+                geo_lat = lat
+                geo_lon = lon
+
+        params = [
+            'exiftool',
+            '-overwrite_original',
+            '-XMP:Copyright=Copyright %s %s (%s)' % (
+                self.published.to('utc').format('YYYY'),
+                self.author.get('name'),
+                self.author.get('url'),
+            ),
+            '-XMP:Source=%s' % self.url,
+            '-XMP:ReleaseDate=%s' % self.published.to('utc').format('YYYY:MM:DD HH:mm:ss'),
+            '-XMP:Headline=%s' % self.title,
+            '-XMP:Description=%s' % self.content,
+        ]
+
+        for t in self.tags:
+            params.append('-XMP:HierarchicalSubject+=%s' % t)
+            params.append('-XMP:Subject+=%s' % t)
+
+        if geo_lat and geo_lon:
+            geo_lat = round(float(geo_lat), 6)
+            geo_lon = round(float(geo_lon), 6)
+
+            if geo_lat < 0:
+                GPSLatitudeRef = 'S'
+            else:
+                GPSLatitudeRef = 'N'
+
+            if geo_lon < 0:
+                GPSLongitudeRef = 'W'
+            else:
+                GPSLongitudeRef = 'E'
+
+            params.append('-GPSLongitude=%s' % abs(geo_lon))
+            params.append('-GPSLatitude=%s' % abs(geo_lat))
+            params.append('-GPSLongitudeRef=%s' % GPSLongitudeRef)
+            params.append('-GPSLatitudeRef=%s' % GPSLatitudeRef)
+
+        params.append(fpath)
+
+        p = subprocess.Popen(
+            params,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+
+        stdout, stderr = p.communicate()
+        _original = '%s_original' % fpath
+        if os.path.exists(_original):
+            os.unlink(_original)
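
Note on common.py: ImgFav is a mixin; fetch_images() and write_exif() read
images, published, author, url, title, content and tags (and optionally geo)
from the subclass, which is the contract DAFav, FlickrFav and TumblrFav
implement above. A minimal sketch of that contract, with a hypothetical source
URL and target path, in case another silo gets added later:

    import arrow
    import common

    class ExampleFav(common.ImgFav):
        # hypothetical values, only to show which attributes ImgFav expects
        url = 'https://example.com/photo/1'
        title = 'example'
        content = ''
        tags = []
        author = {'name': 'someone', 'url': 'https://example.com'}
        published = arrow.utcnow()

        @property
        def images(self):
            # local target (with TMPFEXT) -> remote image URL
            return {'/tmp/example%s' % common.TMPFEXT:
                    'https://example.com/photo/1.jpg'}

    ExampleFav().fetch_images()

write_exif() shells out to exiftool, so that binary has to be on PATH for any
of the image-saving silos to finish.
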
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e98dc0e
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+deviantart==0.1.5
+flickr-api==0.6.1
+PyTumblr==0.0.8
+arrow==0.12.1
+requests==2.19.1
diff --git a/run b/run
new file mode 100755
index 0000000..63dc43e
--- /dev/null
+++ b/run
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+IFS=$'\n\t'
+
+python3 Tumblr.py
+python3 LastFM.py
+python3 DeviantArt.py
+python3 Flickr.py
diff --git a/settings.py b/settings.py
new file mode 100644
index 0000000..2368086
--- /dev/null
+++ b/settings.py
@@ -0,0 +1,31 @@
+import os
+import re
+import argparse
+import logging
+
+base = os.path.abspath(os.path.expanduser('~/Projects/petermolnar.net'))
+
+paths = {
+    'archive': os.path.join(base, 'archive'),
+}
+
+loglevels = {
+    'critical': 50,
+    'error': 40,
+    'warning': 30,
+    'info': 20,
+    'debug': 10
+}
+
+_parser = argparse.ArgumentParser(description='Parameters for silo.pasta')
+_parser.add_argument(
+    '--loglevel',
+    default='info',
+    help='change loglevel'
+)
+
+args = vars(_parser.parse_args())
+logging.basicConfig(
+    level=loglevels[args.get('loglevel')],
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
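
Note on configuration: every silo module imports keys, which is listed in
.gitignore and therefore never ships with the repository. Based on the
keys.*.get() lookups in this patch it is expected to provide per-service dicts
roughly like the sketch below; the values are placeholders, not real
credentials:

    # keys.py -- kept out of git; shape inferred from the lookups above
    deviantart = {'key': '...', 'secret': '...', 'username': '...'}
    flickr = {'key': '...', 'secret': '...', 'username': '...'}
    tumblr = {
        'key': '...',
        'secret': '...',
        'oauth_token': '...',
        'oauth_secret': '...',
    }
    lastfm = {'username': '...', 'key': '...'}

With keys.py in place, ./run executes the four fetchers in sequence; the
--loglevel argument defined in settings.py is parsed at import time, so it can
also be passed to any of the four scripts when they are run individually.
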