initial release with LastFM, Flickr, Tumblr, DeviantArt support

This commit is contained in:
Peter Molnar 2018-10-15 14:16:10 +01:00
commit 83bb380bf2
9 changed files with 767 additions and 0 deletions

5
.gitignore vendored Normal file
View file

@ -0,0 +1,5 @@
keys.py
.venv
__pycache
keys.py
__pycache__

137
DeviantArt.py Normal file
View file

@ -0,0 +1,137 @@
import os
import glob
import deviantart
from bleach import clean
import arrow
import keys
import common
import settings
from pprint import pprint
import logging
class DAFavs(common.Favs):
    """Archive the deviations stored in the user's 'Featured' collection.

    The DeviantArt client is built from the credentials in ``keys.deviantart``.
    ``run`` first resolves the id of the collection named 'Featured' and then
    pages through its content, handing each entry to ``DAFav``.
    """

    def __init__(self):
        super().__init__('deviantart')
        self.client = deviantart.Api(
            keys.deviantart.get('key'),
            keys.deviantart.get('secret'),
            scope='user'
        )
        # folderid of the 'Featured' collection; resolved lazily in run()
        self.favfolder = None

    def _find_favfolder(self):
        """Page through the user's collections until 'Featured' is found.

        Sets ``self.favfolder`` on success and leaves it as ``None`` when the
        collection does not exist or the API reports an error.
        """
        offset = 0
        while not self.favfolder:
            try:
                folders = self.client.get_collections(
                    username=keys.deviantart.get('username'),
                    offset=offset
                )
            except deviantart.api.DeviantartError as e:
                logging.error('listing DeviantArt collections failed: %s', e)
                return
            for r in folders.get('results'):
                if r.get('name') == 'Featured':
                    self.favfolder = r.get('folderid')
            # a falsy/missing has_more ends paging instead of looping forever
            if not folders.get('has_more'):
                return
            offset = folders.get('next_offset')

    def run(self):
        """Fetch every entry of the 'Featured' collection and archive it."""
        self._find_favfolder()
        if not self.favfolder:
            # without a folder id the content request cannot succeed
            logging.warning("no 'Featured' DeviantArt collection found")
            return

        offset = 0
        has_more = True
        while has_more:
            try:
                fetched = self.client.get_collection(
                    self.favfolder,
                    username=keys.deviantart.get('username'),
                    offset=offset,
                )
            except deviantart.api.DeviantartError as e:
                logging.error('fetching DeviantArt collection failed: %s', e)
                break
            for r in fetched.get('results'):
                DAFav(r).run()
            offset = fetched.get('next_offset')
            has_more = fetched.get('has_more')
class DAFav(common.ImgFav):
    """One favourited deviation, exposed through the ``ImgFav`` interface.

    ``deviation`` is the object yielded by the DeviantArt client; only its
    ``url``, ``author``, ``deviationid``, ``excerpt``, ``title``,
    ``published_time``, ``category`` and ``content`` attributes are read.
    """

    def __init__(self, deviation, ):
        self.deviation = deviation

    def __str__(self):
        return "fav-of %s" % (self.deviation.url)

    @property
    def author(self):
        """Dict with the author's ``name`` and profile ``url``."""
        return {
            'name': self.deviation.author,
            'url': 'http://%s.deviantart.com' % self.deviation.author
        }

    @property
    def id(self):
        return self.deviation.deviationid

    @property
    def url(self):
        return self.deviation.url

    @property
    def content(self):
        """The deviation excerpt as text, or an empty string when absent."""
        if self.deviation.excerpt:
            return "%s" % self.deviation.excerpt
        return ''

    @property
    def title(self):
        """Sanitised title; falls back to a slug of the URL when empty."""
        # a missing title is treated like an empty one instead of raising
        title = self.deviation.title or ''
        if not title:
            title = common.slugfname(self.url)
        return clean(title.strip())

    @property
    def targetprefix(self):
        """Archive path of this favourite without a file extension."""
        return os.path.join(
            settings.paths.get('archive'),
            'favorite',
            "deviantart_%s_%s_%s" % (
                common.slugfname('%s' % self.deviation.author),
                self.id,
                common.slugfname('%s' % self.title)
            )
        )

    @property
    def exists(self):
        """True when any file starting with ``targetprefix`` is on disk."""
        return bool(glob.glob("%s*" % self.targetprefix))

    @property
    def published(self):
        return arrow.get(self.deviation.published_time)

    @property
    def tags(self):
        return [self.deviation.category]

    @property
    def images(self):
        """Map of temporary target path -> source URL for the deviation.

        Returns an empty dict when the deviation carries no downloadable
        content, so ``fetch_images`` simply has nothing to do.
        """
        content = self.deviation.content
        src = content.get('src') if content else None
        if not src:
            return {}
        f = "%s%s" % (self.targetprefix, common.TMPFEXT)
        return {
            f: src
        }

    def run(self):
        """Download the image unless it has already been archived."""
        if not self.exists:
            self.fetch_images()
# Script entry point: archive all DeviantArt favourites.
if __name__ == '__main__':
    DAFavs().run()

225
Flickr.py Normal file
View file

@ -0,0 +1,225 @@
import os
import glob
import flickr_api
from bleach import clean
import arrow
import keys
import common
import settings
from pprint import pprint
import logging
class FlickrFavs(common.Favs):
    """Walk the paginated list of the user's Flickr favourites.

    API credentials and the username come from ``keys.flickr``.
    """

    def __init__(self):
        super().__init__('flickr')
        flickr_api.set_keys(
            api_key=keys.flickr.get('key'),
            api_secret=keys.flickr.get('secret')
        )
        username = keys.flickr.get('username')
        self.user = flickr_api.Person.findByUserName(username)

    def run(self):
        """Request page after page of favourites and archive every photo."""
        # The real page count is only known after the first response, so
        # start by assuming a single page and correct it on every pass.
        page, pages = 1, 1
        while page <= pages:
            favourites = self.user.getFavorites(
                user_id=self.user.id,
                page=page
            )
            for entry in favourites:
                FlickrFav(entry).run()
            pages = favourites.info.pages
            page += 1
class FlickrFav(common.ImgFav):
    """One favourited Flickr photo, exposed through the ``ImgFav`` interface.

    ``flickrphoto`` is the photo object yielded by ``getFavorites``; its
    ``getInfo()`` result is cached in ``self.info`` at construction time.
    """

    # preferred download sizes, best first
    SIZES = ('Original', 'Large 2048', 'Large 1600', 'Large')

    def __init__(self, flickrphoto):
        self.flickrphoto = flickrphoto
        self.info = flickrphoto.getInfo()
        self.owner = self.info.get('owner')

    def __str__(self):
        return "fav-of %s" % (self.url)

    @property
    def author(self):
        """Dict with the owner's ``name`` and profile ``url``."""
        return {
            'name': "%s" % self.owner.username,
            'url': "%s" % self.owner.getProfileUrl(),
        }

    @property
    def id(self):
        return "%s" % self.info.get('id')

    @property
    def url(self):
        return "https://www.flickr.com/photos/%s/%s/" % (
            self.owner.id,
            self.id
        )

    @property
    def content(self):
        return "%s" % self.info.get('description')

    @property
    def geo(self):
        """``(latitude, longitude)`` tuple, or ``None`` without location."""
        if 'location' not in self.info:
            return None
        location = self.info.get('location')
        return (
            location.get('latitude', None),
            location.get('longitude', None)
        )

    @property
    def title(self):
        """Sanitised, whitespace-trimmed photo title."""
        # The original called ''.strip(title), which strips characters from
        # an empty string and therefore always produced ''.
        title = self.info.get('title') or ''
        return clean(("%s" % title).strip())

    @property
    def targetprefix(self):
        """Archive path of this favourite without a file extension."""
        return os.path.join(
            settings.paths.get('archive'),
            'favorite',
            "flickr_%s_%s" % (
                common.slugfname('%s' % self.owner.id),
                self.id,
            )
        )

    @property
    def exists(self):
        """True when any file starting with ``targetprefix`` is on disk."""
        return bool(glob.glob("%s*" % self.targetprefix))

    @property
    def published(self):
        return arrow.get(self.info.get('dateuploaded'))

    @property
    def tags(self):
        """Tag texts of the photo as a list of strings."""
        return ["%s" % t.text for t in self.info.get('tags')]

    @property
    def images(self):
        """Map of temporary target path -> URL of the best available size.

        Returns an empty dict when none of the preferred sizes is offered,
        so ``fetch_images`` has nothing to iterate instead of failing on
        ``None``.
        """
        sizes = self.flickrphoto.getSizes()
        for maybe in self.SIZES:
            if maybe in sizes:
                f = "%s%s" % (self.targetprefix, common.TMPFEXT)
                return {
                    f: sizes.get(maybe).get('source')
                }
        logging.warning('no suitable size found for %s', self.url)
        return {}

    def run(self):
        """Download the photo unless it has already been archived."""
        if not self.exists:
            self.fetch_images()
# Script entry point: archive all Flickr favourites.
if __name__ == '__main__':
    FlickrFavs().run()
#https://api.flickr.com/services/rest/?method=flickr.favorites.getPublicList&api_key=<API_KEY>&user_id=<USER_ID>&per_page=500&format=json&nojsoncallback=1
#class FlickrFavs(Favs):
#url = 'https://api.flickr.com/services/rest/'
#def __init__(self):
#super().__init__('flickr')
#self.get_uid()
#self.params = {
#'method': 'flickr.favorites.getList',
#'api_key': shared.config.get('api_flickr', 'api_key'),
#'user_id': self.uid,
#'extras': ','.join([
#'description',
#'geo',
#'tags',
#'owner_name',
#'date_upload',
#'url_o',
#'url_k',
#'url_h',
#'url_b',
#'url_c',
#'url_z',
#]),
#'per_page': 500, # maximim
#'format': 'json',
#'nojsoncallback': '1',
#'min_fave_date': self.lastpulled
#}
#def get_uid(self):
#params = {
#'method': 'flickr.people.findByUsername',
#'api_key': shared.config.get('api_flickr', 'api_key'),
#'format': 'json',
#'nojsoncallback': '1',
#'username': shared.config.get('api_flickr', 'username'),
#}
#r = requests.get(
#self.url,
#params=params
#)
#parsed = json.loads(r.text)
#self.uid = parsed.get('user', {}).get('id')
#def getpaged(self, offset):
#logging.info('requesting page #%d of paginated results', offset)
#self.params.update({
#'page': offset
#})
#r = requests.get(
#self.url,
#params=self.params
#)
#parsed = json.loads(r.text)
#return parsed.get('photos', {}).get('photo', [])
#def run(self):
#r = requests.get(self.url, params=self.params)
#js = json.loads(r.text)
#js = js.get('photos', {})
#photos = js.get('photo', [])
#total = int(js.get('pages', 1))
#current = int(js.get('page', 1))
#cntr = total - current
#while cntr > 0:
#current = current + 1
#paged = self.getpaged(current)
#photos = photos + paged
#cntr = total - current
#for photo in photos:
#fav = FlickrFav(photo)
#if not fav.exists:
#fav.run()
## fav.fix_extension()

102
LastFM.py Normal file
View file

@ -0,0 +1,102 @@
import os
import csv
import json
import logging
from operator import attrgetter
from collections import namedtuple
import requests
import arrow
import settings
import keys
from pprint import pprint
# One scrobble; LastFM.run writes these fields, in this order, as CSV columns.
Track = namedtuple(
    'Track',
    'timestamp artist album title artistid albumid img'
)
class LastFM(object):
    """Append the user's recent Last.fm scrobbles to ``lastfm.csv``.

    When the CSV already exists, only tracks scrobbled after its modification
    time are requested.
    """

    url = 'http://ws.audioscrobbler.com/2.0/'

    def __init__(self):
        self.params = {
            'method': 'user.getrecenttracks',
            'user': keys.lastfm.get('username'),
            'api_key': keys.lastfm.get('key'),
            'format': 'json',
            'limit': '200'
        }
        if os.path.isfile(self.target):
            # getmtime() returns a float; the API expects whole seconds
            mtime = int(os.path.getmtime(self.target))
            self.params.update({'from': mtime})

    @property
    def target(self):
        """Absolute path of the CSV archive."""
        return os.path.join(
            settings.paths.get('archive'),
            'lastfm.csv'
        )

    @property
    def exists(self):
        return os.path.isfile(self.target)

    def extracttracks(self, data):
        """Convert one ``recenttracks`` payload into a list of ``Track``.

        Entries without a ``date`` key are skipped.
        """
        tracks = []
        for track in data.get('track', []):
            if 'date' not in track:
                continue
            # an empty image list must not abort the whole import
            images = track.get('image') or [{}]
            entry = Track(
                arrow.get(
                    int(track.get('date').get('uts'))
                ).format('YYYY-MM-DDTHH:mm:ssZ'),
                track.get('artist').get('#text', ''),
                track.get('album').get('#text', ''),
                track.get('name', ''),
                track.get('artist').get('mbid', ''),
                track.get('album').get('mbid', ''),
                images[-1].get('#text', ''),
            )
            tracks.append(entry)
        return tracks

    def fetch(self):
        """Request one page; return its ``recenttracks`` dict (maybe empty)."""
        r = requests.get(self.url, params=self.params)
        recent = json.loads(r.text).get('recenttracks')
        if not recent:
            logging.warning(
                'no recenttracks in Last.fm response (HTTP %s)',
                r.status_code
            )
            return {}
        return recent

    def run(self):
        """Fetch all pages and append the tracks, oldest first, to the CSV."""
        data = self.fetch()
        tracks = self.extracttracks(data)
        if not tracks:
            return

        attr = data.get('@attr') or {}
        total = int(attr.get('totalPages', 1))
        current = int(attr.get('page', 1))
        while current < total:
            current = current + 1
            logging.info('requesting page #%d of paginated results', current)
            self.params.update({
                'page': current
            })
            tracks.extend(self.extracttracks(self.fetch()))

        write_header = not self.exists
        # newline='' lets the csv module control line endings itself
        with open(self.target, 'a', newline='') as f:
            if write_header:
                csv.writer(f).writerow(Track._fields)
            writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
            writer.writerows(sorted(tracks, key=attrgetter('timestamp')))
# Script entry point: pull new scrobbles into the CSV archive.
if __name__ == '__main__':
    LastFM().run()

120
Tumblr.py Normal file
View file

@ -0,0 +1,120 @@
import os
import glob
import pytumblr
import arrow
import keys
import common
import settings
from bleach import clean
from pprint import pprint
class TumblrFavs(common.Favs):
    """Archive the posts the authenticated Tumblr user has liked.

    The client is built from the four credentials in ``keys.tumblr``.
    """

    def __init__(self):
        super().__init__('tumblr')
        credentials = keys.tumblr
        self.client = pytumblr.TumblrRestClient(
            credentials.get('key'),
            credentials.get('secret'),
            credentials.get('oauth_token'),
            credentials.get('oauth_secret')
        )

    def run(self):
        """Request likes newer than ``self.since`` and archive each one."""
        response = self.client.likes(after=self.since)
        # a response without 'liked_posts' means there is nothing to do
        if 'liked_posts' in response:
            for post in response.get('liked_posts'):
                TumblrFav(post).run()
class TumblrFav(common.ImgFav):
    """One liked Tumblr post, exposed through the ``ImgFav`` interface.

    ``data`` is the post dict taken from the ``liked_posts`` list.
    """

    def __init__(self, data):
        self.data = data

    def __str__(self):
        return "like-of %s from blog %s" % (self.url, self.blogname)

    @property
    def blogname(self):
        return self.data.get('blog_name')

    @property
    def id(self):
        return self.data.get('id')

    @property
    def url(self):
        return self.data.get('post_url')

    @property
    def content(self):
        caption = self.data.get('caption', '')
        return "%s" % caption

    @property
    def title(self):
        """Sanitised title: summary, else slug, else a slug of the URL."""
        candidate = self.data.get('summary', '')
        if len(candidate) == 0:
            candidate = self.data.get('slug', '')
        if len(candidate) == 0:
            candidate = common.slugfname(self.url)
        return clean(candidate.strip())

    @property
    def targetprefix(self):
        """Archive path of this like without counter or file extension."""
        basename = "tumblr_%s_%s" % (self.blogname, self.id)
        return os.path.join(
            settings.paths.get('archive'),
            'favorite',
            basename
        )

    @property
    def exists(self):
        """True when any file starting with ``targetprefix`` is on disk."""
        matches = glob.glob("%s*" % self.targetprefix)
        return len(matches) > 0

    @property
    def published(self):
        """Like time, else post date, else the current UTC time."""
        for key in ('liked_timestamp', 'date'):
            stamp = self.data.get(key, False)
            if stamp:
                return arrow.get(stamp)
        return arrow.get(arrow.utcnow().timestamp)

    @property
    def tags(self):
        return self.data.get('tags', [])

    @property
    def author(self):
        """Dict with the blog's ``name`` and ``url``."""
        name = self.blogname
        return {
            'name': name,
            'url': 'http://%s.tumblr.com' % name
        }

    @property
    def images(self):
        """Map of numbered temporary target path -> original-size URL."""
        found = {}
        for index, photo in enumerate(self.data.get('photos', [])):
            target = "%s-%d%s" % (self.targetprefix, index, common.TMPFEXT)
            found[target] = photo.get('original_size').get('url')
        return found

    def run(self):
        """Download the photos unless the post has already been archived."""
        if self.exists:
            return
        self.fetch_images()
# Script entry point: archive all Tumblr likes.
if __name__ == '__main__':
    TumblrFavs().run()

133
common.py Normal file
View file

@ -0,0 +1,133 @@
import os
import glob
import imghdr
import re
import logging
import shutil
import subprocess
from slugify import slugify
import requests
import arrow
import settings
from pprint import pprint
TMPFEXT = '.xyz'
def slugfname(url):
    """Turn a URL into a lowercase ASCII slug of at most 200 characters.

    The leading ``http://`` / ``https://`` scheme and an immediately
    following ``www`` are removed before slugifying.
    """
    without_scheme = re.sub(r"^https?://(?:www)?", "", url)
    slug = slugify(without_scheme, only_ascii=True, lower=True)
    return slug[:200]
class Favs(object):
    """Base class for a silo's favourites collector.

    ``silo`` is the service name; it is also the prefix of the files the
    silo's items are archived under in ``<archive>/favorite``.
    """

    def __init__(self, silo):
        self.silo = silo

    @property
    def since(self):
        """Newest modification time among the silo's archived files, plus 1.

        Returns 1 when nothing has been archived for the silo yet.
        """
        # Archived files are named "<silo>_..." by every ImgFav subclass in
        # this project; the previous "<silo>-*" pattern never matched them.
        pattern = os.path.join(
            settings.paths.get('archive'),
            'favorite',
            "%s_*" % self.silo
        )
        mtime = max(
            (int(os.path.getmtime(f)) for f in glob.glob(pattern)),
            default=0
        )
        # NOTE(review): the +1 is kept from the original, which carried a
        # TODO questioning it; presumably it excludes the newest archived
        # item from the next request - confirm.
        return mtime + 1
class ImgFav(object):
    """Mixin that downloads a favourite's images and tags them with metadata.

    Subclasses provide ``images`` (dict of target path -> URL), ``published``
    (an arrow object), ``author`` (dict with ``name`` and ``url``), ``url``,
    ``title``, ``content``, ``tags`` and optionally ``geo`` as a
    ``(lat, lon)`` pair.
    """

    def __init__(self):
        return

    def fetch_images(self):
        """Download every entry of ``self.images``."""
        for fpath, url in self.images.items():
            self.fetch_image(fpath, url)

    def fetch_image(self, fpath, url):
        """Download ``url`` to ``fpath`` and rename it by detected type.

        Responses other than HTTP 200 and files ``imghdr`` cannot identify
        leave nothing behind. JPEG and PNG files get metadata written by
        ``write_exif`` before the temporary extension is replaced.
        """
        logging.info("pulling image %s to %s", url, fpath)
        r = requests.get(url, stream=True)
        try:
            if r.status_code != 200:
                # nothing was written, so there is nothing to inspect
                logging.warning(
                    "pulling %s failed with HTTP %s", url, r.status_code
                )
                return
            with open(fpath, 'wb') as f:
                r.raw.decode_content = True
                shutil.copyfileobj(r.raw, f)
        finally:
            # a streamed response holds its connection until closed
            r.close()

        imgtype = imghdr.what(fpath)
        if not imgtype:
            os.remove(fpath)
            return
        if imgtype in ['jpg', 'jpeg', 'png']:
            self.write_exif(fpath)
        # swap only the trailing temporary extension, not every occurrence
        # of it somewhere else in the path
        root = fpath[:-len(TMPFEXT)] if fpath.endswith(TMPFEXT) else fpath
        os.rename(fpath, "%s.%s" % (root, imgtype))

    def write_exif(self, fpath):
        """Write copyright, source, date, text, tag and GPS data to ``fpath``.

        Runs the external ``exiftool`` binary; a non-zero exit status is
        logged, not raised.
        """
        logging.info('populating EXIF data of %s', fpath)

        coords = None
        geo = getattr(self, 'geo', None)
        if geo is not None:
            lat, lon = geo
            if lat and lon and 'null' != lat and 'null' != lon:
                coords = (round(float(lat), 6), round(float(lon), 6))

        params = [
            'exiftool',
            '-overwrite_original',
            '-XMP:Copyright=Copyright %s %s (%s)' % (
                self.published.to('utc').format('YYYY'),
                self.author.get('name'),
                self.author.get('url'),
            ),
            '-XMP:Source=%s' % self.url,
            '-XMP:ReleaseDate=%s' % self.published.to('utc').format('YYYY:MM:DD HH:mm:ss'),
            '-XMP:Headline=%s' % self.title,
            '-XMP:Description=%s' % self.content,
        ]
        for t in self.tags:
            params.append('-XMP:HierarchicalSubject+=%s' % t)
            params.append('-XMP:Subject+=%s' % t)

        if coords:
            geo_lat, geo_lon = coords
            # coordinates are written unsigned; the sign goes into the refs
            GPSLatitudeRef = 'S' if geo_lat < 0 else 'N'
            GPSLongitudeRef = 'W' if geo_lon < 0 else 'E'
            params.append('-GPSLongitude=%s' % abs(geo_lon))
            params.append('-GPSLatitude=%s' % abs(geo_lat))
            params.append('-GPSLongitudeRef=%s' % GPSLongitudeRef)
            params.append('-GPSLatitudeRef=%s' % GPSLatitudeRef)
        params.append(fpath)

        p = subprocess.Popen(
            params,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout, stderr = p.communicate()
        if p.returncode != 0:
            logging.warning(
                'exiftool exited with %s for %s: %s',
                p.returncode,
                fpath,
                stderr.decode('utf-8', 'replace').strip()
            )

        # remove a backup copy if exiftool left one behind
        _original = '%s_original' % fpath
        if os.path.exists(_original):
            os.unlink(_original)

5
requirements.txt Normal file
View file

@ -0,0 +1,5 @@
deviantart==0.1.5
flickr-api==0.6.1
PyTumblr==0.0.8
arrow==0.12.1
requests==2.19.1

9
run Executable file
View file

@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Run every silo importer in order; stop at the first one that fails.

set -euo pipefail
IFS=$'\n\t'

for importer in Tumblr.py LastFM.py DeviantArt.py Flickr.py; do
    python3 "${importer}"
done

31
settings.py Normal file
View file

@ -0,0 +1,31 @@
import os
import re
import argparse
import logging
# Project root directory; the archive location is derived from it.
base = os.path.abspath(os.path.expanduser('~/Projects/petermolnar.net'))

paths = {
    'archive': os.path.join(base, 'archive'),
}

# Accepted --loglevel names and the numeric level passed to logging.
loglevels = {
    'critical': 50,
    'error': 40,
    'warning': 30,
    'info': 20,
    'debug': 10
}

_parser = argparse.ArgumentParser(description='Parameters for silo.pasta')
_parser.add_argument(
    '--loglevel',
    default='info',
    # reject unknown names with a usage error instead of a KeyError below
    choices=list(loglevels),
    help='change loglevel'
)

# Parsed command line as a plain dict, e.g. {'loglevel': 'info'}.
args = vars(_parser.parse_args())

logging.basicConfig(
    level=loglevels[args.get('loglevel')],
    format='%(asctime)s - %(levelname)s - %(message)s'
)