nasg/pesos.py
2017-07-05 21:09:06 +00:00

755 lines
22 KiB
Python
Executable file

#!/usr/bin/env python3
import json
import os
import hashlib
import glob
import frontmatter
import requests
import shared
import logging
import re
import shutil
import arrow
import bs4
from slugify import slugify
import oauth
import argparse
""" TODO
- followings?
- favs from:
- wordpress.com
- twitter
"""
class Bookmark(object):
def __init__(self, title, url, fname=None):
self.fm = frontmatter.loads('')
fname = fname or slugify(title)
self.fname = "%s.md" % fname
self.target = os.path.join(
shared.config.get('source', 'contentdir'),
shared.config.get('source', 'bookmarks'),
self.fname
)
self.fm.metadata = {
'published': arrow.utcnow().format(shared.ARROWISO),
'title': title,
'bookmark-of': url,
}
def write(self):
logging.info('saving bookmark to %s', self.target)
with open(self.target, 'wt') as t:
t.write(frontmatter.dumps(self.fm))
class HNBookmarks(object):
prefix = 'hn-'
def __init__(self):
self.url = 'https://news.ycombinator.com/favorites?id=%s' % (
shared.config.get('hackernews', 'user_id')
)
@property
def existing(self):
if hasattr(self, '_existing'):
return self._existing
d = os.path.join(
shared.config.get('source', 'contentdir'),
"*",
"%s*.md" % self.prefix
)
files = reversed(sorted(glob.glob(d)))
self._existing = [
os.path.basename(f.replace(self.prefix, '').replace('.md', ''))
for f in files
]
return self._existing
def run(self):
r = requests.get(self.url)
soup = bs4.BeautifulSoup(r.text, "html5lib")
rows = soup.find_all('tr', attrs={'class':'athing' })
for row in rows:
rid = row.get('id')
if rid in self.existing:
continue
link = row.find('a', attrs={'class':'storylink' })
url = link.get('href')
title = " ".join(link.contents)
fname = "%s%s" % (self.prefix, rid)
bookmark = Bookmark(title, url, fname)
bookmark.write()
class Fav(object):
def __init__(self):
self.arrow = arrow.utcnow()
self.fm = frontmatter.loads('')
@property
def target(self):
return os.path.join(
shared.config.get('source', 'contentdir'),
shared.config.get('source', 'favs'),
self.fname
)
@property
def exists(self):
return os.path.isfile(self.target)
@property
def imgname(self):
# the _ is to differentiate between my photos, where the md and jpg name is the same, and favs
return self.fname.replace('.md', '_.jpg')
@property
def imgtarget(self):
return os.path.join(
shared.config.get('source', 'filesdir'),
self.imgname
)
def saveimg(self, url, target=None):
target = target or self.imgtarget
if os.path.isfile(target):
logging.error("%s already exists, refusing to overwrite", target)
return
logging.info("pulling image %s to files", url)
r = requests.get(url, stream=True)
if r.status_code == 200:
with open(target, 'wb') as f:
r.raw.decode_content = True
shutil.copyfileobj(r.raw, f)
def write(self):
logging.info('saving fav to %s', self.target)
with open(self.target, 'wt') as t:
t.write(frontmatter.dumps(self.fm))
os.utime(self.target, (self.arrow.timestamp, self.arrow.timestamp))
class PinterestFav(Fav):
def __init__(self, url):
super(PinterestFav, self).__init__()
self.url = url
self.fname = "pinterest-%s.md" % (list(filter(None, url.split('/')))[-1])
def run(self):
try:
r = requests.get(self.url)
soup = bs4.BeautifulSoup(r.text, 'lxml')
ld = json.loads(soup.find('script', type='application/ld+json').text)
imgurl = ld.get('image')
self.saveimg(imgurl)
self.fm.metadata = {
'published': arrow.get(
ld.get('datePublished', arrow.utcnow().timestamp)
).format(shared.ARROWISO),
'title': ld.get('headline', self.url),
'favorite-of': self.url,
'image': self.imgname
}
content = ld.get('articleBody', '')
content = shared.Pandoc(False).convert(content)
self.fm.content = content
except Exception as e:
logging.error('saving pinterest fav %s failed: %s', self.url, e)
return
class FlickrFav(Fav):
def __init__(self, photo):
super(FlickrFav, self).__init__()
self.photo = photo
self.ownerid = photo.get('owner')
self.photoid = photo.get('id')
self.fname = "flickr-%s-%s.md" % (self.ownerid, self.photoid)
self.url = "https://www.flickr.com/photos/%s/%s" % (self.ownerid, self.photoid)
def run(self):
img = self.photo.get('url_b', self.photo.get('url_z', False))
if not img:
logging.error("image url was empty for %s, skipping fav", self.url)
return
self.saveimg(img)
self.arrow = arrow.get(
self.photo.get('date_faved', arrow.utcnow().timestamp)
)
self.fm.metadata = {
'published': self.arrow.format(shared.ARROWISO),
'title': '%s' % self.photo.get('title', self.fname),
'favorite-of': self.url,
'flickr_tags': self.photo.get('tags', '').split(' '),
'geo': {
'latitude': self.photo.get('latitude', ''),
'longitude': self.photo.get('longitude', ''),
},
'author': {
'name': self.photo.get('owner_name'),
'url': 'https://www.flickr.com/people/%s' % (
self.photo.get('owner')
),
},
'image': self.imgname
}
content = self.photo.get('description', {}).get('_content', '')
content = shared.Pandoc(False).convert(content)
self.fm.content = content
class FivehpxFav(Fav):
def __init__(self, photo):
super(FivehpxFav, self).__init__()
self.photo = photo
self.ownerid = photo.get('user_id')
self.photoid = photo.get('id')
self.fname = "500px-%s-%s.md" % (self.ownerid, self.photoid)
self.url = "https://www.500px.com%s" % (photo.get('url'))
def run(self):
img = self.photo.get('images')[0].get('url')
if not img:
logging.error("image url was empty for %s, skipping fav", self.url)
return
self.saveimg(img)
self.arrow = arrow.get(
self.photo.get('created_at', arrow.utcnow().timestamp)
)
self.fm.metadata = {
'published': self.arrow.format(shared.ARROWISO),
'title': '%s' % self.photo.get('name', self.fname),
'favorite-of': self.url,
'fivehpx_tags': self.photo.get('tags', []),
'geo': {
'latitude': self.photo.get('latitude', ''),
'longitude': self.photo.get('longitude', ''),
},
'author': {
'name': self.photo.get('user').get('fullname', self.ownerid),
'url': 'https://www.500px.com/%s' % (
self.photo.get('user').get('username', self.ownerid)
),
},
'image': self.imgname
}
content = self.photo.get('description', '')
if content:
content = shared.Pandoc(False).convert(content)
else:
content = ''
self.fm.content = content
class TumblrFav(Fav):
def __init__(self, like):
super(TumblrFav, self).__init__()
self.like = like
self.blogname = like.get('blog_name')
self.postid = like.get('id')
self.fname = "tumblr-%s-%s.md" % (self.blogname, self.postid)
self.url = like.get('post_url')
self.images = []
def run(self):
icntr = 0
for p in self.like.get('photos', []):
i = p.get('original_size').get('url')
logging.debug('parsing image %s', i)
n = self.fname.replace('.md', '_%d.jpg' % icntr)
self.images.append(n)
nt = os.path.join(
shared.config.get('source', 'filesdir'),
n
)
self.saveimg(i, nt)
icntr = icntr + 1
self.arrow = arrow.get(
self.like.get('liked_timestamp',
self.like.get('date',
arrow.utcnow().timestamp
)
)
)
self.fm.content = self.like.get('caption', '')
title = self.like.get('summary', '').strip()
if not len(title):
title = self.like.get('slug', '').strip()
if not len(title):
title = shared.slugfname(self.like.get('post_url'))
self.fm.metadata = {
'published': self.arrow.format(shared.ARROWISO),
'title': title,
'favorite-of': self.url,
'tumblr_tags': self.like.get('tags'),
'author': {
'name': self.like.get('blog_name'),
'url': 'http://%s.tumblr.com' % self.like.get('blog_name')
},
'images': self.images
}
class DAFav(Fav):
def __init__(self, fav):
super(DAFav, self).__init__()
self.fav = fav
self.deviationid = fav.get('deviationid')
self.url = fav.get('url')
self.title = fav.get('title', False) or self.deviationid
self.author = self.fav.get('author').get('username')
self.fname = "deviantart-%s-by-%s.md" % (
slugify(self.title), slugify(self.author)
)
self.image = fav.get('content', {}).get('src')
def run(self):
self.saveimg(self.image)
self.arrow = arrow.get(
self.fav.get('published_time', arrow.utcnow().timestamp)
)
self.fm.metadata = {
'published': self.arrow.format(shared.ARROWISO),
'title': '%s' % self.title,
'favorite-of': self.url,
'da_tags': [t.get('tag_name') for t in self.fav.get('meta', {}).get('tags', [])],
'author': {
'name': self.author,
'url': 'https://%s.deviantart.com' % (self.author),
},
'image': self.imgname
}
content = self.fav.get('meta', {}).get('description', '')
content = shared.Pandoc(False).convert(content)
self.fm.content = content
class Favs(object):
def __init__(self, confgroup):
self.confgroup = confgroup
self.url = shared.config.get(confgroup, 'fav_api')
@property
def lastpulled(self):
mtime = 0
d = os.path.join(
shared.config.get('source', 'contentdir'),
shared.config.get('source', 'favs'),
"%s-*.md" % self.confgroup
)
files = glob.glob(d)
for f in files:
ftime = int(os.path.getmtime(f))
if ftime > mtime:
mtime = ftime
mtime = mtime + 1
logging.debug("last flickr fav timestamp: %s", mtime)
return mtime
class FlickrFavs(Favs):
def __init__(self):
super(FlickrFavs, self).__init__('flickr')
self.params = {
'method': 'flickr.favorites.getList',
'api_key': shared.config.get('flickr', 'api_key'),
'user_id': shared.config.get('flickr', 'user_id'),
'extras': 'description,geo,tags,url_z,url_b,owner_name,date_upload',
'per_page': 500,
'format': 'json',
'nojsoncallback': '1',
'min_fave_date': self.lastpulled
}
def run(self):
r = requests.get(self.url,params=self.params)
js = json.loads(r.text)
for photo in js.get('photos', {}).get('photo', []):
fav = FlickrFav(photo)
fav.run()
fav.write()
class FivehpxFavs(Favs):
def __init__(self):
super(FivehpxFavs, self).__init__('500px')
self.params = {
'consumer_key': shared.config.get('500px', 'api_key'),
'rpp': 100,
'image_size': 4,
'include_tags': 1,
'include_geo': 1
}
def run(self):
r = requests.get(self.url,params=self.params)
js = json.loads(r.text)
for photo in js.get('photos', []):
fav = FivehpxFav(photo)
if not fav.exists:
fav.run()
fav.write()
class TumblrFavs(Favs):
def __init__(self):
super(TumblrFavs, self).__init__('tumblr')
self.oauth = oauth.TumblrOauth()
self.params = {
'after': self.lastpulled
}
self.likes = []
def getpaged(self, offset):
r = self.oauth.request(
self.url,
params={'offset': offset}
)
return json.loads(r.text)
def run(self):
r = self.oauth.request(
self.url,
params=self.params
)
js = json.loads(r.text)
total = int(js.get('response', {}).get('liked_count', 20))
print('total: %d' % total)
offset = 20
cntr = total - offset
likes = js.get('response', {}).get('liked_posts', [])
while cntr > 0:
paged = self.getpaged(offset)
likes = likes + paged.get('response', {}).get('liked_posts', [])
offset = offset + 20
cntr = total - offset
self.likes = likes
for like in self.likes:
fav = TumblrFav(like)
if not fav.exists:
fav.run()
fav.write()
class DAFavs(Favs):
def __init__(self):
from pprint import pprint
super(DAFavs, self).__init__('deviantart')
self.oauth = oauth.DAOauth()
self.params = {
'limit': 24,
'mature_content': 'true',
'username': shared.config.get('deviantart', 'username')
}
self.likes = []
def getpaged(self, offset):
self.params.update({'offset': offset})
r = self.oauth.request(
self.url,
self.params
)
return json.loads(r.text)
def getsinglemeta(self, daid):
r = self.oauth.request(
'https://www.deviantart.com/api/v1/oauth2/deviation/metadata',
params={
'deviationids[]': daid,
'ext_submission': False,
'ext_camera': False,
'ext_stats': False,
'ext_collection': False,
'mature_content': True,
}
)
meta = {}
try:
meta = json.loads(r.text)
return meta.get('metadata', []).pop()
except:
return meta
def has_more(self, q):
if 'True' == q or 'true' == q:
return True
return False
def run(self):
r = self.oauth.request(
self.url,
self.params
)
js = json.loads(r.text)
has_more = js.get('has_more')
offset = js.get('next_offset')
favs = js.get('results', [])
while True == has_more:
logging.debug('iterating over DA results with offset %d', offset)
paged = self.getpaged(offset)
favs = favs + paged.get('results', [])
has_more = paged.get('has_more')
n = paged.get('next_offset')
if n:
offset = offset + n
self.favs = favs
for fav in self.favs:
f = DAFav(fav)
if f.exists:
continue
f.fav.update({'meta': self.getsinglemeta(fav.get('deviationid'))})
f.run()
f.write()
#class WPFavs(Favs):
#def __init__(self):
#from pprint import pprint
#super(DAFavs, self).__init__('wordpress')
#self.oauth = oauth.DAOauth()
#self.params = {
#'limit': 24,
#'mature_content': 'true',
#'username': shared.config.get('deviantart', 'username')
#}
#self.likes = []
#def getpaged(self, offset):
#self.params.update({'offset': offset})
#r = self.oauth.request(
#self.url,
#self.params
#)
#return json.loads(r.text)
#def getsinglemeta(self, daid):
#r = self.oauth.request(
#'https://www.deviantart.com/api/v1/oauth2/deviation/metadata',
#params={
#'deviationids[]': daid,
#'ext_submission': False,
#'ext_camera': False,
#'ext_stats': False,
#'ext_collection': False,
#'mature_content': True,
#}
#)
#meta = {}
#try:
#meta = json.loads(r.text)
#return meta.get('metadata', []).pop()
#except:
#return meta
#def has_more(self, q):
#if 'True' == q or 'true' == q:
#return True
#return False
#def run(self):
#r = self.oauth.request(
#self.url,
#self.params
#)
#js = json.loads(r.text)
#has_more = js.get('has_more')
#offset = js.get('next_offset')
#favs = js.get('results', [])
#while True == has_more:
#logging.debug('iterating over DA results with offset %d', offset)
#paged = self.getpaged(offset)
#favs = favs + paged.get('results', [])
#has_more = paged.get('has_more')
#n = paged.get('next_offset')
#if n:
#offset = offset + n
#self.favs = favs
#for fav in self.favs:
#f = DAFav(fav)
#if f.exists:
#continue
#f.fav.update({'meta': self.getsinglemeta(fav.get('deviationid'))})
#f.run()
#f.write()
#class Following(object):
#def __init__(self, confgroup):
#self.confgroup = confgroup
#self.url = shared.config.get(confgroup, 'following_api')
#self.followings = []
#class FlickrFollowing(Following):
#def __init__(self):
#super(FlickrFollowing, self).__init__('flickr')
#self.oauth = oauth.FlickrOauth()
#def run(self):
#r = self.oauth.request(self.url, params={
#'method': 'flickr.contacts.getList',
#'format': 'json',
#'nojsoncallback': 1,
#'api_key': shared.config.get(self.confgroup, 'api_key')
#})
#try:
#contacts = json.loads(r.text)
#for c in contacts.get('contacts', {}).get('contact', []):
#self.followings.append({
#'url': "https://www.flickr.com/people/%s/" % c.get('nsid'),
#'name': c.get('realname'),
#'username': c.get('username'),
#'userid': c.get('nsid')
#})
#except Exception as e:
#logging.error('getting following from flickr failed: %s', e)
#class TumblrFollowing(Following):
#def __init__(self):
#super(TumblrFollowing, self).__init__('tumblr')
#self.oauth = oauth.FlickrOauth()
#def run(self):
#r = self.oauth.request(self.url, params={
#'method': 'flickr.contacts.getList',
#'format': 'json',
#'nojsoncallback': 1,
#'api_key': shared.config.get(self.confgroup, 'api_key')
#})
#try:
#contacts = json.loads(r.text)
#for c in contacts.get('contacts', {}).get('contact', []):
#self.followings.append({
#'url': "https://www.flickr.com/people/%s/" % c.get('nsid'),
#'name': c.get('realname'),
#'username': c.get('username'),
#'userid': c.get('nsid')
#})
#except Exception as e:
#logging.error('getting following from flickr failed: %s', e)
#class FlickrFollowing(object):
#def __init__(self):
#super(FlickrFollowing, self).__init__('flickr')
#self.params = {
#'consumer_key': shared.config.get('500px', 'api_key'),
#'rpp': 100,
#'image_size': 4,
#'include_tags': 1,
#'include_geo': 1
#}
#def run(self):
#r = requests.get(self.url,params=self.params)
#js = json.loads(r.text)
#for photo in js.get('photos', []):
#fav = FivehpxFav(photo)
#if not fav.exists:
#fav.run()
#fav.write()
#def run(self):
#https://api.flickr.com/services/rest/?method=flickr.contacts.getList&api_key=27d8a5bf7dabf882ff1c710894041f64&format=json&nojsoncallback=1&auth_token=72157682938907284-9c5f21debeec9833&api_sig=8ac87b900f44debea06a3765ed223680
#class Following(object):
#def __init__(self, confgroup):
#self.confgroup = confgroup
#self.url = shared.config.get(confgroup, 'fav_api')
#class FlickrFollowing(Following):
#def __init__(self):
#super(FlickrFollowing, self).__init__('flickr')
#self.params = {
#'method': 'flickr.contacts.getList',
#'api_key': shared.config.get('flickr', 'api_key'),
#'format': 'json',
#'nojsoncallback': '1',
#}
#def run(self):
#r = requests.get(self.url,params=self.params)
#js = json.loads(r.text)
#pprint(js)
#for contact in js.get('contacts', {}).get('contact', []):
#pprint(contact)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Parameters for NASG')
parser.add_argument(
'--loglevel',
default='error',
help='change loglevel'
)
params = vars(parser.parse_args())
while len(logging.root.handlers) > 0:
logging.root.removeHandler(logging.root.handlers[-1])
logging.basicConfig(
level=shared.LLEVEL[params.get('loglevel')],
format='%(asctime)s - %(levelname)s - %(message)s'
)
flickr = FlickrFavs()
flickr.run()
hn = HNBookmarks()
hn.run()
fivehpx = FivehpxFavs()
fivehpx.run()
tumblr = TumblrFavs()
tumblr.run()
da = DAFavs()
da.run()