299 lines
8.9 KiB
Python
299 lines
8.9 KiB
Python
import json
|
|
import os
|
|
import hashlib
|
|
import glob
|
|
import frontmatter
|
|
import requests
|
|
import shared
|
|
import logging
|
|
import re
|
|
import shutil
|
|
import arrow
|
|
import bs4
|
|
from slugify import slugify
|
|
|
|
from pprint import pprint
|
|
|
|
class Bookmark(object):
    """A single bookmark stored as a front-matter markdown file.

    The file is placed at ``<contentdir>/<bookmarks>/<fname>.md``; both
    directory names are read from the ``source`` section of
    ``shared.config``.
    """

    def __init__(self, title, url, fname=None):
        """Prepare the metadata and the target path of the bookmark.

        :param title: title of the bookmarked page
        :param url: address of the bookmarked page, stored as ``bookmark-of``
        :param fname: file name without extension; when empty or omitted
            a slug of ``title`` is used instead
        """
        self.fm = frontmatter.loads('')
        fname = fname or slugify(title)
        self.fname = "%s.md" % fname
        self.target = os.path.join(
            shared.config.get('source', 'contentdir'),
            shared.config.get('source', 'bookmarks'),
            self.fname
        )
        self.fm.metadata = {
            'published': arrow.utcnow().format(shared.ARROWISO),
            'title': title,
            'bookmark-of': url,
        }

    def write(self):
        """Serialize the front matter into ``self.target`` as UTF-8 text."""
        logging.info('saving bookmark to %s', self.target)
        # Explicit encoding: titles are arbitrary web text and the locale's
        # default encoding may not be able to represent them.
        with open(self.target, 'wt', encoding='utf-8') as t:
            t.write(frontmatter.dumps(self.fm))
|
|
|
|
class HNBookmarks(object):
    """Pull the favorites page of a Hacker News user and save new items
    as bookmarks."""

    # file name prefix of the bookmarks created by this class
    prefix = 'hn-'

    def __init__(self):
        """Build the favorites URL from the ``hackernews`` config section."""
        self.url = 'https://news.ycombinator.com/favorites?id=%s' % (
            shared.config.get('hackernews', 'user_id')
        )

    @classmethod
    def _id_from_path(cls, path):
        """Return the item id encoded in the file name of ``path``.

        Only a leading ``prefix`` and a trailing ``.md`` of the file name
        are removed; the directory part of the path is ignored.
        """
        name = os.path.basename(path)
        if name.startswith(cls.prefix):
            name = name[len(cls.prefix):]
        if name.endswith('.md'):
            name = name[:-len('.md')]
        return name

    @property
    def existing(self):
        """Ids of the items that already have a bookmark file.

        Found by globbing ``<contentdir>/*/<prefix>*.md`` on first access;
        the resulting list is cached on the instance.
        """
        if hasattr(self, '_existing'):
            return self._existing

        pattern = os.path.join(
            shared.config.get('source', 'contentdir'),
            "*",
            "%s*.md" % self.prefix
        )
        self._existing = [
            self._id_from_path(f)
            for f in sorted(glob.glob(pattern), reverse=True)
        ]
        return self._existing

    def run(self):
        """Fetch the favorites page and write a bookmark for each item
        that has no bookmark file yet."""
        r = requests.get(self.url)
        if r.status_code != 200:
            logging.error(
                "fetching %s failed with HTTP status %s",
                self.url,
                r.status_code
            )
            return

        soup = bs4.BeautifulSoup(r.text, "html5lib")
        known = set(self.existing)
        for row in soup.find_all('tr', attrs={'class': 'athing'}):
            rid = row.get('id')
            # a row without an id cannot be given a meaningful file name
            if not rid or rid in known:
                continue

            link = row.find('a', attrs={'class': 'storylink'})
            if link is None:
                logging.error("no story link found for item %s, skipping", rid)
                continue

            url = link.get('href')
            # get_text() also copes with nested tags inside the anchor,
            # where joining link.contents would raise a TypeError
            title = link.get_text(" ")
            fname = "%s%s" % (self.prefix, rid)

            Bookmark(title, url, fname).write()
|
|
|
|
class Fav(object):
    """Base class of a favorited photo: a front-matter markdown file plus
    a downloaded copy of the image.

    ``self.fname`` (the markdown file name) is not set here; the
    properties below read it, so it has to be assigned before they are
    used.
    """

    def __init__(self):
        # time of the fav; initialised to the current UTC time
        self.arrow = arrow.utcnow()
        self.fm = frontmatter.loads('')

    @property
    def target(self):
        """Path of the markdown file: ``<contentdir>/<favs>/<fname>``."""
        return os.path.join(
            shared.config.get('source', 'contentdir'),
            shared.config.get('source', 'favs'),
            self.fname
        )

    @property
    def exists(self):
        """True when the markdown file is already present on disk."""
        return os.path.isfile(self.target)

    @property
    def imgname(self):
        """File name of the saved image, derived from ``self.fname``."""
        # the _ is to differentiate between my photos, where the md and
        # jpg name is the same, and favs
        return self.fname.replace('.md', '_.jpg')

    @property
    def imgtarget(self):
        """Path of the saved image: ``<filesdir>/<imgname>``."""
        return os.path.join(
            shared.config.get('source', 'filesdir'),
            self.imgname
        )

    def saveimg(self, url):
        """Download ``url`` into ``self.imgtarget``.

        An existing file is never overwritten. A response with a status
        other than 200 is logged and nothing is written.
        """
        target = self.imgtarget
        if os.path.isfile(target):
            logging.error("%s already exists, refusing to overwrite", target)
            return

        logging.info("pulling image %s to files", url)
        r = requests.get(url, stream=True)
        try:
            if r.status_code != 200:
                logging.error(
                    "pulling image %s failed with HTTP status %s",
                    url,
                    r.status_code
                )
                return
            with open(target, 'wb') as f:
                # have the raw stream undo any content encoding (gzip etc.)
                r.raw.decode_content = True
                shutil.copyfileobj(r.raw, f)
        finally:
            # a streamed response keeps its connection until it is closed
            r.close()

    def write(self):
        """Write the front matter to ``self.target`` as UTF-8 and set the
        access and modification time of the file to ``self.arrow``."""
        logging.info('saving fav to %s', self.target)
        with open(self.target, 'wt', encoding='utf-8') as t:
            t.write(frontmatter.dumps(self.fm))
        # `timestamp` is a property on older Arrow releases and a method
        # on 1.x; support both
        stamp = self.arrow.timestamp
        if callable(stamp):
            stamp = stamp()
        os.utime(self.target, (stamp, stamp))
|
|
|
|
|
|
class FlickrFav(Fav):
    """A single Flickr favorite built from one photo record (a dict)."""

    def __init__(self, photo):
        """Derive the file name and the photo page URL from ``photo``.

        :param photo: dict describing the photo; the ``owner`` and ``id``
            keys are read here, the rest in :meth:`run`
        """
        super(FlickrFav, self).__init__()
        self.photo = photo
        self.ownerid = photo.get('owner')
        self.photoid = photo.get('id')
        self.fname = "flickr-%s-%s.md" % (self.ownerid, self.photoid)
        self.url = "https://www.flickr.com/photos/%s/%s" % (
            self.ownerid,
            self.photoid
        )

    def run(self):
        """Download the image and fill in the front matter.

        Returns early, leaving the front matter untouched, when the record
        has neither a ``url_b`` nor a ``url_z`` entry.
        """
        img = self.photo.get('url_b', self.photo.get('url_z', False))
        if not img:
            logging.error("image url was empty for %s, skipping fav", self.url)
            return

        self.saveimg(img)

        # without a fav date, self.arrow keeps the "now" set by Fav.__init__
        faved = self.photo.get('date_faved')
        if faved is not None:
            try:
                # presumably an epoch value delivered as a string; an int
                # is the form arrow.get() accepts across Arrow versions
                faved = int(faved)
            except (TypeError, ValueError):
                # not an epoch number: let arrow parse the value as it is
                pass
            self.arrow = arrow.get(faved)

        # NOTE(review): Flickr appears to deliver the `owner_name` extra
        # under the key `ownername` - confirm; the original key is kept
        # as the fallback
        author_name = self.photo.get(
            'ownername',
            self.photo.get('owner_name')
        )

        self.fm.metadata = {
            'published': self.arrow.format(shared.ARROWISO),
            'title': '%s' % self.photo.get('title', self.fname),
            'favorite-of': self.url,
            # split() without an argument yields [] for an empty tag
            # string, where split(' ') yielded ['']
            'flickr_tags': (self.photo.get('tags') or '').split(),
            'geo': {
                'latitude': self.photo.get('latitude', ''),
                'longitude': self.photo.get('longitude', ''),
            },
            'author': {
                'name': author_name,
                'url': 'https://www.flickr.com/people/%s' % (
                    self.photo.get('owner')
                ),
            },
            'image': self.imgname
        }

        content = self.photo.get('description', {}).get('_content', '')
        content = shared.Pandoc(False).convert(content)
        self.fm.content = content
|
|
|
|
|
|
class FivehpxFav(Fav):
    """A single 500px favorite built from one photo record (a dict)."""

    def __init__(self, photo):
        """Derive the file name and the photo page URL from ``photo``.

        :param photo: dict describing the photo; the ``user_id``, ``id``
            and ``url`` keys are read here, the rest in :meth:`run`
        """
        super(FivehpxFav, self).__init__()
        self.photo = photo
        self.ownerid = photo.get('user_id')
        self.photoid = photo.get('id')
        self.fname = "500px-%s-%s.md" % (self.ownerid, self.photoid)
        self.url = "https://www.500px.com%s" % (photo.get('url'))

    def run(self):
        """Download the image and fill in the front matter.

        Returns early, leaving the front matter untouched, when the record
        carries no image URL.
        """
        # a missing or empty `images` entry must end up in the "skipping"
        # branch below instead of raising
        images = self.photo.get('images') or []
        img = images[0].get('url') if images else None
        if not img:
            logging.error("image url was empty for %s, skipping fav", self.url)
            return

        self.saveimg(img)

        # without a creation date, self.arrow keeps the "now" set by
        # Fav.__init__
        created = self.photo.get('created_at')
        if created:
            self.arrow = arrow.get(created)

        # tolerate records without a `user` entry
        user = self.photo.get('user') or {}

        self.fm.metadata = {
            'published': self.arrow.format(shared.ARROWISO),
            'title': '%s' % self.photo.get('name', self.fname),
            'favorite-of': self.url,
            'fivehpx_tags': self.photo.get('tags', []),
            'geo': {
                'latitude': self.photo.get('latitude', ''),
                'longitude': self.photo.get('longitude', ''),
            },
            'author': {
                'name': user.get('fullname', self.ownerid),
                'url': 'https://www.500px.com/%s' % (
                    user.get('username', self.ownerid)
                ),
            },
            'image': self.imgname
        }

        content = self.photo.get('description', '')
        if content:
            content = shared.Pandoc(False).convert(content)
        else:
            content = ''
        self.fm.content = content
|
|
|
|
class Favs(object):
    """Base class of a favorites puller tied to one config section."""

    def __init__(self, confgroup):
        """
        :param confgroup: name of the ``shared.config`` section of the
            service; also used as the file name prefix of its favs
        """
        self.confgroup = confgroup
        self.url = shared.config.get(confgroup, 'fav_api')

    @staticmethod
    def _newest_mtime(paths):
        """Return the largest modification time (whole seconds) among
        ``paths``, or 0 when ``paths`` is empty."""
        return max([0] + [int(os.path.getmtime(p)) for p in paths])

    @property
    def lastpulled(self):
        """Modification time of the newest saved fav of this service,
        plus one second; 1 when no fav has been saved yet.

        Looks at ``<contentdir>/<favs>/<confgroup>-*.md``.
        """
        pattern = os.path.join(
            shared.config.get('source', 'contentdir'),
            shared.config.get('source', 'favs'),
            "%s-*.md" % self.confgroup
        )
        mtime = self._newest_mtime(glob.glob(pattern)) + 1
        # name the actual service: this class is not specific to flickr
        logging.debug("last %s fav timestamp: %s", self.confgroup, mtime)
        return mtime
|
|
|
|
|
|
class FlickrFavs(Favs):
    """Pull the Flickr favorites of the configured user."""

    def __init__(self):
        """Assemble the query parameters of ``flickr.favorites.getList``
        from the ``flickr`` config section."""
        super(FlickrFavs, self).__init__('flickr')
        self.params = {
            'method': 'flickr.favorites.getList',
            'api_key': shared.config.get('flickr', 'api_key'),
            'user_id': shared.config.get('flickr', 'user_id'),
            'extras': 'description,geo,tags,url_z,url_b,owner_name,date_upload',
            'per_page': 500,
            'format': 'json',
            'nojsoncallback': '1',
            # only ask for favs newer than the newest one already saved
            'min_fave_date': self.lastpulled
        }

    def run(self):
        """Query the API and save every returned photo as a fav."""
        r = requests.get(self.url, params=self.params)
        if r.status_code != 200:
            logging.error(
                "flickr fav request failed with HTTP status %s",
                r.status_code
            )
            return

        js = json.loads(r.text)
        for photo in js.get('photos', {}).get('photo', []):
            fav = FlickrFav(photo)
            fav.run()
            if not fav.fm.metadata:
                # run() gave up on this photo (no image url) and left the
                # front matter empty; do not write an empty fav file
                continue
            fav.write()
|
|
|
|
|
|
class FivehpxFavs(Favs):
    """Pull the 500px favorites from the configured API endpoint."""

    def __init__(self):
        """Assemble the query parameters from the ``500px`` config
        section."""
        super(FivehpxFavs, self).__init__('500px')
        self.params = {
            'consumer_key': shared.config.get('500px', 'api_key'),
            'rpp': 100,
            'image_size': 4,
            'include_tags': 1,
            'include_geo': 1
        }

    def run(self):
        """Query the API and save every photo that has no fav file yet."""
        r = requests.get(self.url, params=self.params)
        if r.status_code != 200:
            logging.error(
                "500px fav request failed with HTTP status %s",
                r.status_code
            )
            return

        js = json.loads(r.text)
        for photo in js.get('photos', []):
            fav = FivehpxFav(photo)
            if fav.exists:
                continue
            fav.run()
            if not fav.fm.metadata:
                # run() gave up on this photo (no image url) and left the
                # front matter empty; do not write an empty fav file
                continue
            fav.write()
|
|
|
|
|
|
if __name__ == '__main__':
    # Detach every handler already attached to the root logger, last one
    # first, so that basicConfig() below actually takes effect.
    root = logging.root
    while root.handlers:
        root.removeHandler(root.handlers[-1])

    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(message)s',
        level=logging.INFO
    )

    # Each puller is constructed right before it runs, in this order:
    # Flickr favs, Hacker News bookmarks, 500px favs.
    for puller in (FlickrFavs, HNBookmarks, FivehpxFavs):
        puller().run()
|