From fc54524162628a8545dcac1db7f5cb6f41d1690a Mon Sep 17 00:00:00 2001
From: Peter Molnar
Date: Mon, 7 Sep 2020 08:35:30 +0100
Subject: [PATCH] pre-cleanup commit

---
 HackerNews.py |  46 ++++++++++++++++++++++
 Wallabag.py   | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++
 run.py        |  21 +++-------
 settings.py   |   9 +++--
 4 files changed, 166 insertions(+), 19 deletions(-)
 create mode 100644 HackerNews.py
 create mode 100644 Wallabag.py

diff --git a/HackerNews.py b/HackerNews.py
new file mode 100644
index 0000000..0916335
--- /dev/null
+++ b/HackerNews.py
@@ -0,0 +1,46 @@
+import os
+import glob
+import logging
+import json
+import requests
+from common import cached_property
+import settings
+import keys
+
+class HackerNews(object):
+    # base URL of the public HackerNews Firebase API; no trailing
+    # slash, the f-strings below add one
+    url = "https://hacker-news.firebaseio.com/v0"
+
+    @property
+    def tdir(self):
+        return os.path.join(settings.paths.get("archive"), "hn")
+
+    @cached_property
+    def existing(self):
+        # ids of already archived entries, from the saved JSON filenames
+        return [os.path.basename(fpath).replace(".json", "") for fpath in glob.glob(os.path.join(self.tdir, "*.json"))]
+
+    def run(self):
+        os.makedirs(self.tdir, exist_ok=True)
+        user = keys.hackernews.get("username")
+        content = requests.get(f"{self.url}/user/{user}.json")
+        data = content.json()
+        if "submitted" not in data:
+            return
+        for entry in data["submitted"]:
+            # the API returns item ids as integers, while `existing`
+            # holds filename stems, so compare as strings
+            if str(entry) in self.existing:
+                logging.debug("skipping HackerNews entry %s", entry)
+                continue
+            entry_data = requests.get(f"{self.url}/item/{entry}.json")
+            target = os.path.join(self.tdir, f"{entry}.json")
+            with open(target, "wt") as f:
+                logging.info("saving HackerNews entry %s", entry)
+                f.write(json.dumps(entry_data.json(), indent=4, ensure_ascii=False))
+
+
+if __name__ == "__main__":
+    hn = HackerNews()
+    hn.run()
diff --git a/Wallabag.py b/Wallabag.py
new file mode 100644
index 0000000..ba711ad
--- /dev/null
+++ b/Wallabag.py
@@ -0,0 +1,109 @@
+import os
+import glob
+import json
+import re
+import logging
+import requests
+import settings
+import keys
+from shutil import copyfileobj
+from common import cached_property
+from common import url2slug
+
+# exported entry filename pattern: "{id}_{slug}.epub"
+RE_FNAME = re.compile(r"(?P<id>[0-9]+)_(?P<slug>.*)\.epub")
+
+
+class Wallabag(object):
+    def __init__(self):
+        self.access_token = ""
+        self.auth = {}
+
+    @property
+    def tdir(self):
+        return settings.paths.bookmarks
+
+    @cached_property
+    def existing(self):
+        return [
+            os.path.basename(fpath)
+            for fpath in glob.glob(os.path.join(self.tdir, "*"))
+        ]
+
+    def archive_batch(self, entries):
+        for entry in entries["_embedded"]["items"]:
+            ename = url2slug(entry["url"])
+            eid = entry["id"]
+            fname = f"{ename}.epub"
+            target = os.path.join(self.tdir, fname)
+
+            if fname in self.existing:
+                logging.debug("skipping existing entry %s", eid)
+                continue
+            with requests.get(
+                f"{keys.wallabag.url}/api/entries/{eid}/export.epub",
+                stream=True,
+                headers=self.auth,
+            ) as r:
+                logging.info("saving %s to %s", eid, target)
+                # ask urllib3 to undo any content-encoding before the
+                # raw stream is written to disk
+                r.raw.decode_content = True
+                with open(target, "wb") as f:
+                    copyfileobj(r.raw, f)
+
+    def run(self):
+        os.makedirs(self.tdir, exist_ok=True)
+        tparams = {
+            "grant_type": "password",
+            "client_id": keys.wallabag.client_id,
+            "client_secret": keys.wallabag.client_secret,
+            "username": keys.wallabag.username,
+            "password": keys.wallabag.password,
+        }
+        token = requests.post(
+            f"{keys.wallabag.url}/oauth/v2/token", data=tparams
+        )
+        try:
+            tdata = token.json()
+        except Exception as e:
+            logging.error("failed to get token from wallabag: %s", e)
+            return
+        if "access_token" not in tdata:
+            logging.error("missing access token from wallabag response")
+            return
+
+        self.access_token = tdata["access_token"]
+        self.auth = {"Authorization": f"Bearer {self.access_token}"}
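+
+        # /api/entries is paginated: the first, parameter-less request
+        # reveals the server-side page size ("limit") and the total
+        # page count, which drive the fetch loop below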
f"Bearer {self.access_token}"} + + r = requests.get( + f"{keys.wallabag.url}/api/entries", headers=self.auth + ) + try: + entries = r.json() + except Exception as e: + logging.error( + "failed to get first page from wallabag: %s", e + ) + return + + batch = entries["limit"] + pages = entries["pages"] + page = entries["page"] + self.archive_batch(entries) + while page < pages: + page = page + 1 + paged = {"perPage": batch, "page": page} + r = requests.get( + f"{keys.wallabag.url}/api/entries", + params=paged, + headers=self.auth, + ) + entries = r.json() + self.archive_batch(entries) + + +if __name__ == "__main__": + wbag = Wallabag() + wbag.run() diff --git a/run.py b/run.py index 9f0ce14..2f40e28 100644 --- a/run.py +++ b/run.py @@ -5,26 +5,17 @@ import LastFM import DeviantArt import Flickr #import Artstation +import HackerNews from pprint import pprint -lfm = LastFM.LastFM() -lfm.run() - -#opml = common.Follows() - silos = [ - DeviantArt.DAFavs(), - Flickr.FlickrFavs(), - Tumblr.TumblrFavs(), +# DeviantArt.DAFavs(), +# Flickr.FlickrFavs(), +# Tumblr.TumblrFavs(), # Artstation.ASFavs(), +# LastFM.LastFM(), + HackerNews.HackerNews() ] for silo in silos: silo.run() - #silo.sync_with_aperture() - #opml.update({silo.silo: silo.feeds}) - -#opml.sync() -#opml.export() -opml = common.Aperture() -opml.export() diff --git a/settings.py b/settings.py index 957b717..bb60d17 100644 --- a/settings.py +++ b/settings.py @@ -8,7 +8,7 @@ class nameddict(dict): __setattr__ = dict.__setitem__ __delattr__ = dict.__delitem__ -base = os.path.abspath(os.path.expanduser("~/Projects/petermolnar.net")) +#base = os.path.abspath(os.path.expanduser("~/")) opml = nameddict({ "owner": "Peter Molnar", @@ -18,9 +18,10 @@ opml = nameddict({ }) paths = nameddict({ - "archive": os.path.join(base, "archive"), - "content": os.path.join(base, "content"), - "bookmarks": os.path.join(base, "archive", "bookmarks") + "archive": os.path.join(os.path.expanduser('~'), "archive"), + #"archive": os.path.join(os.path.expanduser('~'), ""), + #"content": os.path.join(base, "content"), + #"bookmarks": os.path.join(base, "archive", "bookmarks") }) loglevels = {"critical": 50, "error": 40, "warning": 30, "info": 20, "debug": 10}