pre-cleanup commit

parent d5665d15e7
commit fc54524162

4 changed files with 157 additions and 19 deletions
HackerNews.py | 40 (new file)
@@ -0,0 +1,40 @@
+import os
+import glob
+import logging
+import json
+import requests
+from common import cached_property
+import settings
+import keys
+
+
+class HackerNews(object):
+    # no trailing slash here: the f-strings below add their own
+    url = "https://hacker-news.firebaseio.com/v0"
+
+    @property
+    def tdir(self):
+        return os.path.join(settings.paths.get("archive"), "hn")
+
+    @cached_property
+    def existing(self):
+        # ids of the items already archived, derived from the filenames
+        return [
+            os.path.basename(fpath).replace(".json", "")
+            for fpath in glob.glob(os.path.join(self.tdir, "*.json"))
+        ]
+
+    def run(self):
+        user = keys.hackernews.get("username")
+        content = requests.get(f"{self.url}/user/{user}.json")
+        data = content.json()
+        if "submitted" not in data:
+            return
+        for entry in data["submitted"]:
+            # the API returns item ids as integers, while `existing`
+            # holds strings; compare as strings so the skip works
+            if str(entry) in self.existing:
+                logging.debug("skipping HackerNews entry %s", entry)
+                continue
+            entry_data = requests.get(f"{self.url}/item/{entry}.json")
+            target = os.path.join(self.tdir, f"{entry}.json")
+            with open(target, "wt") as f:
+                logging.info("saving HackerNews entry %s", entry)
+                f.write(json.dumps(entry_data.json(), indent=4, ensure_ascii=False))
+
+
+if __name__ == "__main__":
+    hn = HackerNews()
+    hn.run()
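Both new files import cached_property from the local "common" module, which is not included in this commit. A minimal sketch of what it presumably provides, assuming behaviour equivalent to functools.cached_property from Python 3.8+; the implementation below is an illustration, not the repository's actual code:

# assumed sketch of common.cached_property; not part of this commit
class cached_property(object):
    """Run the decorated method once, then cache the result on the instance."""

    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, instance, owner=None):
        if instance is None:
            return self
        # storing the value in the instance dict bypasses this
        # non-data descriptor on every later attribute lookup
        value = self.func(instance)
        instance.__dict__[self.name] = value
        return value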
Wallabag.py | 106 (new file)
@@ -0,0 +1,106 @@
+import os
+import glob
+import json
+import re
+import logging
+import requests
+import settings
+import keys
+from shutil import copyfileobj
+from common import cached_property
+from common import url2slug
+from pprint import pprint
+
+
+RE_FNAME = re.compile(r"(?P<id>[0-9]+)_(?P<slug>.*)\.epub")
+
+
+class Wallabag(object):
+    def __init__(self):
+        self.access_token = ""
+        self.auth = {}
+
+    @property
+    def tdir(self):
+        return settings.paths.bookmarks
+
+    @cached_property
+    def existing(self):
+        return [
+            os.path.basename(fpath)
+            for fpath in glob.glob(os.path.join(self.tdir, "*"))
+        ]
+
+    def archive_batch(self, entries):
+        for entry in entries["_embedded"]["items"]:
+            ename = url2slug(entry["url"])
+            eid = entry["id"]
+            fname = f"{ename}.epub"
+            target = os.path.join(self.tdir, fname)
+
+            if fname in self.existing:
+                logging.debug("skipping existing entry %s", entry["id"])
+            else:
+                with requests.get(
+                    f"{keys.wallabag.url}/api/entries/{eid}/export.epub",
+                    stream=True,
+                    headers=self.auth,
+                ) as r:
+                    logging.info("saving %s to %s", eid, target)
+                    with open(target, "wb") as f:
+                        copyfileobj(r.raw, f)
+
+    def run(self):
+        tparams = {
+            "grant_type": "password",
+            "client_id": keys.wallabag.client_id,
+            "client_secret": keys.wallabag.client_secret,
+            "username": keys.wallabag.username,
+            "password": keys.wallabag.password,
+        }
+        token = requests.post(
+            f"{keys.wallabag.url}/oauth/v2/token", data=tparams
+        )
+        try:
+            tdata = token.json()
+            if "access_token" not in tdata:
+                logging.error("missing access token from wallabag response")
+                return
+        except Exception as e:
+            logging.error("failed to get token from wallabag: %s", e)
+            return
+
+        self.access_token = tdata["access_token"]
+        self.auth = {"Authorization": f"Bearer {self.access_token}"}
+
+        r = requests.get(
+            f"{keys.wallabag.url}/api/entries", headers=self.auth
+        )
+        try:
+            entries = r.json()
+        except Exception as e:
+            logging.error("failed to get first page from wallabag: %s", e)
+            return
+
+        batch = entries["limit"]
+        pages = entries["pages"]
+        page = entries["page"]
+        self.archive_batch(entries)
+        while page < pages:
+            page = page + 1
+            paged = {"perPage": batch, "page": page}
+            r = requests.get(
+                f"{keys.wallabag.url}/api/entries",
+                params=paged,
+                headers=self.auth,
+            )
+            entries = r.json()
+            self.archive_batch(entries)
+
+
+if __name__ == "__main__":
+    wbag = Wallabag()
+    wbag.run()
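Both scripts also read credentials from a local "keys" module that is not part of the commit. Judging by the attribute access above (keys.hackernews.get("username"), keys.wallabag.client_id, and so on), it presumably holds one mapping per service; the sketch below is an assumption, with placeholder values and an assumed reuse of the nameddict helper from settings:

# assumed shape of keys.py; every value is a placeholder, not a real credential
from settings import nameddict

hackernews = nameddict({
    "username": "example-user",
})

wallabag = nameddict({
    "url": "https://wallabag.example.com",
    "client_id": "client-id-placeholder",
    "client_secret": "client-secret-placeholder",
    "username": "example-user",
    "password": "example-password",
})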
run.py | 21
@@ -5,26 +5,17 @@ import LastFM
 import DeviantArt
 import Flickr
 #import Artstation
+import HackerNews
 from pprint import pprint
-
-lfm = LastFM.LastFM()
-lfm.run()
-
-#opml = common.Follows()
 
 silos = [
-    DeviantArt.DAFavs(),
-    Flickr.FlickrFavs(),
-    Tumblr.TumblrFavs(),
+    # DeviantArt.DAFavs(),
+    # Flickr.FlickrFavs(),
+    # Tumblr.TumblrFavs(),
     # Artstation.ASFavs(),
+    # LastFM.LastFM(),
+    HackerNews.HackerNews()
 ]
 
 for silo in silos:
     silo.run()
-    #silo.sync_with_aperture()
-    #opml.update({silo.silo: silo.feeds})
-
-#opml.sync()
-#opml.export()
-opml = common.Aperture()
-opml.export()
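After this change run.py no longer wires up each service by hand: the silos list is duck-typed, and anything in it only needs to expose a run() method. A minimal sketch of a new silo that would slot into the loop (the class is hypothetical, purely to illustrate the implicit interface):

# hypothetical silo; anything with a run() method fits the loop in run.py
class ExampleSilo(object):
    def run(self):
        # fetch entries from the remote service and archive them locally
        print("archiving ExampleSilo")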
settings.py
@@ -8,7 +8,7 @@ class nameddict(dict):
     __setattr__ = dict.__setitem__
     __delattr__ = dict.__delitem__
 
-base = os.path.abspath(os.path.expanduser("~/Projects/petermolnar.net"))
+#base = os.path.abspath(os.path.expanduser("~/"))
 
 opml = nameddict({
     "owner": "Peter Molnar",
@@ -18,9 +18,10 @@ opml = nameddict({
 })
 
 paths = nameddict({
-    "archive": os.path.join(base, "archive"),
-    "content": os.path.join(base, "content"),
-    "bookmarks": os.path.join(base, "archive", "bookmarks")
+    "archive": os.path.join(os.path.expanduser('~'), "archive"),
+    #"archive": os.path.join(os.path.expanduser('~'), ""),
+    #"content": os.path.join(base, "content"),
+    #"bookmarks": os.path.join(base, "archive", "bookmarks")
 })
 
 loglevels = {"critical": 50, "error": 40, "warning": 30, "info": 20, "debug": 10}
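The loglevels mapping mirrors the numeric constants of the stdlib logging module, so a level name coming from configuration can be passed straight to logging.basicConfig. A short usage sketch, assuming this settings module; the "debug" choice and the "info" fallback are illustrative:

import logging
import settings

# translate a configured level name such as "debug" into the stdlib constant
logging.basicConfig(level=settings.loglevels.get("debug", settings.loglevels["info"]))
logging.debug("logging configured")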