From fc54524162628a8545dcac1db7f5cb6f41d1690a Mon Sep 17 00:00:00 2001
From: Peter Molnar
Date: Mon, 7 Sep 2020 08:35:30 +0100
Subject: [PATCH] pre-cleanup commit

---
 HackerNews.py |  46 ++++++++++++++++++++++
 Wallabag.py   | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++
 run.py        |  21 +++-------
 settings.py   |   9 +++--
 4 files changed, 166 insertions(+), 19 deletions(-)
 create mode 100644 HackerNews.py
 create mode 100644 Wallabag.py

diff --git a/HackerNews.py b/HackerNews.py
new file mode 100644
index 0000000..0916335
--- /dev/null
+++ b/HackerNews.py
@@ -0,0 +1,46 @@
+import os
+import glob
+import logging
+import json
+import requests
+from common import cached_property
+import settings
+import keys
+
+class HackerNews(object):
+    # base URL of the public HackerNews Firebase API; no trailing
+    # slash, the f-strings below add one
+    url = "https://hacker-news.firebaseio.com/v0"
+
+    @property
+    def tdir(self):
+        return os.path.join(settings.paths.get("archive"), "hn")
+
+    @cached_property
+    def existing(self):
+        # ids of already archived entries, from the saved JSON filenames
+        return [os.path.basename(fpath).replace(".json", "") for fpath in glob.glob(os.path.join(self.tdir, "*.json"))]
+
+    def run(self):
+        os.makedirs(self.tdir, exist_ok=True)
+        user = keys.hackernews.get("username")
+        content = requests.get(f"{self.url}/user/{user}.json")
+        data = content.json()
+        if "submitted" not in data:
+            return
+        for entry in data["submitted"]:
+            # the API returns item ids as integers, while `existing`
+            # holds filename stems, so compare as strings
+            if str(entry) in self.existing:
+                logging.debug("skipping HackerNews entry %s", entry)
+                continue
+            entry_data = requests.get(f"{self.url}/item/{entry}.json")
+            target = os.path.join(self.tdir, f"{entry}.json")
+            with open(target, "wt") as f:
+                logging.info("saving HackerNews entry %s", entry)
+                f.write(json.dumps(entry_data.json(), indent=4, ensure_ascii=False))
+
+
+if __name__ == "__main__":
+    hn = HackerNews()
+    hn.run()
diff --git a/Wallabag.py b/Wallabag.py
new file mode 100644
index 0000000..ba711ad
--- /dev/null
+++ b/Wallabag.py
@@ -0,0 +1,109 @@
+import os
+import glob
+import json
+import re
+import logging
+import requests
+import settings
+import keys
+from shutil import copyfileobj
+from common import cached_property
+from common import url2slug
+
+# exported entry filename pattern: "{id}_{slug}.epub"
+RE_FNAME = re.compile(r"(?P<id>[0-9]+)_(?P<slug>.*)\.epub")
+
+
+class Wallabag(object):
+    def __init__(self):
+        self.access_token = ""
+        self.auth = {}
+
+    @property
+    def tdir(self):
+        return settings.paths.bookmarks
+
+    @cached_property
+    def existing(self):
+        return [
+            os.path.basename(fpath)
+            for fpath in glob.glob(os.path.join(self.tdir, "*"))
+        ]
+
+    def archive_batch(self, entries):
+        for entry in entries["_embedded"]["items"]:
+            ename = url2slug(entry["url"])
+            eid = entry["id"]
+            fname = f"{ename}.epub"
+            target = os.path.join(self.tdir, fname)
+
+            if fname in self.existing:
+                logging.debug("skipping existing entry %s", eid)
+                continue
+            with requests.get(
+                f"{keys.wallabag.url}/api/entries/{eid}/export.epub",
+                stream=True,
+                headers=self.auth,
+            ) as r:
+                logging.info("saving %s to %s", eid, target)
+                # ask urllib3 to undo any content-encoding before the
+                # raw stream is written to disk
+                r.raw.decode_content = True
+                with open(target, "wb") as f:
+                    copyfileobj(r.raw, f)
+
+    def run(self):
+        os.makedirs(self.tdir, exist_ok=True)
+        tparams = {
+            "grant_type": "password",
+            "client_id": keys.wallabag.client_id,
+            "client_secret": keys.wallabag.client_secret,
+            "username": keys.wallabag.username,
+            "password": keys.wallabag.password,
+        }
+        token = requests.post(
+            f"{keys.wallabag.url}/oauth/v2/token", data=tparams
+        )
+        try:
+            tdata = token.json()
+        except Exception as e:
+            logging.error("failed to get token from wallabag: %s", e)
+            return
+        if "access_token" not in tdata:
+            logging.error("missing access token from wallabag response")
+            return
+
+        self.access_token = tdata["access_token"]
+        self.auth = {"Authorization": f"Bearer {self.access_token}"}
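+
+        # /api/entries is paginated: the first, parameter-less request
+        # reveals the server-side page size ("limit") and the total
+        # page count, which drive the fetch loop below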
f"Bearer {self.access_token}"} + + r = requests.get( + f"{keys.wallabag.url}/api/entries", headers=self.auth + ) + try: + entries = r.json() + except Exception as e: + logging.error( + "failed to get first page from wallabag: %s", e + ) + return + + batch = entries["limit"] + pages = entries["pages"] + page = entries["page"] + self.archive_batch(entries) + while page < pages: + page = page + 1 + paged = {"perPage": batch, "page": page} + r = requests.get( + f"{keys.wallabag.url}/api/entries", + params=paged, + headers=self.auth, + ) + entries = r.json() + self.archive_batch(entries) + + +if __name__ == "__main__": + wbag = Wallabag() + wbag.run() diff --git a/run.py b/run.py index 9f0ce14..2f40e28 100644 --- a/run.py +++ b/run.py @@ -5,26 +5,17 @@ import LastFM import DeviantArt import Flickr #import Artstation +import HackerNews from pprint import pprint -lfm = LastFM.LastFM() -lfm.run() - -#opml = common.Follows() - silos = [ - DeviantArt.DAFavs(), - Flickr.FlickrFavs(), - Tumblr.TumblrFavs(), +# DeviantArt.DAFavs(), +# Flickr.FlickrFavs(), +# Tumblr.TumblrFavs(), # Artstation.ASFavs(), +# LastFM.LastFM(), + HackerNews.HackerNews() ] for silo in silos: silo.run() - #silo.sync_with_aperture() - #opml.update({silo.silo: silo.feeds}) - -#opml.sync() -#opml.export() -opml = common.Aperture() -opml.export() diff --git a/settings.py b/settings.py index 957b717..bb60d17 100644 --- a/settings.py +++ b/settings.py @@ -8,7 +8,7 @@ class nameddict(dict): __setattr__ = dict.__setitem__ __delattr__ = dict.__delitem__ -base = os.path.abspath(os.path.expanduser("~/Projects/petermolnar.net")) +#base = os.path.abspath(os.path.expanduser("~/")) opml = nameddict({ "owner": "Peter Molnar", @@ -18,9 +18,10 @@ opml = nameddict({ }) paths = nameddict({ - "archive": os.path.join(base, "archive"), - "content": os.path.join(base, "content"), - "bookmarks": os.path.join(base, "archive", "bookmarks") + "archive": os.path.join(os.path.expanduser('~'), "archive"), + #"archive": os.path.join(os.path.expanduser('~'), ""), + #"content": os.path.join(base, "content"), + #"bookmarks": os.path.join(base, "archive", "bookmarks") }) loglevels = {"critical": 50, "error": 40, "warning": 30, "info": 20, "debug": 10}