pre-cleanup commit

This commit is contained in:
Peter Molnar 2020-09-07 08:35:30 +01:00
parent d5665d15e7
commit fc54524162
4 changed files with 157 additions and 19 deletions

40
HackerNews.py Normal file
View file

@@ -0,0 +1,40 @@
import os
import glob
import logging
import json
import requests
from common import cached_property
import settings
import keys
class HackerNews(object):
    """Archives the configured user's HackerNews submissions as JSON files."""

    # No trailing slash: the f-strings below supply their own "/", the
    # original trailing slash produced double-slash URLs like ".../v0//user/...".
    url = "https://hacker-news.firebaseio.com/v0"

    @property
    def tdir(self):
        """Target directory for archived entries; created if missing."""
        d = os.path.join(settings.paths.get("archive"), "hn")
        # the glob in `existing` and the open() in run() both need this to exist
        os.makedirs(d, exist_ok=True)
        return d

    @cached_property
    def existing(self):
        """IDs (as strings — the file basenames) of already-archived entries."""
        return [
            os.path.basename(fpath).replace(".json", "")
            for fpath in glob.glob(os.path.join(self.tdir, "*.json"))
        ]

    def run(self):
        """Fetch the user's submission list and save any entry not yet on disk."""
        user = keys.hackernews.get("username")
        content = requests.get(f"{self.url}/user/{user}.json")
        data = content.json()
        if "submitted" not in data:
            return
        for entry in data["submitted"]:
            # The HN API returns item IDs as ints; `existing` holds strings
            # (file basenames), so compare as str — the original int-vs-str
            # check never matched and re-downloaded every entry on each run.
            if str(entry) in self.existing:
                logging.debug("skipping HackerNews entry %s", entry)
                continue
            entry_data = requests.get(f"{self.url}/item/{entry}.json")
            target = os.path.join(self.tdir, f"{entry}.json")
            with open(target, "wt") as f:
                logging.info("saving HackerNews entry %s", entry)
                f.write(
                    json.dumps(entry_data.json(), indent=4, ensure_ascii=False)
                )
if __name__ == "__main__":
    # Script entry point: archive the configured user's HN submissions.
    HackerNews().run()

106
Wallabag.py Normal file
View file

@@ -0,0 +1,106 @@
import os
import glob
import json
import re
import logging
import requests
import settings
import keys
from shutil import copyfileobj
from common import cached_property
from common import url2slug
from pprint import pprint
# Parses archived filenames of the form "<numeric id>_<slug>.epub".
# NOTE(review): archive_batch() below writes files named "<slug>.epub"
# (no id prefix), so this pattern would not match them; it is unused in
# this file — confirm against other callers before relying on it.
RE_FNAME = re.compile(r"(?P<id>[0-9]+)_(?P<slug>.*).epub")
class Wallabag(object):
    """Archives Wallabag bookmarks as .epub files via the Wallabag HTTP API."""

    def __init__(self):
        self.access_token = ""
        # Authorization header dict; populated by run() after the OAuth call.
        self.auth = {}

    @property
    def tdir(self):
        """Target directory for saved .epub files."""
        return settings.paths.bookmarks

    @cached_property
    def existing(self):
        """Basenames of all files already present in the target directory."""
        return [
            os.path.basename(fpath)
            for fpath in glob.glob(os.path.join(self.tdir, "*"))
        ]

    def archive_batch(self, entries):
        """Save every item of one decoded /api/entries page not yet on disk.

        `entries` is one JSON page as returned by the Wallabag API.
        """
        # Guard against malformed/error responses instead of raising KeyError.
        if "_embedded" not in entries or "items" not in entries["_embedded"]:
            logging.error("unexpected wallabag page, no embedded items found")
            return
        for entry in entries["_embedded"]["items"]:
            ename = url2slug(entry["url"])
            eid = entry["id"]
            fname = f"{ename}.epub"
            target = os.path.join(self.tdir, fname)
            if fname in self.existing:
                logging.debug("skipping existing entry %s", entry["id"])
            else:
                # stream=True + r.raw avoids buffering whole epubs in memory
                with requests.get(
                    f"{keys.wallabag.url}/api/entries/{eid}/export.epub",
                    stream=True,
                    headers=self.auth,
                ) as r:
                    logging.info("saving %s to %s", eid, target)
                    with open(target, "wb") as f:
                        copyfileobj(r.raw, f)

    def run(self):
        """Authenticate via OAuth, then walk and archive every entries page."""
        tparams = {
            "grant_type": "password",
            "client_id": keys.wallabag.client_id,
            "client_secret": keys.wallabag.client_secret,
            "username": keys.wallabag.username,
            "password": keys.wallabag.password,
        }
        token = requests.post(
            f"{keys.wallabag.url}/oauth/v2/token", data=tparams
        )
        try:
            tdata = token.json()
            if "access_token" not in tdata:
                logging.error(
                    "missing access token from wallabag response"
                )
                return
        except Exception as e:
            logging.error("failed to get token from wallabag: %s", e)
            return
        self.access_token = tdata["access_token"]
        self.auth = {"Authorization": f"Bearer {self.access_token}"}
        r = requests.get(
            f"{keys.wallabag.url}/api/entries", headers=self.auth
        )
        try:
            entries = r.json()
        except Exception as e:
            logging.error(
                "failed to get first page from wallabag: %s", e
            )
            return
        batch = entries["limit"]
        pages = entries["pages"]
        page = entries["page"]
        self.archive_batch(entries)
        while page < pages:
            page = page + 1
            paged = {"perPage": batch, "page": page}
            r = requests.get(
                f"{keys.wallabag.url}/api/entries",
                params=paged,
                headers=self.auth,
            )
            # Parse defensively, consistent with the first-page handling
            # above — the original let a bad page raise mid-pagination.
            try:
                entries = r.json()
            except Exception as e:
                logging.error(
                    "failed to get page %s from wallabag: %s", page, e
                )
                return
            self.archive_batch(entries)
if __name__ == "__main__":
    # Script entry point: sync the Wallabag archive.
    Wallabag().run()

21
run.py
View file

@@ -5,26 +5,17 @@ import LastFM
import DeviantArt
import Flickr
#import Artstation
import HackerNews
from pprint import pprint
lfm = LastFM.LastFM()
lfm.run()
#opml = common.Follows()
silos = [
DeviantArt.DAFavs(),
Flickr.FlickrFavs(),
Tumblr.TumblrFavs(),
# DeviantArt.DAFavs(),
# Flickr.FlickrFavs(),
# Tumblr.TumblrFavs(),
# Artstation.ASFavs(),
# LastFM.LastFM(),
HackerNews.HackerNews()
]
for silo in silos:
silo.run()
#silo.sync_with_aperture()
#opml.update({silo.silo: silo.feeds})
#opml.sync()
#opml.export()
opml = common.Aperture()
opml.export()

View file

@@ -8,7 +8,7 @@ class nameddict(dict):
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
base = os.path.abspath(os.path.expanduser("~/Projects/petermolnar.net"))
#base = os.path.abspath(os.path.expanduser("~/"))
opml = nameddict({
"owner": "Peter Molnar",
@@ -18,9 +18,10 @@ opml = nameddict({
})
paths = nameddict({
"archive": os.path.join(base, "archive"),
"content": os.path.join(base, "content"),
"bookmarks": os.path.join(base, "archive", "bookmarks")
"archive": os.path.join(os.path.expanduser('~'), "archive"),
#"archive": os.path.join(os.path.expanduser('~'), ""),
#"content": os.path.join(base, "content"),
#"bookmarks": os.path.join(base, "archive", "bookmarks")
})
loglevels = {"critical": 50, "error": 40, "warning": 30, "info": 20, "debug": 10}