pre-cleanup commit
This commit is contained in:
parent
d5665d15e7
commit
fc54524162
4 changed files with 157 additions and 19 deletions
40
HackerNews.py
Normal file
40
HackerNews.py
Normal file
|
@ -0,0 +1,40 @@
|
|||
import os
|
||||
import glob
|
||||
import logging
|
||||
import json
|
||||
import requests
|
||||
from common import cached_property
|
||||
import settings
|
||||
import keys
|
||||
|
||||
class HackerNews(object):
    """Archive the configured user's Hacker News submissions as JSON files.

    Every item id listed under the user's "submitted" key is fetched from the
    official Firebase API and written to <archive>/hn/<id>.json; ids that
    already have a file on disk are skipped.
    """

    # Base API endpoint; note the trailing slash, so request paths below
    # must NOT add another one (f"{self.url}/user/..." would yield "v0//user").
    url = "https://hacker-news.firebaseio.com/v0/"

    @property
    def tdir(self):
        # Target directory for the downloaded JSON files.
        return os.path.join(settings.paths.get("archive"), "hn")

    @cached_property
    def existing(self):
        # Item ids already saved, derived from filenames — these are STRINGS.
        return [
            os.path.basename(fpath).replace(".json", "")
            for fpath in glob.glob(os.path.join(self.tdir, "*.json"))
        ]

    def run(self):
        """Fetch the user's submitted item ids and save any missing items.

        Returns early (saving nothing) when the user record has no
        "submitted" key.
        """
        user = keys.hackernews.get("username")
        content = requests.get(f"{self.url}user/{user}.json", timeout=30)
        data = content.json()
        if "submitted" not in data:
            return
        for entry in data["submitted"]:
            # The API returns item ids as integers, while `existing` holds
            # filename-derived strings — without str() the membership test
            # never matches and every item is re-downloaded on each run.
            if str(entry) in self.existing:
                logging.debug("skipping HackerNews entry %s", entry)
                continue
            entry_data = requests.get(f"{self.url}item/{entry}.json", timeout=30)
            target = os.path.join(self.tdir, f"{entry}.json")
            with open(target, "wt") as f:
                logging.info("saving HackerNews entry %s", entry)
                f.write(json.dumps(entry_data.json(), indent=4, ensure_ascii=False))
|
||||
|
||||
|
||||
# Allow running this module directly as a one-shot archiver.
if __name__ == "__main__":
    HackerNews().run()
|
106
Wallabag.py
Normal file
106
Wallabag.py
Normal file
|
@ -0,0 +1,106 @@
|
|||
import os
|
||||
import glob
|
||||
import json
|
||||
import re
|
||||
import logging
|
||||
import requests
|
||||
import settings
|
||||
import keys
|
||||
from shutil import copyfileobj
|
||||
from common import cached_property
|
||||
from common import url2slug
|
||||
from pprint import pprint
|
||||
|
||||
# Filename pattern for previously exported entries: "<id>_<slug>.epub".
# The dot before "epub" is escaped — unescaped it matched ANY character,
# so e.g. "3_fooXepub" was wrongly accepted as an export filename.
RE_FNAME = re.compile(r"(?P<id>[0-9]+)_(?P<slug>.*)\.epub")
|
||||
|
||||
|
||||
class Wallabag(object):
    """Download every Wallabag entry as an epub into the bookmarks folder.

    run() authenticates with the OAuth2 password grant, then walks every
    page of /api/entries, exporting each item it hasn't saved yet.
    """

    def __init__(self):
        # Both are filled in by run() after a successful token exchange.
        self.access_token = ""
        self.auth = {}

    @property
    def tdir(self):
        # Target directory for the epub exports.
        return settings.paths.bookmarks

    @cached_property
    def existing(self):
        # Basenames of files already present, used to skip re-downloads.
        return [
            os.path.basename(fpath)
            for fpath in glob.glob(os.path.join(self.tdir, "*"))
        ]

    def archive_batch(self, entries):
        """Save the epub export of every item in one /api/entries page.

        `entries` is one decoded API response; items live under
        entries["_embedded"]["items"].
        """
        for entry in entries["_embedded"]["items"]:
            ename = url2slug(entry["url"])
            eid = entry["id"]
            # NOTE(review): RE_FNAME expects "<id>_<slug>.epub" but files are
            # written as "<slug>.epub" — confirm which naming is intended.
            fname = f"{ename}.epub"
            target = os.path.join(self.tdir, fname)

            if fname in self.existing:
                logging.debug("skipping existing entry %s", entry["id"])
            else:
                with requests.get(
                    f"{keys.wallabag.url}/api/entries/{eid}/export.epub",
                    stream=True,
                    headers=self.auth,
                    timeout=60,
                ) as r:
                    logging.info("saving %s to %s", eid, target)
                    # Without decode_content, copyfileobj(r.raw, ...) writes
                    # the still-compressed wire bytes whenever the server
                    # gzips the response, corrupting the saved epub.
                    r.raw.decode_content = True
                    with open(target, "wb") as f:
                        copyfileobj(r.raw, f)

    def run(self):
        """Authenticate, then fetch and archive every entries page."""
        # OAuth2 "password" grant against the instance's token endpoint.
        tparams = {
            "grant_type": "password",
            "client_id": keys.wallabag.client_id,
            "client_secret": keys.wallabag.client_secret,
            "username": keys.wallabag.username,
            "password": keys.wallabag.password,
        }
        token = requests.post(
            f"{keys.wallabag.url}/oauth/v2/token", data=tparams, timeout=30
        )
        try:
            tdata = token.json()
            if "access_token" not in tdata:
                logging.error(
                    "missing access token from wallabag response"
                )
                return
        except Exception as e:
            logging.error("failed to get token from wallabag: %s", e)
            return

        self.access_token = tdata["access_token"]
        self.auth = {"Authorization": f"Bearer {self.access_token}"}

        # First page also tells us the paging parameters (limit/pages/page).
        r = requests.get(
            f"{keys.wallabag.url}/api/entries", headers=self.auth, timeout=30
        )
        try:
            entries = r.json()
        except Exception as e:
            logging.error(
                "failed to get first page from wallabag: %s", e
            )
            return

        batch = entries["limit"]
        pages = entries["pages"]
        page = entries["page"]
        self.archive_batch(entries)
        while page < pages:
            page = page + 1
            paged = {"perPage": batch, "page": page}
            r = requests.get(
                f"{keys.wallabag.url}/api/entries",
                params=paged,
                headers=self.auth,
                timeout=30,
            )
            entries = r.json()
            self.archive_batch(entries)
|
||||
|
||||
|
||||
# Allow running this module directly as a one-shot archiver.
if __name__ == "__main__":
    Wallabag().run()
|
21
run.py
21
run.py
|
@ -5,26 +5,17 @@ import LastFM
|
|||
import DeviantArt
|
||||
import Flickr
|
||||
#import Artstation
|
||||
import HackerNews
|
||||
from pprint import pprint
|
||||
|
||||
lfm = LastFM.LastFM()
|
||||
lfm.run()
|
||||
|
||||
#opml = common.Follows()
|
||||
|
||||
silos = [
|
||||
DeviantArt.DAFavs(),
|
||||
Flickr.FlickrFavs(),
|
||||
Tumblr.TumblrFavs(),
|
||||
# DeviantArt.DAFavs(),
|
||||
# Flickr.FlickrFavs(),
|
||||
# Tumblr.TumblrFavs(),
|
||||
# Artstation.ASFavs(),
|
||||
# LastFM.LastFM(),
|
||||
HackerNews.HackerNews()
|
||||
]
|
||||
|
||||
for silo in silos:
|
||||
silo.run()
|
||||
#silo.sync_with_aperture()
|
||||
#opml.update({silo.silo: silo.feeds})
|
||||
|
||||
#opml.sync()
|
||||
#opml.export()
|
||||
opml = common.Aperture()
|
||||
opml.export()
|
||||
|
|
|
@ -8,7 +8,7 @@ class nameddict(dict):
|
|||
__setattr__ = dict.__setitem__
|
||||
__delattr__ = dict.__delitem__
|
||||
|
||||
base = os.path.abspath(os.path.expanduser("~/Projects/petermolnar.net"))
|
||||
#base = os.path.abspath(os.path.expanduser("~/"))
|
||||
|
||||
opml = nameddict({
|
||||
"owner": "Peter Molnar",
|
||||
|
@ -18,9 +18,10 @@ opml = nameddict({
|
|||
})
|
||||
|
||||
paths = nameddict({
|
||||
"archive": os.path.join(base, "archive"),
|
||||
"content": os.path.join(base, "content"),
|
||||
"bookmarks": os.path.join(base, "archive", "bookmarks")
|
||||
"archive": os.path.join(os.path.expanduser('~'), "archive"),
|
||||
#"archive": os.path.join(os.path.expanduser('~'), ""),
|
||||
#"content": os.path.join(base, "content"),
|
||||
#"bookmarks": os.path.join(base, "archive", "bookmarks")
|
||||
})
|
||||
|
||||
loglevels = {"critical": 50, "error": 40, "warning": 30, "info": 20, "debug": 10}
|
||||
|
|
Loading…
Reference in a new issue