pre-cleanup commit

parent d5665d15e7
commit fc54524162

4 changed files with 157 additions and 19 deletions
HackerNews.py | 40 (new file)
@@ -0,0 +1,40 @@
+import os
+import glob
+import logging
+import json
+import requests
+from common import cached_property
+import settings
+import keys
+
+
+class HackerNews(object):
+    # no trailing slash here: the f-strings below add their own
+    url = "https://hacker-news.firebaseio.com/v0"
+
+    @property
+    def tdir(self):
+        return os.path.join(settings.paths.get("archive"), "hn")
+
+    @cached_property
+    def existing(self):
+        # ids of the items already archived, derived from the filenames
+        return [
+            os.path.basename(fpath).replace(".json", "")
+            for fpath in glob.glob(os.path.join(self.tdir, "*.json"))
+        ]
+
+    def run(self):
+        user = keys.hackernews.get("username")
+        content = requests.get(f"{self.url}/user/{user}.json")
+        data = content.json()
+        if "submitted" not in data:
+            return
+        for entry in data["submitted"]:
+            # the API returns item ids as integers, while `existing`
+            # holds strings; compare as strings so the skip works
+            if str(entry) in self.existing:
+                logging.debug("skipping HackerNews entry %s", entry)
+                continue
+            entry_data = requests.get(f"{self.url}/item/{entry}.json")
+            target = os.path.join(self.tdir, f"{entry}.json")
+            with open(target, "wt") as f:
+                logging.info("saving HackerNews entry %s", entry)
+                f.write(json.dumps(entry_data.json(), indent=4, ensure_ascii=False))
+
+
+if __name__ == "__main__":
+    hn = HackerNews()
+    hn.run()
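Both new files import cached_property from the local "common" module, which is not included in this commit. A minimal sketch of what it presumably provides, assuming behaviour equivalent to functools.cached_property from Python 3.8+; the implementation below is an illustration, not the repository's actual code:

# assumed sketch of common.cached_property; not part of this commit
class cached_property(object):
    """Run the decorated method once, then cache the result on the instance."""

    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, instance, owner=None):
        if instance is None:
            return self
        # storing the value in the instance dict bypasses this
        # non-data descriptor on every later attribute lookup
        value = self.func(instance)
        instance.__dict__[self.name] = value
        return value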
Wallabag.py | 106 (new file)
@@ -0,0 +1,106 @@
+import os
+import glob
+import json
+import re
+import logging
+import requests
+import settings
+import keys
+from shutil import copyfileobj
+from common import cached_property
+from common import url2slug
+from pprint import pprint
+
+
+RE_FNAME = re.compile(r"(?P<id>[0-9]+)_(?P<slug>.*)\.epub")
+
+
+class Wallabag(object):
+    def __init__(self):
+        self.access_token = ""
+        self.auth = {}
+
+    @property
+    def tdir(self):
+        return settings.paths.bookmarks
+
+    @cached_property
+    def existing(self):
+        return [
+            os.path.basename(fpath)
+            for fpath in glob.glob(os.path.join(self.tdir, "*"))
+        ]
+
+    def archive_batch(self, entries):
+        for entry in entries["_embedded"]["items"]:
+            ename = url2slug(entry["url"])
+            eid = entry["id"]
+            fname = f"{ename}.epub"
+            target = os.path.join(self.tdir, fname)
+
+            if fname in self.existing:
+                logging.debug("skipping existing entry %s", entry["id"])
+            else:
+                with requests.get(
+                    f"{keys.wallabag.url}/api/entries/{eid}/export.epub",
+                    stream=True,
+                    headers=self.auth,
+                ) as r:
+                    logging.info("saving %s to %s", eid, target)
+                    with open(target, "wb") as f:
+                        copyfileobj(r.raw, f)
+
+    def run(self):
+        tparams = {
+            "grant_type": "password",
+            "client_id": keys.wallabag.client_id,
+            "client_secret": keys.wallabag.client_secret,
+            "username": keys.wallabag.username,
+            "password": keys.wallabag.password,
+        }
+        token = requests.post(
+            f"{keys.wallabag.url}/oauth/v2/token", data=tparams
+        )
+        try:
+            tdata = token.json()
+            if "access_token" not in tdata:
+                logging.error("missing access token from wallabag response")
+                return
+        except Exception as e:
+            logging.error("failed to get token from wallabag: %s", e)
+            return
+
+        self.access_token = tdata["access_token"]
+        self.auth = {"Authorization": f"Bearer {self.access_token}"}
+
+        r = requests.get(
+            f"{keys.wallabag.url}/api/entries", headers=self.auth
+        )
+        try:
+            entries = r.json()
+        except Exception as e:
+            logging.error("failed to get first page from wallabag: %s", e)
+            return
+
+        batch = entries["limit"]
+        pages = entries["pages"]
+        page = entries["page"]
+        self.archive_batch(entries)
+        while page < pages:
+            page = page + 1
+            paged = {"perPage": batch, "page": page}
+            r = requests.get(
+                f"{keys.wallabag.url}/api/entries",
+                params=paged,
+                headers=self.auth,
+            )
+            entries = r.json()
+            self.archive_batch(entries)
+
+
+if __name__ == "__main__":
+    wbag = Wallabag()
+    wbag.run()
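Both scripts also read credentials from a local "keys" module that is not part of the commit. Judging by the attribute access above (keys.hackernews.get("username"), keys.wallabag.client_id, and so on), it presumably holds one mapping per service; the sketch below is an assumption, with placeholder values and an assumed reuse of the nameddict helper from settings:

# assumed shape of keys.py; every value is a placeholder, not a real credential
from settings import nameddict

hackernews = nameddict({
    "username": "example-user",
})

wallabag = nameddict({
    "url": "https://wallabag.example.com",
    "client_id": "client-id-placeholder",
    "client_secret": "client-secret-placeholder",
    "username": "example-user",
    "password": "example-password",
})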
run.py | 21
@@ -5,26 +5,17 @@ import LastFM
 import DeviantArt
 import Flickr
 #import Artstation
+import HackerNews
 from pprint import pprint
-
-lfm = LastFM.LastFM()
-lfm.run()
-
-#opml = common.Follows()
 
 silos = [
-    DeviantArt.DAFavs(),
-    Flickr.FlickrFavs(),
-    Tumblr.TumblrFavs(),
+    # DeviantArt.DAFavs(),
+    # Flickr.FlickrFavs(),
+    # Tumblr.TumblrFavs(),
     # Artstation.ASFavs(),
+    # LastFM.LastFM(),
+    HackerNews.HackerNews()
 ]
 
 for silo in silos:
     silo.run()
-    #silo.sync_with_aperture()
-    #opml.update({silo.silo: silo.feeds})
-
-#opml.sync()
-#opml.export()
-opml = common.Aperture()
-opml.export()
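After this change run.py no longer wires up each service by hand: the silos list is duck-typed, and anything in it only needs to expose a run() method. A minimal sketch of a new silo that would slot into the loop (the class is hypothetical, purely to illustrate the implicit interface):

# hypothetical silo; anything with a run() method fits the loop in run.py
class ExampleSilo(object):
    def run(self):
        # fetch entries from the remote service and archive them locally
        print("archiving ExampleSilo")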
settings.py
@@ -8,7 +8,7 @@ class nameddict(dict):
     __setattr__ = dict.__setitem__
     __delattr__ = dict.__delitem__
 
-base = os.path.abspath(os.path.expanduser("~/Projects/petermolnar.net"))
+#base = os.path.abspath(os.path.expanduser("~/"))
 
 opml = nameddict({
     "owner": "Peter Molnar",
@@ -18,9 +18,10 @@ opml = nameddict({
 })
 
 paths = nameddict({
-    "archive": os.path.join(base, "archive"),
-    "content": os.path.join(base, "content"),
-    "bookmarks": os.path.join(base, "archive", "bookmarks")
+    "archive": os.path.join(os.path.expanduser('~'), "archive"),
+    #"archive": os.path.join(os.path.expanduser('~'), ""),
+    #"content": os.path.join(base, "content"),
+    #"bookmarks": os.path.join(base, "archive", "bookmarks")
 })
 
 loglevels = {"critical": 50, "error": 40, "warning": 30, "info": 20, "debug": 10}
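The loglevels mapping mirrors the numeric constants of the stdlib logging module, so a level name coming from configuration can be passed straight to logging.basicConfig. A short usage sketch, assuming this settings module; the "debug" choice and the "info" fallback are illustrative:

import logging
import settings

# translate a configured level name such as "debug" into the stdlib constant
logging.basicConfig(level=settings.loglevels.get("debug", settings.loglevels["info"]))
logging.debug("logging configured")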