silo.pasta/Wallabag.py
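
"""Archive Wallabag bookmarks as EPUB files.

Authenticates against the configured Wallabag instance, walks every page of
the /api/entries listing and saves each entry's EPUB export into the local
bookmarks directory, skipping entries that were already downloaded.
"""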

import os
import glob
import json
import re
import logging
import requests
import settings
import keys
from shutil import copyfileobj
from common import cached_property
from common import url2slug
from pprint import pprint
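
# The `keys` and `settings` imports above are local configuration modules that
# are not part of this file; the sketch below is only an assumption about the
# attributes this script actually reads, not their real layout.
#
#   # keys.py (hypothetical)
#   from types import SimpleNamespace
#   wallabag = SimpleNamespace(
#       url="https://wallabag.example.com",  # base URL, no trailing slash
#       client_id="…",
#       client_secret="…",
#       username="…",
#       password="…",
#   )
#
#   # settings.py (hypothetical)
#   from types import SimpleNamespace
#   paths = SimpleNamespace(bookmarks="/path/to/bookmarks")
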
# pattern of archived EPUB filenames of the form "<id>_<slug>.epub"
RE_FNAME = re.compile(r"(?P<id>[0-9]+)_(?P<slug>.*)\.epub")


class Wallabag(object):
    def __init__(self):
        self.access_token = ""
        self.auth = {}

    @property
    def tdir(self):
        # local directory the EPUB exports are saved into
        return settings.paths.bookmarks

    @cached_property
    def existing(self):
        # filenames already present in the target directory
        return [
            os.path.basename(fpath)
            for fpath in glob.glob(os.path.join(self.tdir, "*"))
        ]
    def archive_batch(self, entries):
        # download every entry of one API page as an EPUB, skipping files
        # that already exist in the target directory
        for entry in entries["_embedded"]["items"]:
            ename = url2slug(entry["url"])
            eid = entry["id"]
            fname = f"{ename}.epub"
            target = os.path.join(self.tdir, fname)
            if fname in self.existing:
                logging.debug("skipping existing entry %s", eid)
            else:
                with requests.get(
                    f"{keys.wallabag.url}/api/entries/{eid}/export.epub",
                    stream=True,
                    headers=self.auth,
                ) as r:
                    logging.info("saving %s to %s", eid, target)
                    # let urllib3 decode gzip/deflate before writing to disk
                    r.raw.decode_content = True
                    with open(target, "wb") as f:
                        copyfileobj(r.raw, f)
    def run(self):
        # exchange the configured credentials for an OAuth2 access token
        tparams = {
            "grant_type": "password",
            "client_id": keys.wallabag.client_id,
            "client_secret": keys.wallabag.client_secret,
            "username": keys.wallabag.username,
            "password": keys.wallabag.password,
        }
        token = requests.post(
            f"{keys.wallabag.url}/oauth/v2/token", data=tparams
        )
        try:
            tdata = token.json()
            if "access_token" not in tdata:
                logging.error("missing access token from wallabag response")
                return
        except Exception as e:
            logging.error("failed to get token from wallabag: %s", e)
            return
        self.access_token = tdata["access_token"]
        self.auth = {"Authorization": f"Bearer {self.access_token}"}

        # fetch the first page of entries to learn the pagination parameters
        r = requests.get(
            f"{keys.wallabag.url}/api/entries", headers=self.auth
        )
        try:
            entries = r.json()
        except Exception as e:
            logging.error("failed to get first page from wallabag: %s", e)
            return
        batch = entries["limit"]
        pages = entries["pages"]
        page = entries["page"]
        self.archive_batch(entries)

        # walk the remaining pages with the same page size
        while page < pages:
            page = page + 1
            paged = {"perPage": batch, "page": page}
            r = requests.get(
                f"{keys.wallabag.url}/api/entries",
                params=paged,
                headers=self.auth,
            )
            entries = r.json()
            self.archive_batch(entries)


if __name__ == "__main__":
    wbag = Wallabag()
    wbag.run()
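
# Shape of the /api/entries response this script relies on (an assumption
# reconstructed from the fields read above, not a full API reference):
#
#   {
#     "page": 1, "limit": 30, "pages": 4,
#     "_embedded": {"items": [{"id": 123, "url": "https://…", …}, …]}
#   }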