106 lines
3 KiB
Python
106 lines
3 KiB
Python
import os
|
|
import glob
|
|
import json
|
|
import re
|
|
import logging
|
|
import requests
|
|
import settings
|
|
import keys
|
|
from shutil import copyfileobj
|
|
from common import cached_property
|
|
from common import url2slug
|
|
from pprint import pprint
|
|
|
|
# Matches archive file names of the form "<numeric id>_<slug>.epub" and
# exposes the two parts as the named groups "id" and "slug".
# The dot in front of the extension is escaped: unescaped it matched any
# character, so names such as "12_slugXepub" were accepted as well.
RE_FNAME = re.compile(r"(?P<id>[0-9]+)_(?P<slug>.*)\.epub")
|
|
|
|
|
|
class Wallabag(object):
    """Download Wallabag entries as EPUB files into the bookmarks directory.

    ``run()`` obtains an OAuth token with the credentials from ``keys.wallabag``,
    walks every page of ``/api/entries`` and hands each page to
    ``archive_batch()``, which exports the entries that do not yet have a file
    in ``tdir``.

    All failures (network, HTTP status, malformed JSON, file system) are
    logged through ``logging`` instead of being raised, so one bad entry or
    page does not abort the rest of the run.
    """

    # Seconds passed to every ``requests`` call as ``timeout``; without a
    # timeout a stalled server would block the run indefinitely.
    timeout = 60

    # Size in bytes of the chunks streamed from the export response to disk.
    chunk_size = 64 * 1024

    def __init__(self):
        # Bearer token returned by the token endpoint; empty until run().
        self.access_token = ""
        # Header dict sent with every authenticated API request.
        self.auth = {}

    @property
    def tdir(self):
        """Return the target directory for exports (``settings.paths.bookmarks``)."""
        return settings.paths.bookmarks

    @cached_property
    def existing(self):
        """Return the base names of everything currently found in ``tdir``.

        NOTE(review): wrapped in the project's ``cached_property`` helper, so
        the listing is presumably computed once per instance -- confirm
        against ``common``.
        """
        return [
            os.path.basename(fpath)
            for fpath in glob.glob(os.path.join(self.tdir, "*"))
        ]

    def archive_batch(self, entries):
        """Export every entry of one API page that is not archived yet.

        Args:
            entries: Parsed JSON of one ``/api/entries`` page; the entries are
                read from ``entries["_embedded"]["items"]`` and each one must
                provide ``"id"`` and ``"url"``.

        Raises:
            KeyError: If the page or an entry lacks one of those keys.

        The file name is ``url2slug(entry["url"]) + ".epub"``. Download and
        file system errors are logged and the entry is skipped.
        """
        for entry in entries["_embedded"]["items"]:
            eid = entry["id"]
            ename = url2slug(entry["url"])
            fname = f"{ename}.epub"

            if fname in self.existing:
                logging.debug("skipping existing entry %s", eid)
                continue

            target = os.path.join(self.tdir, fname)
            # Stream into a side file first: an interrupted download must not
            # leave a truncated file under the final name, because that name
            # is what marks the entry as already archived on later runs.
            partial = f"{target}.part"
            try:
                with requests.get(
                    f"{keys.wallabag.url}/api/entries/{eid}/export.epub",
                    stream=True,
                    headers=self.auth,
                    timeout=self.timeout,
                ) as r:
                    # Without this check an HTTP error body would be stored
                    # as if it were the EPUB.
                    r.raise_for_status()
                    logging.info("saving %s to %s", eid, target)
                    with open(partial, "wb") as f:
                        # iter_content() applies content-encoding decoding,
                        # which copying the raw stream does not.
                        for chunk in r.iter_content(chunk_size=self.chunk_size):
                            f.write(chunk)
                os.replace(partial, target)
            except (requests.RequestException, OSError) as e:
                logging.error("failed to save %s to %s: %s", eid, target, e)
                if os.path.exists(partial):
                    os.remove(partial)

    def _fetch_page(self, page=None, per_page=None):
        """Fetch and validate one page of ``/api/entries``.

        Args:
            page: Page number to request; ``None`` requests the API's default
                first page without paging parameters.
            per_page: Page size sent as ``perPage`` together with ``page``.

        Returns:
            The parsed JSON dict, or ``None`` after logging the failure.
        """
        label = "first page" if page is None else f"page {page}"
        params = None if page is None else {"perPage": per_page, "page": page}
        try:
            r = requests.get(
                f"{keys.wallabag.url}/api/entries",
                params=params,
                headers=self.auth,
                timeout=self.timeout,
            )
            r.raise_for_status()
            entries = r.json()
            # Touch the keys archive_batch() depends on so that an unexpected
            # payload is reported here rather than failing later.
            entries["_embedded"]["items"]
        except (requests.RequestException, ValueError, KeyError, TypeError) as e:
            logging.error("failed to get %s from wallabag: %s", label, e)
            return None
        return entries

    def run(self):
        """Authenticate against Wallabag and archive all entries page by page.

        Returns ``None`` in every case; problems are reported via ``logging``.
        """
        tparams = {
            "grant_type": "password",
            "client_id": keys.wallabag.client_id,
            "client_secret": keys.wallabag.client_secret,
            "username": keys.wallabag.username,
            "password": keys.wallabag.password,
        }
        try:
            # The request itself sits inside the try so that connection
            # errors are logged like every other failure.
            token = requests.post(
                f"{keys.wallabag.url}/oauth/v2/token",
                data=tparams,
                timeout=self.timeout,
            )
            tdata = token.json()
        except (requests.RequestException, ValueError) as e:
            logging.error("failed to get token from wallabag: %s", e)
            return

        if not isinstance(tdata, dict) or "access_token" not in tdata:
            logging.error("missing access token from wallabag response")
            return

        self.access_token = tdata["access_token"]
        self.auth = {"Authorization": f"Bearer {self.access_token}"}

        entries = self._fetch_page()
        if entries is None:
            return

        try:
            batch = entries["limit"]
            pages = entries["pages"]
            page = entries["page"]
        except KeyError as e:
            logging.error("failed to get first page from wallabag: %s", e)
            return

        self.archive_batch(entries)
        while page < pages:
            page = page + 1
            entries = self._fetch_page(page=page, per_page=batch)
            if entries is None:
                # Already logged; carry on with the remaining pages.
                continue
            self.archive_batch(entries)
|
|
|
|
|
|
# Script entry point: build the archiver and run one full pass when the
# module is executed directly rather than imported.
if __name__ == "__main__":
    Wallabag().run()
|