all repos — nasg @ c5ec1e95fa68d318d344cc54c08f39a3126253dc

save to archive.org if memento lookup fails
Peter Molnar hello@petermolnar.eu
Wed, 28 Aug 2019 08:52:11 +0100
commit

c5ec1e95fa68d318d344cc54c08f39a3126253dc

parent

18e67c3733de93169f8ecd02f14c0cc26b26d86d

1 file changed, 11 insertions(+), 1 deletion(-)

jump to
M wayback.py

@@ -39,6 +39,15 @@ self.category = category

self.epoch = int(arrow.utcnow().timestamp) self.oldest = "" + def save_to_archiveorg(self): + urls = [ + f"{settings.site.url}/{self.path}/", + f"{settings.site.url}/{self.path}/index.html" + ] + for url in urls: + logger.info("saving %s to archive.org ", url) + r = requests.get(f"https://web.archive.org/save/{url}") + def possible_urls(self): q = {} q[f"http://{settings.site.name}/{self.path}/"] = True

@@ -47,7 +56,7 @@

domains = settings.formerdomains + [settings.site.name] for domain in domains: q[f"http://{domain}/{self.path}/"] = True - categories = [self.category] + categories = [] if self.category in settings.formercategories: categories = categories + settings.formercategories[self.category] for category in categories:

@@ -107,6 +116,7 @@ self.oldest = maybe.url

sleep(.500) if not len(self.oldest): logger.error("no memento found for %s", self.path) + self.save_to_archiveorg() else: logger.info( "\t\toldest found memento for %s: %s :: %s",