From edc40423e866448b3bad09d07ef9b30b0c228472 Mon Sep 17 00:00:00 2001 From: Peter Molnar Date: Thu, 1 Jun 2023 18:06:30 +0100 Subject: [PATCH] small updates & fixes --- DeviantArt.py | 2 +- LastFM.py | 9 ++++++--- common.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++---- run | 3 +-- run.py | 4 ++-- settings.py | 2 +- 6 files changed, 57 insertions(+), 13 deletions(-) diff --git a/DeviantArt.py b/DeviantArt.py index a70d4ea..b354402 100644 --- a/DeviantArt.py +++ b/DeviantArt.py @@ -133,7 +133,7 @@ class DAFav(common.ImgFav): "deviantart_%s_%s_%s" % ( common.url2slug("%s" % self.deviation.author), - self.id, + self.id.replace("-", "_"), common.url2slug("%s" % self.title), ), ) diff --git a/LastFM.py b/LastFM.py index f8c37a2..2739464 100644 --- a/LastFM.py +++ b/LastFM.py @@ -6,11 +6,13 @@ from operator import attrgetter from collections import namedtuple import requests import arrow +from datetime import datetime import settings import keys from pprint import pprint from math import floor from common import cached_property +import sys Track = namedtuple( "Track", ["timestamp", "artist", "album", "title", "artistid", "albumid", "img"] @@ -43,9 +45,10 @@ class LastFM(object): r = csv.reader(f) for row in r: try: - timestamps.append(arrow.get(row[0]).timestamp) + + timestamps.append(int(datetime.fromisoformat(row[0]).timestamp())) except Exception as e: - logging.error("arrow failed on row %s", row) + logging.error("arrow failed on row %s as: %s", row[0], e) continue return timestamps @@ -64,7 +67,7 @@ class LastFM(object): if ts.timestamp in self.existing: continue entry = Track( - ts.format("YYYY-MM-DDTHH:mm:ssZ"), + ts.format("YYYY-MM-DDTHH:mm:ssZZ"), track.get("artist").get("#text", ""), track.get("album").get("#text", ""), track.get("name", ""), diff --git a/common.py b/common.py index df271cf..6e44dc3 100644 --- a/common.py +++ b/common.py @@ -8,7 +8,6 @@ import subprocess import json from io import BytesIO import lxml.etree as etree -from slugify import slugify import requests from requests.auth import HTTPBasicAuth import arrow @@ -18,6 +17,49 @@ import yaml from pprint import pprint import feedparser +# https://www.peterbe.com/plog/fastest-python-function-to-slugify-a-string +NON_URL_SAFE = [ + '"', + "#", + "$", + "%", + "&", + "+", + ",", + "/", + ":", + ";", + "=", + "?", + "@", + "[", + "]", + "^", + "`", + "{", + "|", + "}", + "~", + "'", + ".", + "\\", +] +# TRANSLATE_TABLE = {ord(char): "" for char in NON_URL_SAFE} +RE_NON_URL_SAFE = re.compile( + r"[{}]".format("".join(re.escape(x) for x in NON_URL_SAFE)) +) +RE_REMOVESCHEME = re.compile(r"^https?://(?:www)?") + + +def slugify(text): + text = RE_REMOVESCHEME.sub("", text).strip() + text = RE_NON_URL_SAFE.sub("", text).strip() + text = text.lower() + text = "_".join(re.split(r"\s+", text)) + return text + + + TMPFEXT = ".xyz" MDFEXT = ".md" @@ -42,9 +84,9 @@ def utfyamldump(data): def url2slug(url): return slugify( - re.sub(r"^https?://(?:www)?", "", url), - only_ascii=True, - lower=True, + re.sub(r"^https?://(?:www)?", "", url) + #only_ascii=True, + #lower=True, )[:200] diff --git a/run b/run index 4fe17bb..0412441 100755 --- a/run +++ b/run @@ -1,6 +1,5 @@ #!/usr/bin/env bash -set -euo pipefail -IFS=$'\n\t' +source .venv/bin/activate python3 run.py diff --git a/run.py b/run.py index 010a0cb..8d7368f 100644 --- a/run.py +++ b/run.py @@ -9,11 +9,11 @@ import HackerNews from pprint import pprint silos = [ - DeviantArt.DAFavs(), Flickr.FlickrFavs(), Tumblr.TumblrFavs(), + DeviantArt.DAFavs(), # Artstation.ASFavs(), - LastFM.LastFM(), +# LastFM.LastFM(), HackerNews.HackerNews() ] diff --git a/settings.py b/settings.py index 4ad4408..1d44ceb 100644 --- a/settings.py +++ b/settings.py @@ -9,7 +9,7 @@ class nameddict(dict): __delattr__ = dict.__delitem__ paths = nameddict({ - "archive": os.path.join(os.path.expanduser('~'), "archive"), + "archive": os.path.join(os.path.expanduser('~'), "archiv"), }) loglevels = {"critical": 50, "error": 40, "warning": 30, "info": 20, "debug": 10}