small updates & fixes

Peter Molnar 2023-06-01 18:06:30 +01:00
parent 7cd5ba9364
commit edc40423e8
6 changed files with 57 additions and 13 deletions

@@ -133,7 +133,7 @@ class DAFav(common.ImgFav):
             "deviantart_%s_%s_%s"
             % (
                 common.url2slug("%s" % self.deviation.author),
-                self.id,
+                self.id.replace("-", "_"),
                 common.url2slug("%s" % self.title),
             ),
         )
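
For illustration, DeviantArt deviation ids are UUID-shaped strings (an assumption on my part; the hunk itself doesn't show one), so the replace() keeps dashes out of the underscore-separated filename:

    # hypothetical deviation id, not taken from the commit
    deviation_id = "1a2b3c4d-5e6f-7a8b-9c0d-112233445566"
    print(deviation_id.replace("-", "_"))
    # 1a2b3c4d_5e6f_7a8b_9c0d_112233445566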

@@ -6,11 +6,13 @@ from operator import attrgetter
 from collections import namedtuple
 import requests
 import arrow
+from datetime import datetime
 import settings
 import keys
 from pprint import pprint
 from math import floor
 from common import cached_property
+import sys

 Track = namedtuple(
     "Track", ["timestamp", "artist", "album", "title", "artistid", "albumid", "img"]
@@ -43,9 +45,10 @@ class LastFM(object):
         r = csv.reader(f)
         for row in r:
             try:
-                timestamps.append(arrow.get(row[0]).timestamp)
+                timestamps.append(int(datetime.fromisoformat(row[0]).timestamp()))
             except Exception as e:
-                logging.error("arrow failed on row %s", row)
+                logging.error("arrow failed on row %s as: %s", row[0], e)
                 continue
         return timestamps
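
A note on the parsing switch: arrow 1.0 turned Arrow.timestamp from a property into a method, so under current arrow releases arrow.get(row[0]).timestamp evaluates to a bound method rather than a number; the stdlib call avoids depending on arrow's API here at all. A minimal sketch, assuming the CSV cell holds an ISO 8601 string with a colon in the offset (the "ZZ" shape written in the next hunk):

    from datetime import datetime

    # hypothetical CSV cell, not taken from the commit
    row0 = "2023-06-01T18:06:30+01:00"
    dt = datetime.fromisoformat(row0)  # timezone-aware datetime
    print(int(dt.timestamp()))         # epoch seconds as an int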
@@ -64,7 +67,7 @@ class LastFM(object):
             if ts.timestamp in self.existing:
                 continue
             entry = Track(
-                ts.format("YYYY-MM-DDTHH:mm:ssZ"),
+                ts.format("YYYY-MM-DDTHH:mm:ssZZ"),
                 track.get("artist").get("#text", ""),
                 track.get("album").get("#text", ""),
                 track.get("name", ""),

@@ -8,7 +8,6 @@ import subprocess
 import json
 from io import BytesIO
 import lxml.etree as etree
-from slugify import slugify
 import requests
 from requests.auth import HTTPBasicAuth
 import arrow
@@ -18,6 +17,49 @@ import yaml
 from pprint import pprint
 import feedparser

+# https://www.peterbe.com/plog/fastest-python-function-to-slugify-a-string
+NON_URL_SAFE = [
+    '"',
+    "#",
+    "$",
+    "%",
+    "&",
+    "+",
+    ",",
+    "/",
+    ":",
+    ";",
+    "=",
+    "?",
+    "@",
+    "[",
+    "]",
+    "^",
+    "`",
+    "{",
+    "|",
+    "}",
+    "~",
+    "'",
+    ".",
+    "\\",
+]
+
+# TRANSLATE_TABLE = {ord(char): "" for char in NON_URL_SAFE}
+RE_NON_URL_SAFE = re.compile(
+    r"[{}]".format("".join(re.escape(x) for x in NON_URL_SAFE))
+)
+RE_REMOVESCHEME = re.compile(r"^https?://(?:www)?")
+
+
+def slugify(text):
+    text = RE_REMOVESCHEME.sub("", text).strip()
+    text = RE_NON_URL_SAFE.sub("", text).strip()
+    text = text.lower()
+    text = "_".join(re.split(r"\s+", text))
+    return text
+
+
 TMPFEXT = ".xyz"
 MDFEXT = ".md"
@@ -42,9 +84,9 @@ def utfyamldump(data):
 def url2slug(url):
     return slugify(
-        re.sub(r"^https?://(?:www)?", "", url),
-        only_ascii=True,
-        lower=True,
+        re.sub(r"^https?://(?:www)?", "", url)
+        #only_ascii=True,
+        #lower=True,
     )[:200]
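
For a feel of what the vendored slugify produces, two sample inputs of my own (note the scheme strip inside url2slug is now redundant, since RE_REMOVESCHEME does the same job inside slugify itself):

    print(url2slug("https://www.Example.com/Some Page"))
    # examplecomsome_page
    print(slugify("Hello, World & Friends"))
    # hello_world_friends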

run (3 changed lines)

@@ -1,6 +1,5 @@
 #!/usr/bin/env bash
 set -euo pipefail
 IFS=$'\n\t'
 source .venv/bin/activate
 python3 run.py

run.py (4 changed lines)

@@ -9,11 +9,11 @@ import HackerNews
 from pprint import pprint

 silos = [
-    DeviantArt.DAFavs(),
     Flickr.FlickrFavs(),
     Tumblr.TumblrFavs(),
+    DeviantArt.DAFavs(),
     # Artstation.ASFavs(),
-    LastFM.LastFM(),
+    # LastFM.LastFM(),
     HackerNews.HackerNews()
 ]

@@ -9,7 +9,7 @@ class nameddict(dict):
     __delattr__ = dict.__delitem__

 paths = nameddict({
-    "archive": os.path.join(os.path.expanduser('~'), "archive"),
+    "archive": os.path.join(os.path.expanduser('~'), "archiv"),
 })

 loglevels = {"critical": 50, "error": 40, "warning": 30, "info": 20, "debug": 10}
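
For reference, the nameddict pattern visible in the hunk's context maps attribute access onto plain dict access; a minimal self-contained sketch (only __delattr__ appears above; the other two assignments are the usual companions and are my assumption):

    class nameddict(dict):
        # attribute-style access over a plain dict
        __getattr__ = dict.__getitem__
        __setattr__ = dict.__setitem__
        __delattr__ = dict.__delitem__

    paths = nameddict({"archive": "/home/user/archiv"})  # sample value
    print(paths.archive)  # attribute read, same as paths["archive"]
    del paths.archive     # removes the "archive" key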