2018-10-15 14:16:10 +01:00
|
|
|
import os
|
|
|
|
import csv
|
|
|
|
import json
|
|
|
|
import logging
|
|
|
|
from operator import attrgetter
|
|
|
|
from collections import namedtuple
|
|
|
|
import requests
|
|
|
|
import arrow
|
|
|
|
import settings
|
|
|
|
import keys
|
|
|
|
from pprint import pprint
|
|
|
|
|
|
|
|
# One archived scrobble: ISO timestamp plus track/artist/album metadata
# (musicbrainz ids and the largest cover-image URL last.fm returned).
Track = namedtuple(
    "Track",
    ["timestamp", "artist", "album", "title", "artistid", "albumid", "img"],
)
|
|
|
|
|
2019-04-29 09:29:50 +01:00
|
|
|
|
|
|
|
class cached_property(object):
    """Minimal compute-once property descriptor.

    On the first attribute access the wrapped method runs and its result
    is stored on the instance under the same name, shadowing this
    descriptor; every later access therefore hits the plain instance
    attribute directly, with no further calls into the method.
    """

    def __init__(self, method, name=None):
        self.method = method
        # Default to the wrapped method's own name so the cached value
        # lands on the attribute the caller accessed.
        self.name = name or method.__name__

    def __get__(self, inst, cls):
        # Accessed on the class itself: hand back the descriptor unchanged.
        if inst is None:
            return self
        value = self.method(inst)
        # Store the computed value on the instance, replacing the descriptor
        # for all subsequent lookups on this instance.
        setattr(inst, self.name, value)
        return value
|
|
|
|
|
|
|
|
|
2018-10-15 14:16:10 +01:00
|
|
|
class LastFM(object):
    """Incrementally archive last.fm scrobbles into a CSV file.

    Pages through the audioscrobbler ``user.getrecenttracks`` API and
    appends any not-yet-archived plays to ``lastfm.csv`` inside the
    configured archive directory.
    """

    # Endpoint for all audioscrobbler API calls.
    url = "http://ws.audioscrobbler.com/2.0/"

    def __init__(self):
        # Base query parameters; credentials come from the local `keys` module.
        self.params = {
            "method": "user.getrecenttracks",
            "user": keys.lastfm.get("username"),
            "api_key": keys.lastfm.get("key"),
            "format": "json",
            "limit": "200",
        }
        # Incremental fetch: if an archive already exists, only ask the API
        # for scrobbles newer than the file's last modification time.
        if os.path.isfile(self.target):
            mtime = os.path.getmtime(self.target)
            self.params.update({"from": mtime})

    @property
    def target(self):
        """Path of the CSV archive file."""
        return os.path.join(settings.paths.get("archive"), "lastfm.csv")

    @cached_property
    def existing(self):
        """Unix timestamps already present in the archive (computed once).

        Used by `extracttracks` to deduplicate: a scrobble whose
        timestamp is in this list is skipped.
        """
        timestamps = []
        with open(self.target, "r") as f:
            r = csv.reader(f)
            for row in r:
                try:
                    timestamps.append(arrow.get(row[0]).timestamp)
                except Exception as e:
                    # The header row (and any malformed line) fails to
                    # parse as a date; log the reason and keep going.
                    logging.error("arrow failed on row %s: %s", row, e)
                    continue
        return timestamps

    @property
    def exists(self):
        """True if the CSV archive file is already on disk."""
        return os.path.isfile(self.target)

    def extracttracks(self, data):
        """Convert one API response page into a list of new Track tuples.

        Skips the currently-playing entry (it has no "date" key) and any
        track whose timestamp is already archived.
        """
        tracks = []
        if not data:
            return tracks
        for track in data.get("track", []):
            if "date" not in track:
                # "now playing" entry — not a finished scrobble yet.
                continue
            ts = arrow.get(int(track.get("date").get("uts")))
            if ts.timestamp in self.existing:
                continue
            # FIX: the "image" list can be empty, in which case indexing
            # [-1] raised IndexError and aborted the whole import.
            images = track.get("image") or []
            img = images[-1].get("#text", "") if images else ""
            entry = Track(
                ts.format("YYYY-MM-DDTHH:mm:ssZ"),
                track.get("artist").get("#text", ""),
                track.get("album").get("#text", ""),
                track.get("name", ""),
                track.get("artist").get("mbid", ""),
                track.get("album").get("mbid", ""),
                img,
            )
            tracks.append(entry)
        return tracks

    def fetch(self):
        """Request one page of recent tracks; return the "recenttracks" dict."""
        r = requests.get(self.url, params=self.params)
        return json.loads(r.text).get("recenttracks")

    def run(self):
        """Fetch all new scrobbles (paginated) and append them to the CSV."""
        try:
            data = self.fetch()
            tracks = self.extracttracks(data)
            # FIX: "@attr" may be absent from the response; the original
            # code then raised AttributeError on None, hit the broad
            # except below, and discarded the tracks already extracted.
            # Default to a single page instead.
            attr = data.get("@attr") or {}
            total = int(attr.get("totalPages", 1))
            current = int(attr.get("page", 1))
            cntr = total - current
        except Exception as e:
            logging.error("Something went wrong: %s", e)
            return

        # Nothing new on the first (newest) page means nothing new at all.
        if not len(tracks):
            return

        # Walk the remaining pages, accumulating new tracks.
        while cntr > 0:
            current = current + 1
            cntr = total - current
            logging.info("requesting page #%d of paginated results", current)
            self.params.update({"page": current})
            data = self.fetch()
            tracks = tracks + self.extracttracks(data)

        # First run: create the file and write the CSV header row.
        if not self.exists:
            with open(self.target, "w") as f:
                writer = csv.DictWriter(f, fieldnames=Track._fields)
                writer.writeheader()

        if len(tracks):
            with open(self.target, "a") as f:
                writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
                # Oldest first, so the archive stays chronologically sorted.
                writer.writerows(sorted(tracks, key=attrgetter("timestamp")))
|
|
|
|
2019-07-13 21:01:57 +01:00
|
|
|
# Script entry point: pull any new scrobbles and archive them to CSV.
if __name__ == "__main__":
    LastFM().run()
|