#!/usr/bin/python3 import sys, os, re import getopt, time, socket import Debug import xml.sax, xml.sax.handler import psycopg2 global d keysToDBKeys = {"Track ID" : {"dbName": "trackid", "type":"integer"}, "Name" : {"dbName": "name", "type":"string"}, "Artist" : {"dbName": "artist", "type":"string"}, "Album" : {"dbName" : "album", "type":"string"}, "Track Number" : {"dbName" : "tracknumber", "type":"integer"}, "Play Count" : {"dbName" : "playcount", "type":"integer"}, "Rating" : {"dbName" : "rating", "type":"integer"}, "Comments" : {"dbName" : "comments", "type":"string"}, "Genre" : {"dbName" : "genre", "type":"string"}, "Total Time" : {"dbName": "length", "type":"integer", "function": (lambda x: int(x/1000))}, "Location" : {"dbName": "location", "type": "string"}} dbName = "iTunesTracks" defaultTableName = "tracks" defaultPlaylistsTableName = "playlists" defaultPlaylistsNamesTableName = "playlistsnames" tableCreationString = "(trackid INTEGER PRIMARY KEY, tracknumber INTEGER, playcount INTEGER, rating INTEGER DEFAULT 0, name VARCHAR(300), genre VARCHAR(100), length INTEGER, comments VARCHAR(300), artist VARCHAR(300), album VARCHAR(300), location VARCHAR(450));" topartistString = "CREATE FUNCTION topartist%s (text) RETURNS VARCHAR(300) AS 'SELECT topartist.artist FROM (SELECT artist, COUNT(*) AS acount FROM %s WHERE album=$1 GROUP BY artist ORDER BY acount DESC LIMIT 1) AS topartist WHERE topartist.acount * 2 >= (SELECT COUNT(*) FROM %s WHERE album=$1);' LANGUAGE SQL;" playlistsNamesTableCreationString = "(playlistid INTEGER PRIMARY KEY, playlistname VARCHAR(200));" playlistsTableCreationString = "(playlistid INTEGER, trackid INTEGER);" playlistdataString = "CREATE FUNCTION playlistdata%s (text) RETURNS playlistresults AS 'SELECT * FROM (SELECT playlistname FROM %s WHERE playlistid=$1) AS tempname, (SELECT COUNT(*) FROM %s WHERE playlistid=$1) AS tempblah, (SELECT COUNT(*), ROUND(AVG(rating/20.0),5), ROUND(STDDEV(rating/20.0),5) FROM (SELECT trackid FROM %s WHERE playlistid=$1) AS foo INNER JOIN (SELECT * from %s) AS bar USING (trackid) WHERE rating > 0) AS baz;' LANGUAGE SQL;" videoFileRe = re.compile("video file") def safeQuery(db, queryString): try: db.execute(queryString) except psycopg2.Error as msg: del db raise Exception("Query failed with message '%s' - query was '%s'" % (msg, queryString)) #def insertTrack(db, tableName, trackDict): # try: # db.execute("INSERT INTO " + tableName + " (trackid, tracknumber, playcount, rating, name, genre, length, comments, artist, album, location) VALUES (%(trackid)s, %(tracknumber)s, %(playcount)s, %(rating)s, %(name)s, %(genre)s, %(length)s, %(comments)s, %(artist)s, %(album)s, %(location)s);", trackDict) # except psycopg2.Error, msg: # del db # raise Exception, "Query failed with message '%s' - query was '%s'" % (msg, queryString) def insertPlaylistEntry(db, playlistsTableName, playlistID, trackID): try: db.execute("INSERT INTO " + playlistsTableName + " (playlistid, trackid) VALUES (%(playlistID)s, %(trackID)s);", dict(playlistID=int(playlistID), trackID=int(trackID))) except psycopg2.Error as msg: del db raise Exception("Query failed with message '%s'" % (msg)) def createTable(newTableName, playlistsNamesTableName, playlistsTableName, sessionName): db = psycopg2.connect('dbname=%s' % dbName) conn = db.cursor() conn.execute("CREATE TABLE %s %s" % (newTableName, tableCreationString)) conn.execute("CREATE TABLE %s %s" % (playlistsNamesTableName, playlistsNamesTableCreationString)) conn.execute("CREATE TABLE %s %s" % (playlistsTableName, playlistsTableCreationString)) conn.execute(topartistString % (sessionName, newTableName, newTableName)) conn.execute(playlistdataString % (sessionName, playlistsNamesTableName, playlistsTableName, playlistsTableName, newTableName)) del conn del db def removeTables(tableNames, session): db = psycopg2.connect('dbname=%s' % dbName) conn = db.cursor() # We may be removing because of an error, so don't stress if we can't remove these. for tableName in tableNames: try: conn.execute("DROP TABLE %s" % tableName) except: pass try: conn.execute("DROP FUNCTION topartist%s(text);" % session) except: pass try: conn.execute("DROP FUNCTION playlistdata%s(text);" % session) except: pass del conn del db class State: Begin = 0 InStartTracks = 1 InTracks = 2 InTrackData = 3 AfterTracks = 4 InStartPlaylists = 5 InPlaylistHeader = 6 InPlaylistItems = 7 class Handler(xml.sax.handler.ContentHandler): def __init__(self, tableName, playlistsNamesTableName, playlistsTableName, session, statusShelf, db): self.tableName = tableName self.playlistsNamesTableName = playlistsNamesTableName self.playlistsTableName = playlistsTableName self.session = session self.statusShelf = statusShelf self.db = db self.state = State.Begin self.lastKeyName = '' self.inKey = False self.inValue = False self.lastTrackKey = None self.tracksNestingLevel = 0 self.lastValue = '' self.curTrackData = {} self.numTracks = 0 self.playlistName = None self.playlistID = None def startElement(self, name, attrs): if name.lower() == 'key': if self.state == State.InTrackData: self.curTrackData[self.lastKeyName] = self.lastValue elif self.state == State.InPlaylistHeader: if self.lastKeyName.lower() == 'name': self.playlistName = self.lastValue elif self.lastKeyName.lower() == 'playlist id': self.playlistID = self.lastValue self.inKey = True self.lastKeyName = '' elif name.lower() == 'dict': if self.state == State.InStartTracks: self.state = State.InTracks elif self.state == State.InTracks or self.state == State.InTrackData: self.tracksNestingLevel += 1 if self.state == State.InTracks: self.state = State.InTrackData self.lastTrackKey = self.lastKeyName elif self.state == State.InTrackData or self.state == State.InPlaylistHeader or self.state == State.InPlaylistItems: self.inValue = True self.lastValue = '' elif self.state == State.AfterTracks and self.lastKeyName.lower() == 'playlists' and name.lower() == 'array': if (self.playlistsNamesTableName): self.db.execute('DELETE FROM %s' % self.playlistsTableName) self.db.execute('DELETE FROM %s' % self.playlistsNamesTableName) self.state = State.InPlaylistHeader if (self.statusShelf): self.statusShelf[self.session]['status'] = 'Adding playlist data...' self.statusShelf.sync() def endElement(self, name): nameLower = name.lower() if self.inKey and nameLower == 'key': self.inKey = False if self.state == State.Begin and self.lastKeyName.lower() == 'tracks': self.state = State.InStartTracks elif self.state == State.InPlaylistHeader and self.lastKeyName.lower() == 'playlist items': dbQuery = "INSERT INTO %s (playlistid, playlistname) VALUES (%s, '%s');" % (self.playlistsNamesTableName, self.playlistID, self.playlistName.replace("'", "''")) safeQuery(self.db, dbQuery) self.state = State.InPlaylistItems elif nameLower == 'dict' and (self.state == State.InTracks or self.state == State.InTrackData): self.tracksNestingLevel -= 1 if self.state == State.InTrackData: self.curTrackData[self.lastKeyName] = self.lastValue self.processTrack(self.lastTrackKey, self.curTrackData) self.curTrackData = {} self.state = State.InTracks if self.tracksNestingLevel < 0: self.state = State.AfterTracks elif self.state == State.InPlaylistItems: if nameLower == 'array': self.state = State.InPlaylistHeader elif nameLower == 'dict' and self.lastKeyName == 'Track ID': insertPlaylistEntry(self.db, self.playlistsTableName, self.playlistID, self.lastValue) #dbQuery = "INSERT INTO %s (playlistid, trackid) VALUES (%s, %s);" % (self.playlistsTableName, self.playlistID, self.lastValue) #safeQuery(self.db, dbQuery) self.inValue = False def characters(self, content): if self.inKey: self.lastKeyName += content elif (self.state == State.InTrackData or self.state == State.InPlaylistHeader or self.state == State.InPlaylistItems) and self.inValue: self.lastValue += content def ignorableWhitespace(self, whitespace): # TODO? pass def processTrack(self, trackKey, trackMap): #print "key: %s data: %s" % (trackKey, trackMap) if ('Kind' in trackMap and not videoFileRe.search(trackMap['Kind'])): pgKeyList = "(" pgValueList = "(" #trackDict = dict(trackid=0, name='', artist='', album='', tracknumber=None, playcount=0, rating=0, comments=None, genre=None, length=0, location='') for pgsqlKey in list(keysToDBKeys.keys()): if pgsqlKey in list(trackMap.keys()): # Got a key to use. pgKeyList = pgKeyList + keysToDBKeys[pgsqlKey]['dbName'] + "," valueToUse = trackMap[pgsqlKey] if (keysToDBKeys[pgsqlKey]['type'] == 'integer'): valueToUse = int(valueToUse) if ('function' in keysToDBKeys[pgsqlKey]): valueToUse = keysToDBKeys[pgsqlKey]['function'](valueToUse) #trackDict[keysToDBKeys[pgsqlKey]['dbName']] = valueToUse if (keysToDBKeys[pgsqlKey]['type'] == 'integer'): pgValueList = pgValueList + str(valueToUse) + "," elif (keysToDBKeys[pgsqlKey]['type'] == 'string'): valueToUse = valueToUse.replace("'", "''") pgValueList = pgValueList + "'" + valueToUse + "'," else: raise Exception("ERROR - got unrecognized value type %s for key %s" % (keysToDBKeys[pgsqlKey][type], keysToDBKeys[pgsqlKey])) pgKeyList = pgKeyList[:-1] + ')' pgValueList = pgValueList[:-1] + ')' # FFV - pass with parameters? #insertTrack(self.db, self.tableName, trackDict) dbQuery = "INSERT INTO %s %s VALUES %s;" % (self.tableName, pgKeyList, pgValueList) safeQuery(self.db, dbQuery) #commandLine = "psql -d %s -c \"%s\"" % (dbName, dbQuery) #if (os.system(commandLine) != 0): #print 'Got error running "%s"! Exiting.' % commandLine #sys.exit(1) self.numTracks = self.numTracks + 1 if (self.numTracks % 100 == 0): d.dprint(1, "Done %d tracks..." % (self.numTracks)) if (self.statusShelf): self.statusShelf[self.session]['status'] = 'Adding tracks to database - done %d tracks...' % (self.numTracks) self.statusShelf.sync() def main(iTunesFileName, tableName, playlistsNamesTableName, playlistsTableName, session=None, statusShelf=None): global d if (statusShelf != None): d = Debug.Debug(0) else: d = Debug.Debug(2) if (statusShelf): statusShelf[session]['status'] = 'Parsing file (this may take a few minutes)...' statusShelf.sync() #d.dprint(2, "iTunesString is %d long." % len(iTunesString)) db = psycopg2.connect('dbname=%s' % dbName) psycopg2.paramstyle = 'pyformat' conn = db.cursor() handler = Handler(tableName, playlistsNamesTableName, playlistsTableName, session, statusShelf, conn) conn.execute('DELETE FROM %s' % tableName) xml.sax.parse(iTunesFileName, handler) del conn del db if (__name__ == '__main__'): if (len(sys.argv) < 2): print("requires name of .xml file") sys.exit(1) else: main(sys.argv[1], defaultTableName, defaultPlaylistsNamesTableName, defaultPlaylistsTableName)