#!/usr/bin/python3 import sys, os, re import getopt, time, socket import xml.dom.minidom, Debug from pyPgSQL import PgSQL #iTunesFileName = 'iTunes Music Library.xml' global d keysToDBKeys = {"Track ID" : {"dbName": "trackid", "type":"integer"}, "Name" : {"dbName": "name", "type":"string"}, "Artist" : {"dbName": "artist", "type":"string"}, "Album" : {"dbName" : "album", "type":"string"}, "Track Number" : {"dbName" : "tracknumber", "type":"integer"}, "Play Count" : {"dbName" : "playcount", "type":"integer"}, "Rating" : {"dbName" : "rating", "type":"integer"}, "Comments" : {"dbName" : "comments", "type":"string"}, "Genre" : {"dbName" : "genre", "type":"string"}, "Total Time" : {"dbName": "length", "type":"integer", "function": (lambda x: int(x/1000))}, "Location" : {"dbName": "location", "type": "string"}} dbName = "iTunesTracks" defaultTableName = "tracks" defaultPlaylistsTableName = "playlists" defaultPlaylistsNamesTableName = "playlistsnames" tableCreationString = "(trackid INTEGER PRIMARY KEY, tracknumber INTEGER, playcount INTEGER, rating INTEGER DEFAULT 0, name VARCHAR(300), genre VARCHAR(100), length INTEGER, comments VARCHAR(300), artist VARCHAR(300), album VARCHAR(300), location VARCHAR(450));" topartistString = "CREATE FUNCTION topartist%s (text) RETURNS VARCHAR(300) AS 'SELECT topartist.artist FROM (SELECT artist, COUNT(*) AS acount FROM %s WHERE album=$1 GROUP BY artist ORDER BY acount DESC LIMIT 1) AS topartist WHERE topartist.acount * 2 >= (SELECT COUNT(*) FROM %s WHERE album=$1);' LANGUAGE SQL;" playlistsNamesTableCreationString = "(playlistid INTEGER PRIMARY KEY, playlistname VARCHAR(200));" playlistsTableCreationString = "(playlistid INTEGER, trackid INTEGER);" playlistdataString = "CREATE FUNCTION playlistdata%s (text) RETURNS playlistresults AS 'SELECT * FROM (SELECT playlistname FROM %s WHERE playlistid=$1) AS tempname, (SELECT COUNT(*) FROM %s WHERE playlistid=$1) AS tempblah, (SELECT COUNT(*), ROUND(AVG(rating/20.0),5), ROUND(STDDEV(rating/20.0),5) FROM (SELECT trackid FROM %s WHERE playlistid=$1) AS foo INNER JOIN (SELECT * from %s) AS bar USING (trackid) WHERE rating > 0) AS baz;' LANGUAGE SQL;" videoFileRe = re.compile("video file") def safeQuery(db, queryString): try: query = db.query(queryString) except PgSQL.Error as msg: del db raise Exception("Query failed with message '%s' - query was '%s'" % (msg, queryString)) if (query.resultStatus != PgSQL.COMMAND_OK): del db raise Exception("Query got bad resultStatus %d! Command was %s" % (query.resultStatus, queryString)) return query def createTable(newTableName, playlistsNamesTableName, playlistsTableName, sessionName): db = PgSQL.PQconnectdb('dbname=%s' % dbName) db.query("CREATE TABLE %s %s" % (newTableName, tableCreationString)) db.query("CREATE TABLE %s %s" % (playlistsNamesTableName, playlistsNamesTableCreationString)) db.query("CREATE TABLE %s %s" % (playlistsTableName, playlistsTableCreationString)) db.query(topartistString % (sessionName, newTableName, newTableName)) db.query(playlistdataString % (sessionName, playlistsNamesTableName, playlistsTableName, playlistsTableName, newTableName)) del db def removeTables(tableNames, session): db = PgSQL.PQconnectdb('dbname=%s' % dbName) # We may be removing because of an error, so don't stress if we can't remove these. for tableName in tableNames: try: db.query("DROP TABLE %s" % tableName) except: pass try: db.query("DROP FUNCTION topartist%s(text);" % session) except: pass try: db.query("DROP FUNCTION playlistdata%s(text);" % session) except: pass del db def main(iTunesFileName, tableName, playlistsNamesTableName, playlistsTableName, session=None, statusShelf=None): global d if (statusShelf != None): d = Debug.Debug(0) else: d = Debug.Debug(2) iTunesFile = open(iTunesFileName, 'r') iTunesString = iTunesFile.read() iTunesFile.close() if (statusShelf): statusShelf[session]['status'] = 'Parsing file (this may take a few minutes)...' statusShelf.sync() d.dprint(2, "iTunesString is %d long." % len(iTunesString)) dom1 = xml.dom.minidom.parseString(iTunesString) dom2 = dom1.childNodes[1] dom3 = dom2.childNodes[1] if (statusShelf): statusShelf[session]['status'] = 'Getting trackdata...' statusShelf.sync() trackData = None playlistData = None for child in dom3.childNodes: if (child.nodeName == 'dict'): trackData = child #.nextSibling elif (child.nodeName == 'array'): playlistData = child db = PgSQL.PQconnectdb('dbname=%s' % dbName) if (trackData): d.dprint(2, "trackData: type is %d, name is %s, value is %s" % (trackData.nodeType, trackData.nodeName, trackData.nodeValue)) d.dprint(2,"Got track data - has %d children" % len(trackData.childNodes)) totalTracks = len(trackData.childNodes)/4 db.query('DELETE FROM %s' % tableName) #os.system('psql -d %s -c "DELETE FROM %s;"' % (dbName, tableName)) #os.system('vacuumdb %s' % dbName) numTracks = 0 if (statusShelf): statusShelf[session]['status'] = 'Adding %d tracks to database...' % totalTracks statusShelf.sync() for child in trackData.childNodes: if (child.nodeName == 'dict'): trackDict = child trackMap = {} for child in trackDict.childNodes: if (child.nodeName == 'key'): key = getText(child.childNodes) trackMap[key] = getText(child.nextSibling.childNodes) if ('Kind' in trackMap and not videoFileRe.search(trackMap['Kind'])): pgKeyList = "(" pgValueList = "(" for pgsqlKey in list(keysToDBKeys.keys()): if pgsqlKey in list(trackMap.keys()): # Got a key to use. pgKeyList = pgKeyList + keysToDBKeys[pgsqlKey]['dbName'] + "," valueToUse = trackMap[pgsqlKey] if (keysToDBKeys[pgsqlKey]['type'] == 'integer'): valueToUse = int(valueToUse) if ('function' in keysToDBKeys[pgsqlKey]): valueToUse = keysToDBKeys[pgsqlKey]['function'](valueToUse) if (keysToDBKeys[pgsqlKey]['type'] == 'integer'): pgValueList = pgValueList + str(valueToUse) + "," elif (keysToDBKeys[pgsqlKey]['type'] == 'string'): valueToUse = valueToUse.replace("'", "''") #valueToUse = valueToUse.replace('"', '\\"') pgValueList = pgValueList + "'" + valueToUse + "'," else: raise Exception("ERROR - got unrecognized value type %s for key %s" % (keysToDBKeys[pgsqlKey][type], keysToDBKeys[pgsqlKey])) pgKeyList = pgKeyList[:-1] + ')' pgValueList = pgValueList[:-1] + ')' #d.dprint(2, "keyList is %s, valueList is %s" % (pgKeyList, pgValueList)) dbQuery = "INSERT INTO %s %s VALUES %s" % (tableName, pgKeyList, pgValueList) #print "dbQuery is %s" % dbQuery query = safeQuery(db, dbQuery) #commandLine = "psql -d %s -c \"%s\"" % (dbName, dbQuery) #if (os.system(commandLine) != 0): #print 'Got error running "%s"! Exiting.' % commandLine #sys.exit(1) numTracks = numTracks + 1 if (numTracks % 100 == 0): d.dprint(1, "Done %d/%d tracks..." % (numTracks, totalTracks)) if (statusShelf): statusShelf[session]['status'] = 'Adding %d tracks to database - done %d/%d tracks...' % (totalTracks, numTracks, totalTracks) statusShelf.sync() d.dprint(1, "Got %d tracks." % numTracks) #os.system('vacuumdb %s' % dbName) if (playlistsNamesTableName and playlistData): if (statusShelf): statusShelf[session]['status'] = 'Adding playlist data...' statusShelf.sync() d.dprint(2, "playlistData: type is %d, name is %s, value is %s" % (playlistData.nodeType, playlistData.nodeName, playlistData.nodeValue)) d.dprint(2,"Got playlist data - has %d children" % len(playlistData.childNodes)) db.query('DELETE FROM %s' % playlistsTableName) db.query('DELETE FROM %s' % playlistsNamesTableName) for playlistDict in playlistData.childNodes: if (playlistDict.nodeName == 'dict'): playlistName = None playlistId = None for child in playlistDict.childNodes: if (child.nodeName == 'key'): nodeKey = getText(child.childNodes) if (nodeKey == 'Name'): #print "Got playlist name: %s" % getText(child.nextSibling.childNodes) playlistName = getText(child.nextSibling.childNodes) elif (nodeKey == 'Playlist ID'): playlistId = int(getText(child.nextSibling.childNodes)) elif (nodeKey == 'Playlist Items'): if (playlistName != None and playlistId != None): dbQuery = "INSERT INTO %s(playlistid, playlistname) VALUES (%d, '%s')" % (playlistsNamesTableName, playlistId, playlistName.replace("'", "''")) query = safeQuery(db, dbQuery) playlistArray = child.nextSibling.nextSibling #d.dprint(2, "playlistArray: type is %d, name is %s, value is %s" % (playlistArray.nodeType, playlistArray.nodeName, playlistArray.nodeValue)) #print getText(playlistArray.childNodes) for playlistDict in playlistArray.childNodes: if (playlistDict.nodeType != 3): trackNum = int(getText(playlistDict.childNodes[2].childNodes)) dbQuery = "INSERT INTO %s (playlistid, trackid) VALUES (%d, %d)" % (playlistsTableName, playlistId, trackNum) query = safeQuery(db, dbQuery) del db def getText(nodelist): rc = "" for node in nodelist: if node.nodeType == node.TEXT_NODE: rc = rc + node.data.encode('utf-8') return rc def attributesString(attributes): if (attributes == None): return '' toReturn = '' i = 0 d.dprint(3, "attributes.length is %d" % attributes.length) for i in range(0, attributes.length): attribute = attributes.item(i) toReturn = toReturn + ("{name = %s, value = %s}" % (attribute.nodeName, attribute.value)) return toReturn def printChildren(dom): global d for child in dom.childNodes: d.dprint(3, "childNode: type is %d, name is %s, value is %s" % (child.nodeType, child.nodeName, child.nodeValue)) if (__name__ == '__main__'): if (len(sys.argv) < 2): print("requires name of .xml file") sys.exit(1) else: main(sys.argv[1], defaultTableName, defaultPlaylistsNamesTableName, defaultPlaylistsTableName)