#!/usr/bin/python3 import os, time from xml.sax import saxutils import psycopg2 # From http://pypgsql.sourceforge.net/pypgsql-faq.html #pyPgSQL.fetchReturnsList = 1 #pyPgSQL.noPostgresCursor = 1 defaultDbName = "iTunesTracks" defaultTableName = "tracks" defaultTopArtistName = "topartist" defaultPlaylistsNamesTableName = "playlistsnames" defaultPlaylistsTableName = "playlists" defaultPlaylistDataName = "playlistdata" defaultOutputFileName = "analysis.html" def makeSummize(output, ratingInfo, width=150): total = 0 for i in range(6): if (i not in ratingInfo): ratingInfo[i] = 0 else: total += ratingInfo[i] output.append('') cumWidth = 0 for i in range(6): if (ratingInfo[i] > 0): newWidth = (ratingInfo[i] * width)/total songWord = "songs" if (ratingInfo[i] == 1): songWord = "song" output.append('' % (i, ratingInfo[i], songWord, i, newWidth, cumWidth)) cumWidth += newWidth output.append('\n') curIndex = 0 def chartCallback(connection, index, tableName, column): def tempCallback(output, item, whereInTable): global curIndex if (whereInTable == 0): if (index == curIndex): if (item == None or item == 'None'): whereString = "%s IS NULL" % column else: whereString = "%s='%s'" % (column, str(item).replace("'", "''")) sqlQuery = "SELECT rating/20 AS rating, COUNT(*) AS numTracks FROM %s WHERE %s GROUP BY rating ORDER BY rating;" % (tableName, whereString) tempCursor = connection.cursor() tempCursor.execute(sqlQuery) ratingInfo = {} for record in tempCursor: ratingInfo[int(record[0])] = int(record[1]) output.append('') makeSummize(output, ratingInfo) output.append('') curIndex += 1 else: curIndex = 0 return tempCallback curIndex = 0 def playlistCallback(connection, index, tableName, playlistsNamesTableName, playlistsTableName): def tempCallback(output, item, whereInTable): global curIndex if (whereInTable == 0): if (index == curIndex): if (item == None): item = '' # SELECT rating/20 AS rating, COUNT(*) AS num FROM ((SELECT playlistid FROM playlistsnames WHERE playlistname='Guitar Hero') AS baz INNER JOIN (SELECT trackid FROM playlists) AS foo USING (playlistid)) INNER JOIN (SELECT * from tracks) AS bar USING (trackid) GROUP BY rating; #sqlQuery = "SELECT rating/20 AS rating, COUNT(*) AS numTracks FROM (SELECT trackid from %s WHERE playlistid='%s') AS foo INNER JOIN (SELECT * from %s) AS bar USING (trackid) GROUP BY rating;" % (tableName, column, str(item).replace("'", "''")) # TODO sqlQuery = "SELECT rating/20 AS rating, COUNT(*) FROM %s, %s, %s WHERE %s.playlistname='%s' AND %s.playlistid=%s.playlistid AND %s.trackid=%s.trackid GROUP BY rating;" % (tableName, playlistsNamesTableName, playlistsTableName, playlistsNamesTableName, str(item).replace("'", "''"), playlistsNamesTableName, playlistsTableName, tableName, playlistsTableName) tempCursor = connection.cursor() tempCursor.execute(sqlQuery) ratingInfo = {} for record in tempCursor: ratingInfo[int(record[0])] = int(record[1]) output.append('') makeSummize(output, ratingInfo) output.append('') curIndex += 1 else: curIndex = 0 return tempCallback overallRatingInfo = {} def overallRatingCallback(output, row, whereInTable): if (row != None): overallRatingInfo[row[0]] = int(row[1]) else: if (whereInTable == 2): output.append('
') makeSummize(output, overallRatingInfo) output.append('
') #maxNum = 0 #for i in range(6): # if (i not in ratingInfo): # ratingInfo[i] = 0 # else: # maxNum = max(ratingInfo[i], maxNum) ## Figure out how to make the graph. #mainOutput.append('\n') #print "got row: %s" % row linksOutput = [] mainOutput = [] def main(dbName, tableName, topArtistName, playlistsNamesTableName, playlistsTableName, playlistDataName, outputFileName, session=None, statusShelf=None): if (os.path.exists(outputFileName)): os.remove(outputFileName) outputFile = open(outputFileName, 'a') if (statusShelf != None): statusShelf['data']['status'] = 'Performing analysis on data...' statusShelf.sync() connection = psycopg2.connect('dbname=%s' % dbName) connection.set_client_encoding('UNICODE') cursor = connection.cursor() outputFile.write('\n') outputFile.write('\niTunes Rating Analysis\n') outputFile.write('\n') outputFile.write('\n') outputFile.write('\n') outputFile.write('\n') outputFile.write('\n') outputFile.write('

This analysis done by Greg\'s iTunes Rating Analysis on %s

\n' % time.asctime()) findFavoriteGroups(cursor, tableName, topArtistName, outputFile) outputFile.write('

Click on the headers to sort by column. The little charts are based on summize.com - each color represents a different rating (black is 0, red is 1, etc.). You can mouse over the colors to see the exact number of songs that had that rating.

\n') linksOutput.append('') for line in linksOutput: outputFile.write(line) for line in mainOutput: outputFile.write(line) outputFile.write('

Valid HTML 4.01 Strict\n') outputFile.write('Valid CSS!

\n') if (outputFileName == defaultOutputFileName): # Add tracking, etc. to local one outputFile.write('') outputFile.write('\n') outputFile.close() def findFavoriteGroups(cursor, tableName, topArtistName, outputFile): # Find top 5 artists by number of 4 and 5 rated songs and by average rating # (with more than 4 songs rated) artists = set() cursor.execute("SELECT artist, count(*) as numtracks FROM %s WHERE rating >= 80 GROUP BY artist ORDER BY numtracks DESC LIMIT 5;" % tableName) for record in cursor: artists.update([record[0]]) cursor.execute("SELECT artist, count(*) as numtracks FROM %s WHERE rating = 100 GROUP BY artist ORDER BY numtracks DESC LIMIT 5;" % tableName) for record in cursor: artists.update([record[0]]) cursor.execute("SELECT * FROM (SELECT artist, COUNT(*) AS numTracks, AVG(rating) AS avgRating FROM %s WHERE rating > 20 GROUP BY artist) AS foo WHERE foo.numTracks > 4 ORDER BY foo.avgRating DESC LIMIT 5;" % tableName) for record in cursor: artists.update([record[0]]) artistsList = list(artists) artistsList.sort() outputFile.write('

Favorite artists: %s

' % (', '.join(artistsList))) # TODO - topartist() function is really slow here cursor.execute("SELECT foo.album, %s(foo.album) FROM (SELECT album, COUNT(*) AS numTracks, AVG(rating) AS avgRating FROM %s WHERE rating > 20 GROUP BY album) AS foo WHERE foo.numTracks > 4 ORDER BY foo.avgRating DESC LIMIT 5;" % (topArtistName, tableName)) albumsList = [] for record in cursor: albumsList.append((record[0], record[1])) def artistString(artistTuple): if (artistTuple[1] != '' and artistTuple[1] != 'None'): return "%s (by %s)" % artistTuple else: return "%s" % artistTuple[0] outputFile.write('

Favorite albums: %s

' % (', '.join([artistString(x) for x in albumsList]))) def executeCommand(cursor, sqlQuery, description, headers, hrefName, rowCallback=None, callAfterEachItem=True): cursor.execute(sqlQuery) linksOutput.append('
  • %s
  • ' % (hrefName, description)) classStr = '' # Make the table sortable if there's more than one result. if (cursor.rowcount > 1): classStr = 'class="sortable" ' mainOutput.append('\n' % (classStr, hrefName, saxutils.escape(description))) mainOutput.append("") for header in headers: mainOutput.append("" % saxutils.escape(header)) mainOutput.append("\n") for record in cursor: mainOutput.append("") if (rowCallback != None): row = [] # TODO - make constants for whereInTable for item in record: if (rowCallback != None): row.append(item) mainOutput.append("" % saxutils.escape(str(item))) if (callAfterEachItem and rowCallback): rowCallback(mainOutput, item, 0) if (not callAfterEachItem and rowCallback): rowCallback(mainOutput, row, 1) mainOutput.append("\n") if (callAfterEachItem and rowCallback): rowCallback(mainOutput, None, 1) mainOutput.append("
    %s
    %s
    %s
    \n") if (rowCallback): rowCallback(mainOutput, None, 2) #commandLine = "psql -d %s -c \"%s\" >> %s" % (dbName, sqlQuery, outputFileName) #os.system(commandLine) if (__name__ == '__main__'): main(defaultDbName, defaultTableName, defaultTopArtistName, defaultPlaylistsNamesTableName, defaultPlaylistsTableName, defaultPlaylistDataName, defaultOutputFileName)