#!/usr/bin/python
"""Compute the root-mean-square error between two line-aligned rating files.

Both files are read in lockstep.  A line whose stripped text ends with ':'
is treated as a header (presumably a movie-id marker, as in the Netflix
Prize probe format -- confirm against the data files); every other line is
parsed as a floating-point rating.
"""
import math

actualFileName = '/home/gregstoll/projects/netflixprize/proberatings.txt'
predictionFileName = '/home/gregstoll/stuff/netflixprize/download/dummyoutput.txt'


def calculateRMSE(aFN, pFN, expectedCount=1407563):
    """Print and return the RMSE between the ratings in two files.

    The files are compared line by line.  Header lines (ending in ':') must
    appear at the same positions in both files; a position where only one
    file has a header is reported as a mismatch and skipped.  All other
    line pairs are parsed with float() and contribute to the squared error.

    Parameters:
        aFN: path of the file holding the actual ratings.
        pFN: path of the file holding the predicted ratings.
        expectedCount: number of rating pairs the caller expects; it is
            only echoed in the summary message.

    Returns:
        The RMSE as a float, or None when no rating pairs were found
        (the RMSE is undefined in that case).

    Raises:
        IOError/OSError: if either file cannot be opened.
        ValueError: if a non-header line cannot be parsed as a float.
    """
    total = 0.0
    count = 0
    actualExhausted = False

    # Nested 'with' blocks guarantee both files are closed even when a
    # line fails to parse.
    with open(aFN, 'r') as aF:
        with open(pFN, 'r') as pF:
            for pLine in pF:
                aLine = aF.readline()
                if aLine == '':
                    # readline() returns '' only at end of file: the actual
                    # file ran out before the prediction file did.
                    actualExhausted = True
                    break

                aStr = aLine.strip()
                pStr = pLine.strip()
                aIsHeader = aStr.endswith(':')
                pIsHeader = pStr.endswith(':')

                if aIsHeader or pIsHeader:
                    # Headers carry no rating.  Report the position when
                    # only one side has a header, whichever side it is.
                    if aIsHeader != pIsHeader:
                        print("Line mismatch! aStr=%s, pStr=%s" % (aStr, pStr))
                    continue

                diff = float(aStr) - float(pStr)
                total += diff * diff
                count += 1

            if actualExhausted:
                print("Warning: fewer lines in actual file than prediction file!!")
            elif aF.readline() != '':
                print("Warning: fewer lines in prediction file than actual file!!")

    print("found %d ratings (should be %d)" % (count, expectedCount))

    if count == 0:
        # Avoid dividing by zero; there is nothing to average.
        print("no ratings found, rmse is undefined")
        return None

    rmse = math.sqrt(total / float(count))
    print("rmse is %f" % rmse)
    return rmse


if __name__ == '__main__':
    calculateRMSE(actualFileName, predictionFileName)