#!/Library/Frameworks/Python.framework/Versions/Current/bin/python from calais import Calais import feedparser, time, math API_KEY = 'ENTER_YOUR_KEY_HERE' calais = Calais(API_KEY, submitter="python-calais") feeds = ['http://feeds.boingboing.net/boingboing/iBag', \ 'http://www.foxnews.com/xmlfeed/rss/0,4313,0,00.rss', \ 'http://gizmodo.com/tag/top/index.xml', \ 'http://www.nytimes.com/services/xml/rss/nyt/HomePage.xml', \ 'http://rss.people.com/web/people/rss/topheadlines/index.xml', \ 'http://feeds.digg.com/digg/popular.rss', \ 'http://gawker.com/tag/top/index.xml' \ ] lastPing = time.time() dataBase = {} for f in feeds: d = feedparser.parse(f) if(not d.feed.has_key('title')): d.feed.title = 'untitled' entitycount = 0 values = {} print 'BLOG: ' + d.feed.title + ' has ' + `len(d.entries)` + ' elements' for e in d.entries[:15]: if 'summary' in e: summary=e.summary else: summary=e.description print 'Summary: ' + summary + "\n" elapsed = time.time()-lastPing if(elapsed < 0.25): time.sleep(0.25 - elapsed) #maxiumum api rate result = calais.analyze(summary.encode('utf-8')) lastPing = time.time() try: #print result.entities names = [x['name'] for x in result.entities] entities = [x['_type'] for x in result.entities] for e in entities: if not e in values: values[e] = 0 values[e] += 1 entitycount += 1 except AttributeError: print "No results from: " + summary.encode('utf-8') normalizedValues = {} for k in values.keys(): normalizedValues[k] = float(values[k]) / entitycount """ a = normalizedValues.keys() def comp(x, y): if normalizedValues[x]>normalizedValues[y]: return 1 elif normalizedValues[x]==normalizedValues[y]: return 0 else: return -1 a.sort(cmp=comp) #for x in a: # print x + ' = ' + `normalizedValues[x]` + "\n" """ dataBase[d.feed.title] = normalizedValues blogList = dataBase.keys() scoreTable = open('scores.csv', 'w') scoreTable.write('Blogs') for blog in blogList: scoreTable.write(',"'+blog+'"') scoreTable.write("\n") #process each blog results allKeys = set() for reference in blogList: scoreTable.write('"'+reference+'"') for comparison in blogList: score = 0.0 if comparison != reference: for e in dataBase[reference].keys(): allKeys.add(e) value = dataBase[reference][e] if e in dataBase[comparison]: difference = abs( value-dataBase[comparison][e] ) else: difference = value score += difference for j in dataBase[comparison].keys(): if not j in dataBase[reference]: score += dataBase[comparison][j] print "Score between " + reference + " and " + comparison + ' is ' + `score` scoreTable.write(','+`score`) scoreTable.write("\n") print "all keys: " + `allKeys` table = open('allblogdata.csv','w') table.write('Blogs') for k in allKeys: table.write(',"'+k+'"') table.write("\n") for b in blogList: table.write('"'+b+'"') for k in allKeys: if k in dataBase[b]: score = dataBase[b][k] else: score = 0.0 table.write(',' + `score`) table.write("\n")