__author__ = 'Deepak Sarda'
__version__ = '0.2'
__copyright__ = '(c) 2006 Deepak Sarda'
__license__ = 'GPL'
__url__ = 'http://www.antrix.net/stuff/towatch/'
import re
try:
import feedparser
import imdb
import cElementTree as ET
except ImportError:
print "You need the 'feedparser', 'cElementTree' and 'imdb' python modules"
raise SystemExit
def _movie_field(movie, key, default=None):
    """Return movie[key], or `default` when that lookup raises KeyError."""
    try:
        return movie[key]
    except KeyError:
        return default


def movie_to_element(movie):
    """Convert an imdb.Movie object to an Element with schema:

    <movie>
      <title>Movie Title</title>
      <id>IMDb Id</id>
      <year>1999</year>
      <rating>8</rating>
      <director>Mr X</director>
      <genres>
        <genre>horror</genre>
        <genre>comedy</genre>
      </genres>
      <cast>
        <member>member 1</member>
        <member>member 2</member>
      </cast>
    </movie>

    Only the first listed director and the first two cast members are
    included, and the rating is truncated to a whole number.

    Every child element is always emitted so the document shape stays
    stable; when the movie lacks a piece of information the matching
    element is simply left empty instead of raising KeyError/IndexError.

    Parameters:
        movie -- mapping-like object supporting movie[key] lookups (raising
                 KeyError for absent keys) and carrying a `movieID`
                 attribute.

    Returns:
        The root <movie> Element.
    """
    root = ET.Element("movie")

    title = ET.SubElement(root, "title")
    title.text = _movie_field(movie, 'title')

    movieid = ET.SubElement(root, "id")
    movieid.text = movie.movieID

    year = ET.SubElement(root, "year")
    year_value = _movie_field(movie, 'year')
    # NOTE(review): some IMDbPY releases appear to return the year as an
    # int; element text must be a string to be serialisable -- confirm
    # against the installed version.
    if isinstance(year_value, int):
        year_value = str(year_value)
    year.text = year_value

    rating = ET.SubElement(root, "rating")
    rating_value = _movie_field(movie, 'rating')
    if rating_value is not None:
        # float() first so that both 8.7 and "8.7" truncate to "8".
        rating.text = str(int(float(rating_value)))

    director = ET.SubElement(root, "director")
    directors = _movie_field(movie, 'director', [])
    if directors:
        director.text = directors[0]['name']

    genres = ET.SubElement(root, "genres")
    for genre_name in _movie_field(movie, 'genres', []):
        genre = ET.SubElement(genres, "genre")
        genre.text = genre_name

    cast = ET.SubElement(root, "cast")
    for person in _movie_field(movie, 'cast', [])[:2]:
        member = ET.SubElement(cast, "member")
        member.text = person['name']

    return root
def fetch_movie_list(urls, get_movie=None):
    """Take a list of IMDb website movie urls and return a list of
    movie info objects, one per successfully fetched title.

    Urls that do not contain an IMDb title id ("tt" followed by digits)
    are ignored.  A lookup that raises, or that yields a false value, is
    skipped (with a message printed) so that one bad title does not
    discard the others.

    Parameters:
        urls      -- iterable of url strings.
        get_movie -- optional callable taking the numeric part of an IMDb
                     id (a string, without the "tt" prefix) and returning
                     the movie object.  Defaults to the `get_movie` method
                     of a new `imdb.IMDb()` instance, which is only
                     created when there is at least one id to look up.

    Returns:
        A list of movie objects; empty when nothing could be fetched.
    """
    # Capture the whole digit run after "tt": ids have at least seven
    # digits, and newer IMDb titles use eight, so a fixed {7} would
    # truncate them and look up the wrong title.
    id_pattern = re.compile(r'tt(\d{7,})')

    imdb_ids = []
    for url in urls:
        match = id_pattern.search(url)
        if match:
            imdb_ids.append(match.group(1))

    if not imdb_ids:
        return []

    if get_movie is None:
        get_movie = imdb.IMDb().get_movie

    movies = []
    for uid in imdb_ids:
        try:
            movie = get_movie(uid)
        except Exception:
            # Best-effort: report the failing id and keep going.
            print("Error fetching IMDb id %s" % uid)
            continue
        if movie:
            movies.append(movie)
    return movies
def fetch_movie_urls(user, tag):
    """Return the links found in a del.icio.us user's RSS feed for a tag.

    The feed at http://del.icio.us/rss/<user>/<tag> is parsed with
    feedparser and the `link` of every entry is collected.  Entries
    without a link are skipped.

    Parameters:
        user -- del.icio.us user name, inserted into the feed url as-is.
        tag  -- tag name, inserted into the feed url as-is.

    Returns:
        A list of link strings (possibly empty), or None when parsing
        raised an error (a message is printed in that case).
    """
    try:
        feed = feedparser.parse("http://del.icio.us/rss/%s/%s" % (user, tag))
        links = [getattr(entry, 'link', None) for entry in feed.entries]
    except Exception:
        # Deliberately broad, but no longer a bare except: Ctrl-C and
        # SystemExit now propagate instead of being reported as a feed
        # error.
        print("Error in fetch_movie_urls")
        return None
    return [link for link in links if link]
if __name__ == '__main__':
    # Script entry point: collect the IMDb links that `user` tagged with
    # `tag`, fetch their details and write them to towatch.xml.
    user = 'antrix'
    tag = 'towatch'

    urls = fetch_movie_urls(user, tag)
    if not urls:
        print("No urls found for user: %s tagged: %s" % (user, tag))
        raise SystemExit

    movies = fetch_movie_list(urls)
    if not movies:
        # Covers both a failed lookup (None) and an empty result; stop
        # before opening the output file so an existing towatch.xml is not
        # overwritten with an empty document.
        print("No movie details fetched for user: %s tagged: %s" % (user, tag))
        raise SystemExit

    towatch = ET.Element("towatch")
    for movie in movies:
        towatch.append(movie_to_element(movie))

    # The XML declaration and stylesheet instruction are written by hand,
    # ahead of the serialised tree.
    header = ('<?xml version="1.0" encoding="utf-8"?>\n'
              '<?xml-stylesheet type="text/xsl" href="towatch.xsl"?>\n')

    # ElementTree.write() is asked for utf-8 output and so emits encoded
    # bytes itself; the file is opened in plain binary mode so those bytes
    # are not pushed through a second encoding layer (which fails on
    # non-ASCII titles).
    f = open('towatch.xml', 'wb')
    try:
        f.write(header.encode('utf-8'))
        ET.ElementTree(towatch).write(f, encoding='utf-8')
    finally:
        # Close the file even if serialisation fails part-way.
        f.close()