#!/usr/bin/python
#
# towatch.py : A script to fetch and merge movie data from del.icio.us and imdb.com
#
# author:   Deepak Sarda
#           firstname@antrix.net
#
# Changelog:
# 0.2.1 raise SystemExit!
# 0.2   Added extra xml markup for an xsl file
#       Using UTF-8 encoding for file output
# 0.1   Fetching data and xml writing works

__author__      = 'Deepak Sarda'
__version__     = '0.2'
__copyright__   = '(c) 2006 Deepak Sarda'
__license__     = 'GPL'
__url__         = 'http://www.antrix.net/stuff/towatch/'

import re
try:
    import feedparser           # http://feedparser.org/

    import imdb                 # http://imdbpy.sourceforge.net/

    import cElementTree as ET   # http://effbot.org/zone/celementtree.htm

except ImportError:
    print "You need the 'feedparser', 'cElementTree' and 'imdb' python modules"
    raise SystemExit

def movie_to_element(movie):
    """ Converts a imdb.Movie object to an Element with schema:
    <movie>
    <title>Movie Title</title>
    <id>IMDb Id</id>
    <year>1999</year>
    <rating>8</rating>
    <genres>
    <genre>horror</genre>
    <genre>comedy</genre>
    </genres>
    <director>Mr X</director>
    <cast>
    <member>member 1</member>
    <member>member 2</member>
    </cast>
    </movie>

    Every child element is always emitted, in the order title, id, year,
    rating, director, genres, cast.  A field that is missing from `movie`
    (e.g. a title that has no rating or cast yet) produces an empty element
    instead of raising KeyError.

    movie   -- mapping-style object indexed by field name that also has a
               `movieID` attribute (presumably an imdb.Movie; confirm
               against the IMDbPY version in use).

    Returns the root "movie" Element.  The rating is truncated to an
    integer and only the first director and first two cast members are kept.
    """
    def field(key, default=None):
        # Indexing raises KeyError for absent keys; treat those as "no data".
        try:
            return movie[key]
        except KeyError:
            return default

    root = ET.Element("movie")

    title = ET.SubElement(root, "title")
    title.text = field('title')

    movieid = ET.SubElement(root, "id")
    movieid.text = movie.movieID

    year = ET.SubElement(root, "year")
    year_value = field('year')
    if year_value is not None:
        # Element text must be a string to be serialisable; the year may
        # arrive as a number, so format it explicitly.
        year.text = u'%s' % (year_value,)

    rating = ET.SubElement(root, "rating")
    rating_value = field('rating')
    if rating_value is not None:
        rating.text = str(int(rating_value))

    director = ET.SubElement(root, "director")
    directors = field('director') or []
    if directors:
        director.text = directors[0]['name']

    genres = ET.SubElement(root, "genres")
    for g in field('genres') or []:
        genre = ET.SubElement(genres, "genre")
        genre.text = g

    cast = ET.SubElement(root, "cast")
    for c in (field('cast') or [])[:2]:
        member = ET.SubElement(cast, "member")
        member.text = c['name']

    return root


def fetch_movie_list(urls):
    """Takes list of imdb website movie urls and returns
    a list of imdb.Movie info objects.

    Urls that do not contain an IMDb title id ("tt" followed by at least
    seven digits) are ignored.  A movie whose lookup raises is reported on
    stderr and skipped, so one failing entry does not discard the others.
    Lookups that return a false value are dropped as well.

    Always returns a list (possibly empty), never None.
    """
    import sys

    # Title ids are "tt" plus a run of digits.  Capture the whole run: a
    # fixed 7-digit match would silently truncate longer ids and look up
    # the wrong movie.
    p = re.compile(r'tt(\d{7,})')

    matches = [p.search(u) for u in urls]
    imdb_ids = [m.group(1) for m in matches if m]

    # Nothing to look up: skip creating the IMDb client altogether.
    if not imdb_ids:
        return []

    i = imdb.IMDb()

    movies = []
    for uid in imdb_ids:
        try:
            movie = i.get_movie(uid)
        except Exception:
            # sys.exc_info() keeps this working on interpreters that lack
            # the "except ... as" syntax.
            sys.stderr.write("Could not fetch IMDb data for tt%s: %s\n"
                             % (uid, sys.exc_info()[1]))
            continue
        if movie:
            movies.append(movie)
    return movies

def fetch_movie_urls(user, tag):
    """Returns the links of the del.icio.us bookmarks that `user` has
    tagged with `tag`.

    The feed http://del.icio.us/rss/<user>/<tag> is parsed with feedparser
    and the `link` of every entry is collected.

    Returns a list of links (empty when the feed has no entries), or None
    after printing an error message if parsing or reading the feed raised.
    """
    try:
        feed = feedparser.parse("http://del.icio.us/rss/%s/%s" % (user, tag))
        return [entry.link for entry in feed.entries]
    except Exception:
        # Still a broad best-effort catch, but no longer a bare "except:",
        # so SystemExit and (on Python 2.5+) KeyboardInterrupt are not
        # swallowed.
        print("Error in fetch_movie_urls")
        return None

if __name__ == '__main__':
    # Script entry point: fetch the bookmarked movie urls, look each one up
    # on IMDb and write the merged data to towatch.xml.
    user = 'antrix'
    tag = 'towatch'

    urls = fetch_movie_urls(user, tag)
    if not urls:
        print("No urls found for user: %s tagged: %s" % (user, tag))
        raise SystemExit

    movies = fetch_movie_list(urls)
    if movies is None:
        # Without a list there is nothing to iterate; stop cleanly and
        # leave any existing towatch.xml untouched.
        print("Could not fetch movie data for user: %s tagged: %s" % (user, tag))
        raise SystemExit

    towatch = ET.Element("towatch")

    for movie in movies:
        towatch.append(movie_to_element(movie))

    header = """<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type="text/xsl" href="towatch.xsl"?>\n"""

    # ElementTree.write(..., encoding='utf-8') already produces encoded
    # bytes.  Sending them through an encoding writer would encode them a
    # second time and fail on any non-ASCII title or name, so write to a
    # plain binary file instead.
    f = open('towatch.xml', 'wb')
    try:
        f.write(header.encode('utf-8'))
        ET.ElementTree(towatch).write(f, encoding='utf-8')
    finally:
        # Close the file even if serialisation fails part-way through.
        f.close()