Context Navigation

Back to Ticket #2734

Ticket #2734: metadata_script_improvements_against_trunk_07-03-04.patch

File metadata_script_improvements_against_trunk_07-03-04.patch, 15.8 KB (added by visit0r, 17 years ago)
Improvements since yesterday (major addition: new poster fetching script).

mythvideo/mythvideo/scripts/imdbpy.py

         print "You need to install the IMDbPy library "\
                 "from (http://imdbpy.sourceforge.net/?page=download)"
         sys.exit(1)
+episode_title_format = '%(series_title)s S%(season)d E%(episode)d "%(episode_title)s"'
 def detect_series_query(search_string):
         """
         Detects a series episode query.
 …
         "Sopranos S1E2"
         "Sopranos S1 E2"
         "Sopranos 1x2"
+        "Sopranos - 1x2"
         """
+        m = re.match(r"(.+)\s*(s|(season)|\s)\s*(\d+)\s*(e|(episode)|x)\s*(\d+)",
+                        search_string.lower())
+        if m is None or m.group(1) is None or m.group(4) is None or \
+                        m.group(7) is None:
+        m = re.match(r"((?P<title>.+?)(-)?)?(\s*)"\
+                "(s|(season)|\s)\s*(?P<season>\d+)"\
+                "\s*(e|(episode)|x)\s*(?P<episode>\d+)",
+                search_string.lower())
+        if m is None or m.group('title') is None or m.group('season') is None \
+                        or m.group('episode') is None:
                 return (None, None, None)
         return (m.group(1), m.group(4), m.group(7))
+        return (m.group('title'), m.group('season'), m.group('episode'))
 def episode_search(title, season, episode):
         matches = []
         imdb_access = imdb.IMDb()
         series = imdb_access.search_movie(title.encode("ascii", 'replace'))
+        season = int(season)
+        episode = int(episode)
         for serie in series:
                 if serie['kind'] == 'tv series':
 …
                         imdb_access.update(serie, 'episodes')
                         if serie.has_key('episodes'):
                                 try:
                                         ep = serie['episodes'][int(season)][int(episode)]
+                                        ep = serie['episodes'][season][episode]
                                 except:
                                         # Probably indexing exception in case the episode/season
                                         # is not found.
                                         continue
                                 # Found an exact episode match, return that match only.
                                 matches = []
+                                matches.append([imdb_access.get_imdbID(ep),
+                                                title.title().strip() + ", S" + season + " E" +
+                                                episode, int(serie['year'])])
+                                episode_title = episode_title_format % \
+                                                {
+                                                        'series_title': ep['series title'],
+                                                        'season': season,
+                                                        'episode': episode,
+                                                        'episode_title': ep['title']
+                                                }
+                                matches.append([imdb_access.get_imdbID(ep), episode_title ,int(ep['year'])])
                                 return matches
                         else:
                                 matches.append([imdb_access.get_imdbID(serie),
 …
         if movie['kind'] == 'episode':
                 # print "TV Series episode detected"
+                metadata += 'Title:%s, S%d E%d: \"%s\"' % \
+                                (movie['series title'], int(movie['season']),
+                int(movie['episode']), movie['title']) + "\n"
+                metadata += 'Title:' + episode_title_format % \
+                                                {
+                                                        'series_title': movie['series title'],
+                                                        'season': int(movie['season']),
+                                                        'episode': int(movie['episode']),
+                                                        'episode_title': movie['title'],
+                                                } + '\n'
                 series = movie['episode of']
                 imdb_access.update(series)
                 metadata += createMetadataFromFirst('Runtime', 'runtimes', m=series)
 …
         metadata += createMetadataFromFirst('Countries', 'countries')
         return metadata
+def parse_meta(meta, key):
+        for line in meta.split("\n"):
+                beginning = key + ":"
+                if line.startswith(beginning):
+                        return line[len(beginning):].strip()
+        return None
 def main():
         p = optparse.OptionParser()
         p.add_option('--version', '-v', action="store_true", default=False,

mythvideo/mythvideo/scripts/fetch_poster.py

+#!/usr/bin/python
+# -*- coding: utf8 -*-
+"""
+This Python script is intended to find the best possible poster/cover image
+for a video.
+Uses the following www-sites for scraping for the poster image (in this
+order):
+        movieposter.com
+        imdb.com
+Picks the largest (in pixels) vertical poster.
+Written by Pekka Jääskeläinen (gmail: pekka.jaaskelainen) 2007
+"""
+import urllib
+import re
+import tempfile
+import os
+import optparse
+import sys
+import imdbpy
+movie_poster_site = True
+try:
+        import BeautifulSoup
+except:
+        print """BeautifulSoup class is required for parsing the MoviePoster site.
+In Debian/Ubuntu it is packaged as 'python-beautifulsoup'.
+http://www.crummy.com/software/BeautifulSoup/#Download/"""
+        movie_poster_site = False
+imaging_library = True
+try:
+        import Image
+except:
+        print """Python Imaging Library is required for figuring out the sizes of
+the fetched poster images.
+In Debian/Ubuntu it is packaged as 'python-imaging'.
+http://www.pythonware.com/products/pil/"""
+        imaging_library = False
+class PosterImage:
+        """
+        Holds a single poster image.
+        Contains information of the resolution, location of the file in
+        the file system. etc.
+        """
+        width = 0
+        height = 0
+        file_name = None
+        def __init__(self, file_name):
+                self.file_name = file_name
+                try:
+                        (self.width, self.height) = Image.open(file_name).size
+                except:
+                        # The imaging lib is not installed or some other error.
+                        # Do not take the size in account.
+                        pass
+        def is_vertical(self):
+                return self.width < self.height
+        def pixels(self):
+                return self.width*self.height
+class PosterFetcher:
+        """
+        Base class for poster image fetchers.
+        """
+        def fetch(self, title_string, imdb_id = None):
+                """
+                Fetch and download to a local temporary filename movie posters
+                for the given title.
+                Return empty list in case no images was found.
+                """
+                pass
+        def download_image(self, image_url, extension=None):
+                (fid, local_filename) = tempfile.mkstemp(extension)
+                local_file = os.fdopen(fid, "wb")
+                local_file.write(urllib.urlopen(image_url).read())
+                local_file.close()
+                return PosterImage(local_filename)
+class MoviePosterPosterFetcher(PosterFetcher):
+        """
+        Fetches poster images from movieposter.com
+        """
+        def fetch(self, title_string, imdb_id = None):
+                poster_urls = self.title_search(title_string)
+                results = 0
+                max_results = 4
+                images = []
+                for url in poster_urls:
+                        image_url = self.find_poster_image_url(url)
+                        if image_url is not None:
+                                images.append(self.download_image(image_url, ".jpg"))
+                                results += 1
+                        if results >= max_results:
+                                break
+                return images
+        def find_poster_image_url(self, poster_page_url):
+                """
+                Parses the given poster page and returns an URL pointing to the poster
+                image.
+                """
+                #print "Getting",poster_page_url
+                soup = BeautifulSoup.BeautifulSoup(urllib.urlopen(poster_page_url))
+                imgs = soup.findAll('img', attrs={'src':re.compile('/posters/archive/main/.*')})
+                if len(imgs) == 1:
+                        return "http://eu.movieposter.com/" + imgs[0]['src']
+                return None
+        def title_search(self, title_string):
+                """
+                Executes a title search on movieposter.com.
+                Returns a list of URLs leading to the page for the poster
+                for the given title_string.
+                """
+                params = urllib.urlencode(\
+                        {'ti': title_string,
+                         'pl': 'action',
+                         'th': 'y',
+                         'rs': '12',
+                         'size': 'any'})
+                opener = urllib.URLopener()
+                (filename, headers) = \
+                        opener.retrieve("http://eu.movieposter.com/cgi-bin/mpw8/search.pl",
+                        data=params)
+                f = open(filename, 'r')
+                results = f.read()
+                f.close()
+                return self.parse_title_search_results(results, title_string)
+        def parse_title_search_results(self, result_page, title_string):
+                """
+                Parses the result page of a title search on movieposter.com.
+                Returns a list of URLs leading to a page with poster for the given title.
+                """
+                search = title_string.lower()
+                soup = BeautifulSoup.BeautifulSoup(result_page)
+                divs = soup.findAll('div', attrs={'class':'pid'})
+                urls = []
+                for div in divs:
+                        links = div.findAll('a')
+                        if len(links) > 0:
+                                link = links[0]
+                                title = link['title'].lower()
+                                if title.endswith("poster"):
+                                        title = title[0:-len(" poster")]
+                                if title == search:
+                                        urls.append(link['href'])
+                return urls
+class IMDbPosterFetcher(PosterFetcher):
+        """
+        Fetches poster images from imdb.com.
+        """
+        def fetch(self, title_string, imdb_id = None):
+                if imdb_id is None:
+                        return []
+                poster_url = imdbpy.find_poster_url(imdb_id)
+                if poster_url is not None:
+                        filename = poster_url.split("/")[-1]
+                        (name, extension) = os.path.splitext(filename)
+                        return [self.download_image(poster_url, extension)]
+                return []
+def find_best_posters(title, count=1, accept_horizontal=False, imdb_id=None):
+        fetchers = [MoviePosterPosterFetcher(), IMDbPosterFetcher()]
+        #fetchers = [IMDbPosterFetcher()]
+        posters = []
+        # If it's a series title 'Sopranos, S06E14' then use just the series
+        # name for finding the poster. Strip the episode number.
+        (series_title, season, episode) = imdbpy.detect_series_query(title)
+        if series_title is not None and season is not None and episode is not None:
+                title = series_title.strip()
+                if title.endswith(","):
+                        title = title[0:-1]
+        # Drop 'The" etc.
+        preps = ["the", "a" , "an", "die", "der"]
+        for prep in preps:
+                if title.lower().startswith(prep + " "):
+                        title = title[len(prep + " "):]
+                        break
+        for fetcher in fetchers:
+                new_posters = fetcher.fetch(title, imdb_id)
+                for poster in new_posters:
+                        if not accept_horizontal and not poster.is_vertical():
+                                os.remove(poster.file_name)
+                                continue
+                        posters.append(poster)
+        def size_cmp(a, b):
+                return cmp(a.pixels(), b.pixels())
+        posters.sort(size_cmp)
+        posters.reverse()
+        for small_poster in posters[count:]:
+                os.remove(small_poster.file_name)
+        return posters[0:count]
+def main():
+        p = optparse.OptionParser()
+        p.add_option('--number', '-n', action="store", type="int", default=1,
+                help="the count of biggest posters to get")
+        p.add_option('--all', '-a', action="store_true", default=False,
+                help="accept all posters, even horizontal ones")
+        p.add_option('--poster_search', '-P', metavar='IMDB_ID', default=None, dest="imdb_id",
+                help="Displays a list of URL's to movie posters.  The lines are "\
+                "ranked by descending value. For MythVideo.")
+        options, arguments = p.parse_args()
+        title = ""
+        if len(arguments) != 1:
+                if options.imdb_id:
+                        # TODO: Fetch the title from IMDb.
+                        metadata = imdbpy.metadata_search(options.imdb_id)
+                        title = imdbpy.parse_meta(metadata, "Title")
+                else:
+                        print "Please give a video title as argument."
+                        sys.exit(1)
+        else:
+                title = arguments[0]
+        posters = find_best_posters(title, options.number, options.all,
+                                imdb_id=options.imdb_id)
+        if options.imdb_id is not None:
+                for poster in posters:
+                        print "file://%s" % poster.file_name
+        else:
+                for poster in posters:
+                        print "%s [%dx%d] vertical: %s " % \
+                                (poster.file_name, poster.width,
+                                 poster.height, poster.is_vertical())
+if __name__ == '__main__':
+        main()
+ No newline at end of file

mythvideo/mythvideo/scripts/find_meta.py

Property changes on: mythvideo/mythvideo/scripts/fetch_poster.py
___________________________________________________________________
Name: svn:executable
   + *

 import shlex
 import socket
 import urllib
+import fetch_poster
+import distutils.file_util
 try:
         # If found, we can insert data directly to MythDB
 …
 verbose=False
+try:
+    # For better line editing in interactive mode. Optional.
+    import readline
+except:
+    pass
 interactive=False
 recursive=False
 dbimport=False
 …
         global overwrite
         if not overwrite and oldvalue is not None and oldvalue != emptyvalue:
                 return oldvalue
+        for line in meta.split("\n"):
+                beginning = variable + ":"
+                if line.startswith(beginning):
+                        return line[len(beginning):].strip()
+        return None
+        return imdbpy.parse_meta(meta, variable)
 def detect_disc_number(allfiles, file):
         """
 …
         def parse_metadata(variable, oldvalue, emptyvalue="", meta=metadata):
                 return parse_meta(variable, oldvalue, emptyvalue, meta)
         title = parse_metadata('Title', title)
+        inetref = parse_metadata('IMDb', inetref, '00000000')
+        if inetref == None:
+                inetref = '00000000'
+        coverfile = find_poster_image(title, inetref)
         if disc is not None:
                 title += " (disc " + str(disc) + ")"
 …
                 length = int(length)
         except:
                 length = 0
+        inetref = parse_metadata('IMDb', inetref, '00000000')
+        if inetref == None:
+                inetref = '00000000'
         filename = videopath
         genrestring = parse_metadata('Genres', "", "")
 …
                 return
         else:
                 # Only one genre supported?
+                category = get_genre_id(genres[0])
+        coverfile = find_poster_image(inetref)
+                category = get_genre_id(genres[0])
         if coverfile == None:
                 coverfile = "No cover"
-        else:
-                # TODO: should enter only the filename to allow reusing
-                # the same cover file from multiple hosts where the
-                # poster image directory is mounted to different directories.
-                # This needs to be fixed in MythVideo first.
-                coverfile = poster_dir + "/" + coverfile
         c = db.cursor()
         c.execute("""
 …
         c.close()
         return intid
 def find_poster_image(imdb_id):
+def find_poster_image(title, imdb_id):
         """
         Tries to find a poster image for the given IMDb id.
         First looks if the image already exist, if not, tries to fetch it using
         the imdbpy.py. Returns None in case a poster image couldn't be found,
+        the fetch_poster.py. Returns None in case a poster image couldn't be found,
         otherwise returns the base name of the poster image file.
         """
         global poster_dir
+        global poster_dir,overwrite
         image_extensions = ["png", "jpg", "bmp"]
         poster_files = []
         for ext in image_extensions:
                 poster_files += glob.glob("%s/%s.%s" % (poster_dir, imdb_id, ext))
         if len(poster_files) == 0:
+        if len(poster_files) == 0 or overwrite:
                 # Try to fetch the poster image from the web.
+                poster_url = imdbpy.find_poster_url(imdb_id)
+                if poster_url is None:
+                        return None
+                print_verbose("Found poster at '%s', downloading it..." % poster_url)
+                filename = poster_url.split("/")[-1]
+                posters = fetch_poster.find_best_posters(\
+                        title, count=1, accept_horizontal=True, imdb_id=imdb_id)
+                if len(posters) == 0:
+                        return None
+                poster = posters[0]
+                filename = os.path.basename(poster.file_name)
                 (name, extension) = os.path.splitext(filename)
                 local_filename = poster_dir + "/" + imdb_id + extension
+                urllib.urlretrieve(poster_url, local_filename)
+                poster_files.append(local_filename)
+                if os.path.exists(local_filename):
+                        if overwrite:
+                                os.remove(local_filename)
+                        else:
+                                return local_filename
+                distutils.file_util.move_file(poster.file_name, local_filename)
+                # Set enough read bits so Apache can read the cover for the MythWeb interface.
+                os.chmod(local_filename, S_IREAD | S_IRUSR | S_IRGRP | S_IROTH)
+                return local_filename
         else:
                 print_verbose("Found existing cover image.")
+        coverfile = None
+        if len(poster_files) > 0:
+                # TODO: if multiple poster images available, pick the one with largest
+                # dimensions.
+                # Now just pick the first found.
+                coverfile = os.path.basename(poster_files[0])
+                return poster_files[0]
+        return None
-        return coverfile
 def save_metadata_to_file(fileName, metadata):
         global overwrite
 …
                         if len(candidates) > 1:
                                 print "Got multiple candidates for title search '%s'. " % title
+                                print "Use the '-a' switch to choose the correct one."
+                                if not interactive:
+                                        print "Use the '-a' or '-i' switch to choose the correct one."
                                 for candidate in candidates:
                                         print "%s) %s (%d)" % (candidate[0], candidate[1], candidate[2])

Download in other formats:

Original Format