Ticket #2734: metadata_script_improvements_against_trunk_07-03-04.patch
File metadata_script_improvements_against_trunk_07-03-04.patch, 15.8 KB (added 17 years ago) |
---|
-
mythvideo/mythvideo/scripts/imdbpy.py
21 21 print "You need to install the IMDbPy library "\ 22 22 "from (http://imdbpy.sourceforge.net/?page=download)" 23 23 sys.exit(1) 24 24 25 episode_title_format = '%(series_title)s S%(season)d E%(episode)d "%(episode_title)s"' 26 25 27 def detect_series_query(search_string): 26 28 """ 27 29 Detects a series episode query. … … 31 33 "Sopranos S1E2" 32 34 "Sopranos S1 E2" 33 35 "Sopranos 1x2" 36 "Sopranos - 1x2" 34 37 """ 35 m = re.match(r"(.+)\s*(s|(season)|\s)\s*(\d+)\s*(e|(episode)|x)\s*(\d+)", 36 search_string.lower()) 37 if m is None or m.group(1) is None or m.group(4) is None or \ 38 m.group(7) is None: 38 m = re.match(r"((?P<title>.+?)(-)?)?(\s*)"\ 39 "(s|(season)|\s)\s*(?P<season>\d+)"\ 40 "\s*(e|(episode)|x)\s*(?P<episode>\d+)", 41 search_string.lower()) 42 if m is None or m.group('title') is None or m.group('season') is None \ 43 or m.group('episode') is None: 39 44 return (None, None, None) 40 45 41 return (m.group( 1), m.group(4), m.group(7))46 return (m.group('title'), m.group('season'), m.group('episode')) 42 47 43 48 def episode_search(title, season, episode): 44 49 matches = [] 45 50 imdb_access = imdb.IMDb() 46 51 series = imdb_access.search_movie(title.encode("ascii", 'replace')) 52 season = int(season) 53 episode = int(episode) 47 54 48 55 for serie in series: 49 56 if serie['kind'] == 'tv series': … … 51 58 imdb_access.update(serie, 'episodes') 52 59 if serie.has_key('episodes'): 53 60 try: 54 ep = serie['episodes'][ int(season)][int(episode)]61 ep = serie['episodes'][season][episode] 55 62 except: 56 63 # Probably indexing exception in case the episode/season 57 64 # is not found. 58 65 continue 59 66 # Found an exact episode match, return that match only. 
60 67 matches = [] 61 matches.append([imdb_access.get_imdbID(ep), 62 title.title().strip() + ", S" + season + " E" + 63 episode, int(serie['year'])]) 68 episode_title = episode_title_format % \ 69 { 70 'series_title': ep['series title'], 71 'season': season, 72 'episode': episode, 73 'episode_title': ep['title'] 74 } 75 matches.append([imdb_access.get_imdbID(ep), episode_title ,int(ep['year'])]) 64 76 return matches 65 77 else: 66 78 matches.append([imdb_access.get_imdbID(serie), … … 182 194 183 195 if movie['kind'] == 'episode': 184 196 # print "TV Series episode detected" 185 metadata += 'Title:%s, S%d E%d: \"%s\"' % \ 186 (movie['series title'], int(movie['season']), 187 int(movie['episode']), movie['title']) + "\n" 197 metadata += 'Title:' + episode_title_format % \ 198 { 199 'series_title': movie['series title'], 200 'season': int(movie['season']), 201 'episode': int(movie['episode']), 202 'episode_title': movie['title'], 203 } + '\n' 188 204 series = movie['episode of'] 189 205 imdb_access.update(series) 190 206 metadata += createMetadataFromFirst('Runtime', 'runtimes', m=series) … … 218 234 metadata += createMetadataFromFirst('Countries', 'countries') 219 235 return metadata 220 236 237 def parse_meta(meta, key): 238 for line in meta.split("\n"): 239 beginning = key + ":" 240 if line.startswith(beginning): 241 return line[len(beginning):].strip() 242 return None 243 221 244 def main(): 222 245 p = optparse.OptionParser() 223 246 p.add_option('--version', '-v', action="store_true", default=False, -
mythvideo/mythvideo/scripts/fetch_poster.py
#!/usr/bin/python
# -*- coding: utf8 -*-
"""
This Python script is intended to find the best possible poster/cover image
for a video.

Uses the following www-sites for scraping for the poster image (in this
order):

    movieposter.com
    imdb.com

Picks the largest (in pixels) vertical poster.

This is Python 2 code (it relies on urllib.urlopen and urlparse).

Written by Pekka Jääskeläinen (gmail: pekka.jaaskelainen) 2007
"""

import urllib
import urlparse
import re
import tempfile
import os
import optparse
import sys
import imdbpy

MOVIEPOSTER_BASE_URL = "http://eu.movieposter.com/"
MOVIEPOSTER_SEARCH_URL = "http://eu.movieposter.com/cgi-bin/mpw8/search.pl"

# Warnings about missing optional libraries go to stderr: in --poster_search
# mode stdout carries only the "file://..." result lines.
movie_poster_site = True
try:
    import BeautifulSoup
except ImportError:
    sys.stderr.write("""BeautifulSoup class is required for parsing the MoviePoster site.

In Debian/Ubuntu it is packaged as 'python-beautifulsoup'.

http://www.crummy.com/software/BeautifulSoup/#Download/
""")
    movie_poster_site = False

imaging_library = True
try:
    import Image
except ImportError:
    sys.stderr.write("""Python Imaging Library is required for figuring out the sizes of
the fetched poster images.

In Debian/Ubuntu it is packaged as 'python-imaging'.

http://www.pythonware.com/products/pil/
""")
    imaging_library = False


class PosterImage:
    """
    Holds a single poster image.

    Contains information of the resolution, location of the file in
    the file system. etc.

    width and height stay 0 when the image size could not be determined.
    """
    width = 0
    height = 0
    file_name = None

    def __init__(self, file_name):
        self.file_name = file_name
        try:
            (self.width, self.height) = Image.open(file_name).size
        except Exception:
            # The imaging lib is not installed (NameError) or the file could
            # not be read as an image. Do not take the size in account.
            pass

    def is_vertical(self):
        """Return True when the image is taller than it is wide."""
        return self.width < self.height

    def pixels(self):
        """Return the image area in pixels (0 when the size is unknown)."""
        return self.width * self.height


class PosterFetcher:
    """
    Base class for poster image fetchers.
    """
    def fetch(self, title_string, imdb_id = None):
        """
        Fetch and download to a local temporary filename movie posters
        for the given title.

        Return empty list in case no images was found.
        """
        return []

    def download_image(self, image_url, extension=None):
        """
        Download image_url to a new temporary file and return it wrapped
        in a PosterImage.

        extension is used as the suffix of the temporary file name. The
        temporary file is removed again if the download fails; the
        exception is re-raised to the caller.
        """
        # mkstemp() cannot take None as a suffix on Python 2.
        (fid, local_filename) = tempfile.mkstemp(extension or "")
        local_file = os.fdopen(fid, "wb")
        try:
            try:
                remote = urllib.urlopen(image_url)
                try:
                    local_file.write(remote.read())
                finally:
                    remote.close()
            finally:
                local_file.close()
        except:
            # Cleanup only; the error itself is not swallowed.
            os.remove(local_filename)
            raise
        return PosterImage(local_filename)


class MoviePosterPosterFetcher(PosterFetcher):
    """
    Fetches poster images from movieposter.com
    """
    def fetch(self, title_string, imdb_id = None):
        """
        Download at most four posters whose listed title equals
        title_string. imdb_id is not used by this fetcher.
        """
        if not title_string:
            # Nothing to search for.
            return []

        max_results = 4
        images = []

        for url in self.title_search(title_string):
            image_url = self.find_poster_image_url(url)
            if image_url is None:
                continue
            images.append(self.download_image(image_url, ".jpg"))
            if len(images) >= max_results:
                break
        return images

    def find_poster_image_url(self, poster_page_url):
        """
        Parses the given poster page and returns an URL pointing to the poster
        image.

        Returns None unless the page has exactly one matching image.
        """
        # urljoin leaves absolute URLs untouched and resolves relative ones.
        page_url = urlparse.urljoin(MOVIEPOSTER_BASE_URL, poster_page_url)
        page = urllib.urlopen(page_url)
        try:
            soup = BeautifulSoup.BeautifulSoup(page.read())
        finally:
            page.close()

        imgs = soup.findAll(
            'img', attrs={'src': re.compile('/posters/archive/main/.*')})

        if len(imgs) == 1:
            # Plain concatenation produced "//" for root-relative src values.
            return urlparse.urljoin(MOVIEPOSTER_BASE_URL, imgs[0]['src'])
        return None

    def title_search(self, title_string):
        """
        Executes a title search on movieposter.com.

        Returns a list of URLs leading to the page for the poster
        for the given title_string.
        """
        params = urllib.urlencode(
            {'ti': title_string,
             'pl': 'action',
             'th': 'y',
             'rs': '12',
             'size': 'any'})
        opener = urllib.URLopener()
        try:
            (filename, headers) = opener.retrieve(MOVIEPOSTER_SEARCH_URL,
                                                  data=params)
            f = open(filename, 'r')
            try:
                results = f.read()
            finally:
                f.close()
        finally:
            # retrieve() stores the response in a temporary file; remove it.
            opener.cleanup()

        return self.parse_title_search_results(results, title_string)

    def parse_title_search_results(self, result_page, title_string):
        """
        Parses the result page of a title search on movieposter.com.

        Returns a list of URLs leading to a page with poster for the given title.
        Only results whose title (minus a trailing " poster") equals
        title_string, compared case-insensitively, are returned.
        """
        search = title_string.lower()
        suffix = " poster"
        soup = BeautifulSoup.BeautifulSoup(result_page)
        urls = []
        for div in soup.findAll('div', attrs={'class': 'pid'}):
            links = div.findAll('a')
            if len(links) == 0:
                continue
            link = links[0]
            title = link.get('title')
            href = link.get('href')
            if title is None or href is None:
                # Not a usable result link.
                continue
            title = title.lower()
            # Test for the same suffix that gets stripped.
            if title.endswith(suffix):
                title = title[0:-len(suffix)]
            if title == search:
                urls.append(href)
        return urls


class IMDbPosterFetcher(PosterFetcher):
    """
    Fetches poster images from imdb.com.
    """
    def fetch(self, title_string, imdb_id = None):
        """
        Download the poster imdbpy finds for imdb_id.

        Returns an empty list when imdb_id is None or no poster URL is
        found. title_string is not used by this fetcher.
        """
        if imdb_id is None:
            return []
        poster_url = imdbpy.find_poster_url(imdb_id)
        if poster_url is None:
            return []
        filename = poster_url.split("/")[-1]
        (name, extension) = os.path.splitext(filename)
        return [self.download_image(poster_url, extension)]


def find_best_posters(title, count=1, accept_horizontal=False, imdb_id=None):
    """
    Fetch posters for the title from all sources and keep the largest ones.

    title             -- video title; an episode suffix and a leading
                         article are stripped before searching.
    count             -- how many of the largest posters to return.
    accept_horizontal -- when False, posters that are not taller than wide
                         are discarded.
    imdb_id           -- optional IMDb id passed on to the fetchers.

    Returns a list of at most count PosterImage objects, largest first.
    The temporary files of all rejected posters are removed.
    """
    fetchers = []
    if movie_poster_site:
        # Needs BeautifulSoup; skip the site when it is not installed.
        fetchers.append(MoviePosterPosterFetcher())
    fetchers.append(IMDbPosterFetcher())
    posters = []

    if title is None:
        title = ""

    # If it's a series title 'Sopranos, S06E14' then use just the series
    # name for finding the poster. Strip the episode number.
    (series_title, season, episode) = imdbpy.detect_series_query(title)
    if series_title is not None and season is not None and episode is not None:
        title = series_title.strip()
        if title.endswith(","):
            title = title[0:-1]

    # Drop 'The" etc.
    preps = ["the", "a", "an", "die", "der"]
    for prep in preps:
        if title.lower().startswith(prep + " "):
            title = title[len(prep + " "):]
            break

    # Without the imaging library every poster is 0x0 and would look
    # "not vertical", so the orientation cannot be checked at all.
    check_orientation = imaging_library and not accept_horizontal

    for fetcher in fetchers:
        for poster in fetcher.fetch(title, imdb_id):
            if check_orientation and not poster.is_vertical():
                os.remove(poster.file_name)
                continue
            posters.append(poster)

    # Largest first. Sort ascending and reverse, as before, so equally
    # sized posters keep the order they always had.
    posters.sort(key=lambda poster: poster.pixels())
    posters.reverse()

    for small_poster in posters[count:]:
        os.remove(small_poster.file_name)

    return posters[0:count]


def main():
    """
    Command line entry point.

    Takes a video title as the only argument and/or an IMDb id with -P,
    and prints the local file names of the best posters found.
    """
    p = optparse.OptionParser()
    p.add_option('--number', '-n', action="store", type="int", default=1,
                 help="the count of biggest posters to get")
    p.add_option('--all', '-a', action="store_true", default=False,
                 help="accept all posters, even horizontal ones")
    p.add_option('--poster_search', '-P', metavar='IMDB_ID', default=None,
                 dest="imdb_id",
                 help="Displays a list of URL's to movie posters. The lines are "\
                      "ranked by descending value. For MythVideo.")

    options, arguments = p.parse_args()

    title = ""
    if len(arguments) == 1:
        title = arguments[0]
    elif options.imdb_id:
        # No title given: take it from the IMDb metadata.
        metadata = imdbpy.metadata_search(options.imdb_id)
        if metadata is not None:
            # parse_meta returns None when there is no "Title:" line.
            title = imdbpy.parse_meta(metadata, "Title") or ""
    else:
        sys.stderr.write("Please give a video title as argument.\n")
        sys.exit(1)

    posters = find_best_posters(title, options.number, options.all,
                                imdb_id=options.imdb_id)

    if options.imdb_id is not None:
        for poster in posters:
            print("file://%s" % poster.file_name)
    else:
        for poster in posters:
            print("%s [%dx%d] vertical: %s " %
                  (poster.file_name, poster.width,
                   poster.height, poster.is_vertical()))


if __name__ == '__main__':
    main()
mythvideo/mythvideo/scripts/find_meta.py
Property changes on: mythvideo/mythvideo/scripts/fetch_poster.py ___________________________________________________________________ Name: svn:executable + *
47 47 import shlex 48 48 import socket 49 49 import urllib 50 import fetch_poster 51 import distutils.file_util 50 52 51 53 try: 52 54 # If found, we can insert data directly to MythDB … … 60 62 61 63 verbose=False 62 64 65 try: 66 # For better line editing in interactive mode. Optional. 67 import readline 68 except: 69 pass 70 63 71 interactive=False 64 72 recursive=False 65 73 dbimport=False … … 201 209 global overwrite 202 210 if not overwrite and oldvalue is not None and oldvalue != emptyvalue: 203 211 return oldvalue 204 for line in meta.split("\n"): 205 beginning = variable + ":" 206 if line.startswith(beginning): 207 return line[len(beginning):].strip() 208 return None 212 213 return imdbpy.parse_meta(meta, variable) 209 214 210 215 def detect_disc_number(allfiles, file): 211 216 """ … … 396 401 397 402 def parse_metadata(variable, oldvalue, emptyvalue="", meta=metadata): 398 403 return parse_meta(variable, oldvalue, emptyvalue, meta) 399 404 405 400 406 title = parse_metadata('Title', title) 407 inetref = parse_metadata('IMDb', inetref, '00000000') 401 408 409 if inetref == None: 410 inetref = '00000000' 411 412 coverfile = find_poster_image(title, inetref) 413 402 414 if disc is not None: 403 415 title += " (disc " + str(disc) + ")" 404 416 … … 432 444 length = int(length) 433 445 except: 434 446 length = 0 435 436 inetref = parse_metadata('IMDb', inetref, '00000000') 437 438 if inetref == None: 439 inetref = '00000000' 440 447 441 448 filename = videopath 442 449 443 450 genrestring = parse_metadata('Genres', "", "") … … 450 457 return 451 458 else: 452 459 # Only one genre supported? 
453 category = get_genre_id(genres[0]) 454 455 coverfile = find_poster_image(inetref) 460 category = get_genre_id(genres[0]) 456 461 457 462 if coverfile == None: 458 463 coverfile = "No cover" 459 else:460 # TODO: should enter only the filename to allow reusing461 # the same cover file from multiple hosts where the462 # poster image directory is mounted to different directories.463 # This needs to be fixed in MythVideo first.464 coverfile = poster_dir + "/" + coverfile465 464 466 465 c = db.cursor() 467 466 c.execute(""" … … 475 474 c.close() 476 475 return intid 477 476 478 def find_poster_image( imdb_id):477 def find_poster_image(title, imdb_id): 479 478 """ 480 479 Tries to find a poster image for the given IMDb id. 481 480 482 481 First looks if the image already exist, if not, tries to fetch it using 483 the imdbpy.py. Returns None in case a poster image couldn't be found,482 the fetch_poster.py. Returns None in case a poster image couldn't be found, 484 483 otherwise returns the base name of the poster image file. 485 484 """ 486 global poster_dir 485 global poster_dir,overwrite 487 486 image_extensions = ["png", "jpg", "bmp"] 488 487 489 488 poster_files = [] 490 489 for ext in image_extensions: 491 490 poster_files += glob.glob("%s/%s.%s" % (poster_dir, imdb_id, ext)) 492 491 493 if len(poster_files) == 0 :492 if len(poster_files) == 0 or overwrite: 494 493 # Try to fetch the poster image from the web. 495 poster_url = imdbpy.find_poster_url(imdb_id) 496 if poster_url is None: 497 return None 498 print_verbose("Found poster at '%s', downloading it..." 
% poster_url) 499 filename = poster_url.split("/")[-1] 494 posters = fetch_poster.find_best_posters(\ 495 title, count=1, accept_horizontal=True, imdb_id=imdb_id) 496 497 if len(posters) == 0: 498 return None 499 500 poster = posters[0] 501 502 filename = os.path.basename(poster.file_name) 500 503 (name, extension) = os.path.splitext(filename) 501 504 local_filename = poster_dir + "/" + imdb_id + extension 502 urllib.urlretrieve(poster_url, local_filename) 503 poster_files.append(local_filename) 505 if os.path.exists(local_filename): 506 if overwrite: 507 os.remove(local_filename) 508 else: 509 return local_filename 510 distutils.file_util.move_file(poster.file_name, local_filename) 511 # Set enough read bits so Apache can read the cover for the MythWeb interface. 512 os.chmod(local_filename, S_IREAD | S_IRUSR | S_IRGRP | S_IROTH) 513 return local_filename 504 514 else: 505 515 print_verbose("Found existing cover image.") 506 507 coverfile = None 508 if len(poster_files) > 0: 509 # TODO: if multiple poster images available, pick the one with largest 510 # dimensions. 511 # Now just pick the first found. 512 coverfile = os.path.basename(poster_files[0]) 516 return poster_files[0] 517 return None 513 518 514 return coverfile515 519 516 517 520 def save_metadata_to_file(fileName, metadata): 518 521 global overwrite 519 522 … … 593 596 if len(candidates) > 1: 594 597 595 598 print "Got multiple candidates for title search '%s'. " % title 596 print "Use the '-a' switch to choose the correct one." 599 if not interactive: 600 print "Use the '-a' or '-i' switch to choose the correct one." 597 601 for candidate in candidates: 598 602 print "%s) %s (%d)" % (candidate[0], candidate[1], candidate[2]) 599 603