Ticket #1405: imdb.pl.patch
File imdb.pl.patch, 5.3 KB (added 18 years ago)
-
imdb.pl
15 15 # - when searching amazon, try searching for main movie name and if nothing is found, search for informal name 16 16 # - better handling for amazon posters, see if movie title is a substring in the search results returned by amazon 17 17 # - fixed redirects for some movies on impawards 18 # v1.3 19 # - added a valid amazon service developer id for amazon searches (registered to arjeousski@gmail.com) 20 # - fixed search for low res images (imdb changed the page layout) 21 # - added cinemablend poster search 22 # - added nexbase poster search 18 23 24 19 25 use LWP::Simple; # libwww-perl providing simple HTML get actions 20 26 use HTML::Entities; 21 27 use URI::Escape; … … 26 32 use Getopt::Std; 27 33 28 34 $title = "IMDB Query"; 29 $version = "v1. 2";35 $version = "v1.3"; 30 36 $author = "Tim Harvey, Andrei Rjeousski"; 31 37 32 38 # display usage … … 240 246 241 247 my $uri = ""; 242 248 249 243 250 # look for references to impawards.com posters - they are high quality 244 251 my $site = "http://www.impawards.com"; 245 252 my $impsite = parseBetween($response, "<a href=\"".$site, "\">".$site); … … 275 282 if (defined $opt_d) { print "# found ipmawards poster: $uri\n"; } 276 283 } 277 284 285 # try looking on nexbase 286 if ($uri eq "" && $response =~ m/<a href="([^"]*)">([^"]*?)nexbase/i) { 287 if ($1 ne "") { 288 if (defined $opt_d) { print "# found nexbase poster page: $1 \n"; } 289 my $cinres = get $1; 290 if (defined $opt_d) { printf("# got %i bytes\n", length($cinres)); } 291 if (defined $opt_r) { printf("%s", $cinres); } 292 293 if ($cinres =~ m/<a id="photo_url" href="([^"]*?)" ><\/a>/i) { 294 if (defined $opt_d) { print "# nexbase url retreived\n"; } 295 $uri = $1; 296 } 297 298 } 299 300 } 301 302 303 304 # try looking on cinemablend 305 if ($uri eq "" && $response =~ m/<a href="([^"]*)">([^"]*?)cinemablend/i) { 306 if ($1 ne "") { 307 if (defined $opt_d) { print "# found cinemablend poster page: $1 \n"; } 308 my $cinres = get $1; 309 if (defined 
$opt_d) { printf("# got %i bytes\n", length($cinres)); } 310 if (defined $opt_r) { printf("%s", $cinres); } 311 312 if ($cinres =~ m/<td align=center><img src="([^"]*?)" border=1><\/td>/i) { 313 if (defined $opt_d) { print "# cinemablend url retreived\n"; } 314 $uri = "http://www.cinemablend.com/".$1; 315 } 316 317 } 318 319 } 320 278 321 # if the impawards site attempt didn't give a filename grab it from imdb 279 322 if ($uri eq "") { 280 323 if (defined $opt_d) { print "# looking for imdb posters\n"; } … … 287 330 if (defined $opt_d) { print "# no poster found\n"; } 288 331 } 289 332 } 290 291 # now we couldnt even find lowres poster from IMDB, lets try looking for dvd292 # cover on amazon.com293 333 334 335 294 336 my @movie_titles; 295 337 my $found_low_res = 0; 296 338 my $k = 0; 297 339 298 340 # no poster found, take lowres image from imdb 299 341 if ($uri eq "") { 300 301 302 342 if (defined $opt_d) { print "# looking for lowres imdb posters\n"; } 343 my $host = "http://www.imdb.com/title/tt" . $movieid . 
"/"; 344 $response = get $host; 303 345 304 $uri = parseBetween($response, "alt=\"cover\" src=\"http://ia.imdb.com/media/imdb/", "\""); 305 346 # Better handling for low resolution posters 347 # 348 if ($response =~ m/<a name="poster".*<img.*src="([^"]*).*<\/a>/ig) { 349 if (defined $opt_d) { print "# found low res poster at: $1\n"; } 350 $uri = $1; 351 $found_low_res = 1; 352 353 } else { 354 if (defined $opt_d) { print "# no low res poster found\n"; } 355 $uri = ""; 356 } 357 358 359 306 360 if (defined $opt_d) { print "# starting to look for movie title\n"; } 307 361 308 362 # get main title … … 319 373 if (defined $opt_d) { print "# Title: ".$movie_titles[$k-1]."\n"; } 320 374 } 321 375 322 if ($uri ne "" ) {323 $uri = "http://ia.imdb.com/media/imdb/".$uri;324 $found_low_res = 1;325 } else {326 if (defined $opt_d) { print "# no poster found\n"; }327 }328 376 } 329 377 330 378 # now we couldnt even find lowres poster from IMDB, lets try looking for dvd … … 338 386 my $xml_parser = XML::Simple->new(); 339 387 340 388 389 # loop through all titles 341 390 do { 342 391 # get rid of the year 343 392 $movie_titles[$titleid] =~ s/ ?\([^\)]+\) ?//g; … … 352 401 my $safe_movie_title = $movie_titles[$titleid]; 353 402 $safe_movie_title =~ s/([^A-Za-z0-9])/sprintf("%%%02X", ord($1))/seg; 354 403 # request XML info from amazon 355 my $xml_uri = "http://xml.amazon.com/onca/xml3?t=000&dev-t=000&KeywordSearch=".$safe_movie_title."&mode=dvd&type=lite&page=1&f=xml"; 404 # Amazon DEV_t is from Andrei Rjeousski (arjeousski at gmail.com) 405 my $xml_uri = "http://xml.amazon.com/onca/xml3?t=000&dev-t=0TJR5CSZSS2KE77KWKG2&KeywordSearch=".$safe_movie_title."&mode=dvd&type=lite&page=1&f=xml"; 356 406 if (defined $opt_d) { print "# Amazon request string is: $xml_uri\n";} 357 407 358 408 # get the response