Ticket #1405: imdb.pl.patch

File imdb.pl.patch, 5.3 KB (added by arjeousski@…, 18 years ago)

Patch for the v1.3 changes to imdb.pl

  • imdb.pl

     
    1515#     - when searching amazon, try searching for main movie name and if nothing is found, search for informal name
    1616#     - better handling for amazon posters, see if movie title is a substring in the search results returned by amazon
    1717#     - fixed redirects for some movies on impawards
     18# v1.3
     19#     - added a valid amazon service developer id for amazon searches (registered to arjeousski@gmail.com)
     20#     - fixed search for low res images (imdb changed the page layout)
     21#     - added cinemablend poster search
     22#     - added nexbase poster search
    1823
     24
    1925use LWP::Simple;      # libwww-perl providing simple HTML get actions
    2026use HTML::Entities;
    2127use URI::Escape;
     
    2632use Getopt::Std;
    2733
    2834$title = "IMDB Query";
    29 $version = "v1.2";
     35$version = "v1.3";
    3036$author = "Tim Harvey, Andrei Rjeousski";
    3137
    3238# display usage
     
    240246
    241247   my $uri = "";
    242248
     249
    243250   # look for references to impawards.com posters - they are high quality
    244251   my $site = "http://www.impawards.com";
    245252   my $impsite = parseBetween($response, "<a href=\"".$site, "\">".$site);
     
    275282      if (defined $opt_d) { print "# found ipmawards poster: $uri\n"; }
    276283   }
    277284
     285   # try looking on nexbase
     286   if ($uri eq "" && $response =~ m/<a href="([^"]*)">([^"]*?)nexbase/i) {
     287        if ($1 ne "") {
     288           if (defined $opt_d) { print "# found nexbase poster page: $1 \n"; }
     289           my $cinres = get $1;
     290           if (defined $opt_d) { printf("# got %i bytes\n", length($cinres)); }
     291           if (defined $opt_r) { printf("%s", $cinres); }
     292
     293           if ($cinres =~ m/<a id="photo_url" href="([^"]*?)" ><\/a>/i) {
     294               if (defined $opt_d) { print "# nexbase url retreived\n"; }
     295               $uri = $1;
     296           }
     297
     298        }
     299
     300   }
     301
     302
     303
     304   # try looking on cinemablend
     305   if ($uri eq "" && $response =~ m/<a href="([^"]*)">([^"]*?)cinemablend/i) {
     306        if ($1 ne "") {
     307           if (defined $opt_d) { print "# found cinemablend poster page: $1 \n"; }
     308           my $cinres = get $1;
     309           if (defined $opt_d) { printf("# got %i bytes\n", length($cinres)); }
     310           if (defined $opt_r) { printf("%s", $cinres); }
     311
     312           if ($cinres =~ m/<td align=center><img src="([^"]*?)" border=1><\/td>/i) {
     313               if (defined $opt_d) { print "# cinemablend url retreived\n"; }
     314               $uri = "http://www.cinemablend.com/".$1;   
     315           }
     316
     317        }
     318
     319   }
     320
    278321   # if the impawards site attempt didn't give a filename grab it from imdb
    279322   if ($uri eq "") {
    280323       if (defined $opt_d) { print "# looking for imdb posters\n"; }
     
    287330          if (defined $opt_d) { print "# no poster found\n"; }
    288331       }
    289332   }
    290    
    291    # now we couldnt even find lowres poster from IMDB, lets try looking for dvd
    292    # cover on amazon.com   
    293333
     334
     335
    294336   my @movie_titles;
    295337   my $found_low_res = 0;
    296338   my $k = 0;
    297339   
    298340   # no poster found, take lowres image from imdb
    299341   if ($uri eq "") {
    300        if (defined $opt_d) { print "# looking for lowres imdb posters\n"; }
    301        my $host = "http://www.imdb.com/title/tt" . $movieid . "/";
    302        $response = get $host;
     342      if (defined $opt_d) { print "# looking for lowres imdb posters\n"; }
     343      my $host = "http://www.imdb.com/title/tt" . $movieid . "/";
     344      $response = get $host;
    303345
    304        $uri = parseBetween($response, "alt=\"cover\" src=\"http://ia.imdb.com/media/imdb/", "\"");
    305        
     346      # Better handling for low resolution posters
     347      #
     348      if ($response =~ m/<a name="poster".*<img.*src="([^"]*).*<\/a>/ig) {
     349           if (defined $opt_d) { print "# found low res poster at: $1\n"; }
     350           $uri = $1;
     351           $found_low_res = 1;
     352
     353      } else {
     354          if (defined $opt_d) { print "# no low res poster found\n"; }
     355          $uri = "";
     356      }
     357 
     358
     359
    306360      if (defined $opt_d) { print "# starting to look for movie title\n"; }
    307361     
    308362      # get main title
     
    319373         if (defined $opt_d) { print "# Title: ".$movie_titles[$k-1]."\n"; }
    320374      }
    321375       
    322        if ($uri ne "" ) {
    323            $uri = "http://ia.imdb.com/media/imdb/".$uri;
    324            $found_low_res = 1;
    325        } else {
    326           if (defined $opt_d) { print "# no poster found\n"; }
    327        }
    328376   }
    329377   
    330378   # now we couldn't even find a lowres poster from IMDB, let's try looking for a dvd
     
    338386      my $xml_parser = XML::Simple->new();
    339387     
    340388         
     389      # loop through all titles
    341390      do {
    342391         # get rid of the year
    343392         $movie_titles[$titleid] =~ s/ ?\([^\)]+\) ?//g;
     
    352401         my $safe_movie_title = $movie_titles[$titleid];
    353402         $safe_movie_title =~ s/([^A-Za-z0-9])/sprintf("%%%02X", ord($1))/seg;     
    354403         # request XML info from amazon
    355          my $xml_uri = "http://xml.amazon.com/onca/xml3?t=000&dev-t=000&KeywordSearch=".$safe_movie_title."&mode=dvd&type=lite&page=1&f=xml";
     404         # Amazon DEV_t is from Andrei Rjeousski (arjeousski at gmail.com)
     405         my $xml_uri = "http://xml.amazon.com/onca/xml3?t=000&dev-t=0TJR5CSZSS2KE77KWKG2&KeywordSearch=".$safe_movie_title."&mode=dvd&type=lite&page=1&f=xml";
    356406         if (defined $opt_d) { print "# Amazon request string is: $xml_uri\n";}
    357407
    358408         # get the response