Ticket #5401: allocine.pl.diff

File allocine.pl.diff, 29.2 KB (added by Xavier Hervy, 16 years ago)
  • .pl

    old new  
    3131binmode(STDOUT, ":utf8");
    3232
    3333# display usage
    34 sub usage {
    35    print "usage: $0 -hviocMPD [parameters]\n";
    36    print "       -h, --help                       help\n";
    37    print "       -v, --version                    display version\n";
    38    print "       -i, --info                       display info\n";
    39    print "       -o, --originaltitle              concatenate title and original title\n";
    40    print "       -c, --casting                    with -D option, grap the complete actor list (much slower)\n";
    41    print "\n";
    42    print "       -M <query>,   --movie query>     get movie list\n";
    43    print "       -D <movieid>, --data <movieid>   get movie data\n";
    44    print "       -P <movieid>, --poster <movieid> get movie poster\n";
    45    exit(-1);
     34sub usage
     35{
     36    print "usage: $0 -hviocMPD [parameters]\n";
     37    print "       -h, --help                       help\n";
     38    print "       -v, --version                    display version\n";
     39    print "       -i, --info                       display info\n";
     40    print "       -o, --originaltitle              concatenate title and original title\n";
     41    print "       --uk                             Get from screenruch.co.uk\n";
     42    print "       -c, --casting                    with -D option, grap the complete actor list (much slower)\n";
     43    print "\n";
     44    print "       -M <query>,   --movie query>     get movie list\n";
     45    print "       -D <movieid>, --data <movieid>   get movie data\n";
     46    print "       -P <movieid>, --poster <movieid> get movie poster\n";
     47    exit(-1);
    4648}
    4749
    4850# display 1-line of info that describes the version of the program
    49 sub version {
    50    print "$title ($version) by $author\n"
     51sub version
     52{
     53    print "$title ($version) by $author\n"
    5154}
    5255
    5356# display 1-line of info that can describe the type of query used
    54 sub info {
    55    print "Performs queries using the www.allocine.fr website.\n";
     57sub info
     58{
     59    print "Performs queries using the www.allocine.fr website or www.screenrush.co.uk.\n";
    5660}
    5761
    5862# display detailed help
    59 sub help {
    60    version();
    61    info();
    62    usage();
     63sub help
     64{
     65    version();
     66    info();
     67    usage();
    6368}
    6469
    6570# returns text within 'data' between 'beg' and 'end' matching strings
    66 sub parseBetween {
    67    my ($data, $beg, $end)=@_; # grab parameters
    68 
    69    my $ldata = lc($data);
    70    my $start = index($ldata, lc($beg)) + length($beg);
    71    my $finish = index($ldata, lc($end), $start);
     71sub parseBetween
     72{
     73    my ($data, $beg, $end)=@_; # grab parameters
     74
     75    my $ldata = lc($data);
     76    my $start = index($ldata, lc($beg)) + length($beg);
     77    my $finish = index($ldata, lc($end), $start);
    7278   
    7379
    74    if ($start != (length($beg) -1) && $finish != -1) {
    75         my $result = substr($data, $start, $finish - $start);
    76         # dont use decode entities &npsp; => spécial characters bug in html::entities ?
    77         #decode_entities($result);
    78         return  removenbsp($result);
    79    }
    80    return "";
     80    if ($start != (length($beg) -1) && $finish != -1)
     81    {
     82        my $result = substr($data, $start, $finish - $start);
     83        # dont use decode entities &npsp; => spécial characters bug in html::entities ?
     84        #decode_entities($result);
     85        return  removenbsp($result);
     86    }
     87    return "";
    8188}
    8289
    8390# used to replace &nbsp; with " " (instead of decode_entities)
    8491sub removenbsp {
    85    my ($data)=@_; # grab parameters
     92    my ($data)=@_; # grab parameters
    8693
    87    my $ldata = lc($data);
    88    my $start = index($ldata, "&nbsp;");
    89    while ($start != -1){
    90       $data = substr($data, 0, $start). " " .substr($data, $start+6, length($data));
    91       $ldata = lc($data);
    92       $start = index($ldata, "&nbsp;");
    93    }
    94    return $data;
     94    my $ldata = lc($data);
     95    my $start = index($ldata, "&nbsp;");
     96    while ($start != -1)
     97    {
     98        $data = substr($data, 0, $start). " " .substr($data, $start+6, length($data));
     99        $ldata = lc($data);
     100        $start = index($ldata, "&nbsp;");
     101    }
     102    return $data;
    95103}
    96104
    97105
    98106# returns text within 'data' without tag
    99 sub removeTag {
    100    my ($data)=@_; # grab parameters
    101 
    102    my $ldata = lc($data);
    103    my $start = index($ldata, "<");
    104    my $finish = index($ldata, ">", $start)+1;
    105    while ($start != -1 && $finish != -1){
    106       $data = substr($data, 0, $start).substr($data, $finish, length($data));
    107       $ldata = lc($data);
    108       $start = index($ldata, "<");
    109       $finish = index($ldata, ">", $start)+1;
    110    }
    111    return $data;
     107sub removeTag
     108{
     109    my ($data)=@_; # grab parameters
     110
     111    my $ldata = lc($data);
     112    my $start = index($ldata, "<");
     113    my $finish = index($ldata, ">", $start)+1;
     114    while ($start != -1 && $finish != -1)
     115    {
     116        $data = substr($data, 0, $start).substr($data, $finish, length($data));
     117        $ldata = lc($data);
     118        $start = index($ldata, "<");
     119        $finish = index($ldata, ">", $start)+1;
     120    }
     121    return $data;
    112122}
    113123
    114124# get Movie Data
    115 sub getMovieData {
    116    my ($movieid)=@_; # grab movieid parameter
    117    if (defined $opt_d) { printf("# looking for movie id: '%s'\n", $movieid);}
    118 
    119    # get the search results  page
    120    my $request = "http://www.allocine.fr/film/fichefilm_gen_cfilm=" . $movieid . ".html";
    121    if (defined $opt_d) { printf("# request: '%s'\n", $request); }
    122    my $response = get $request;
    123 
    124    # parse title and year
    125    my $title = parseBetween($response, "<title>", "</title>");
    126    my $original_title = parseBetween($response, "<h4>Titre original : <i>","</i></h4></div>");
    127    $original_title = removeTag($original_title);
    128    if (defined $opt_originaltitle){
    129       if ($original_title  ne  ""){
    130         $title = $title . " (" . $original_title . ")";
    131       }
    132    }
     125sub getMovieData
     126{
     127    my ($movieid)=@_; # grab movieid parameter
     128
     129    my $website = "www.allocine.fr";
     130    if (defined $opt_uk) { $website = "www.screenrush.co.uk";}
     131    if (defined $opt_d) { printf("# looking for movie id: '%s'\n", $movieid);}
     132
     133    # get the search results  page
     134    my $request = "http://" . $website . "/film/fichefilm_gen_cfilm=" . $movieid . ".html";
     135
     136    if (defined $opt_d) { printf("# request: '%s'\n", $request); }
     137    my $response = get $request;
     138
     139    # parse title and year
     140    my $title = parseBetween($response, "<title>", "</title>");
     141    my $original_title;
     142    if (defined $opt_uk)
     143    {
     144        $original_title = parseBetween($response, "<h4>Original title : <i>","</i></h4></div>");
     145    }
     146    else
     147    {
     148        $original_title = parseBetween($response, "<h4>Titre original : <i>","</i></h4></div>");
     149    }
     150    $original_title = removeTag($original_title);
     151    if (defined $opt_originaltitle)
     152    {
     153        if ($original_title  ne  "")
     154        {
     155            $title = $title . " (" . $original_title . ")";
     156        }
     157    }
    133158   
    134    #print "titre = $title\n";
    135    $title = removeTag($title);
    136    my $year = parseBetween($response,"<h4>Année de production : ","</h4>");
    137 
    138    # parse director
    139    my $director = parseBetween($response,"<h4>Réalisé par ","</h4>");
    140    $director = removeTag($director);
    141 
    142    # parse writer
    143    # (Note: this takes the 'first' writer, may want to include others)
    144    my $writer = parseBetween($response, ">Writing credits</b>", "</table>");
    145    $writer = parseBetween($writer, "/\">", "</");
    146 
    147    # parse plot
    148    my $plot = parseBetween($response,"<td valign=\"top\" style=\"padding:10 0 0 0\"><div align=\"justify\"><h4>","</h4></div></td>");
    149    $plot =~ s/\n//g;
    150    $plot = removeTag($plot);
     159    $title = removeTag($title);
     160
     161    my $year;
     162    if (defined $opt_uk)
     163    {
     164        $year = parseBetween($response, "<h4>Production year : ","</h4>");
     165    }
     166    else
     167    {
     168        $year = parseBetween($response,"<h4>Année de production : ","</h4>");
     169    }
     170
     171    # parse director
     172    my $director;
     173    if (defined $opt_uk)
     174    {
     175        $director = parseBetween($response,"<h4>Directed by ","</h4>");
     176    }
     177    else
     178    {
     179        $director = parseBetween($response,"<h4>Réalisé par ","</h4>");
     180    }
     181    $director = removeTag($director);
     182
     183    # parse writer
     184    # (Note: this takes the 'first' writer, may want to include others)
     185    my $writer = parseBetween($response, ">Writing credits</b>", "</table>");
     186    $writer = parseBetween($writer, "/\">", "</");
     187
     188    # parse plot
     189    my $plot = parseBetween($response,"<td valign=\"top\" style=\"padding:10 0 "
     190            ."0 0\"><div align=\"justify\"><h4>","</h4></div></td>");
     191    $plot =~ s/\n//g;
     192    $plot = removeTag($plot);
    151193 
    152    # parse user rating
    153    my $userrating;
    154    my $nbvote = 0;
    155    my $sommevote = 0;
    156    my $rating = parseBetween($response,"Presse</a></h5>", " border");
    157    $rating = parseBetween($rating,"etoile_","\"");
    158 
    159    if (!($rating eq "")){
    160             $sommevote += $rating;
    161             $nbvote ++;
    162    }
    163    $rating = parseBetween($response,"Spectateurs</a></h5>", " border");
    164    $rating = parseBetween($rating,"etoile_","\"");
    165    if (!($rating eq "")){
    166         $sommevote += $rating;
    167         $nbvote ++;
    168    }
    169    if ($nbvote==0){$userrating=0};
    170    if ($nbvote==1){$userrating=$sommevote*2;};
    171    if ($nbvote==2){$userrating=$sommevote;};
    172        
     194    # parse user rating
     195    my $userrating;
     196    my $nbvote = 0;
     197    my $sommevote = 0;
     198    my $rating;
     199   
     200    if (defined $opt_uk)
     201    {
     202        $rating = parseBetween($response,"Press&nbsp;reviews</a></h5>",
     203            " border");
     204    }
     205    else
     206    {
     207        $rating = parseBetween($response,"Presse</a></h5>", " border");
     208    }
     209    $rating = parseBetween($rating,"etoile_","\"");
    173210
    174    # parse rating
    175     my $movierating = parseBetween($response,"Interdit aux moins de ","ans");
    176     if (!($movierating eq ""))
    177     {
    178         $movierating = "Interdit aux moins de " . $movierating . "ans";
     211    if (!($rating eq ""))
     212    {
     213        $sommevote += $rating;
     214        $nbvote ++;
     215    }
     216
     217    if (defined $opt_uk)
     218    {
     219        $rating = parseBetween($response,"user reviews</a></h5>", " border");
    179220    }
    180221    else
    181222    {
    182         $movierating = parseBetween($response,"Film pour enfants à partir de ","ans");
     223        $rating = parseBetween($response,"Spectateurs</a></h5>", " border");
     224    }
     225    $rating = parseBetween($rating,"etoile_","\"");
     226    if (!($rating eq ""))
     227    {
     228        $sommevote += $rating;
     229        $nbvote ++;
     230    }
     231    if ($nbvote==0){$userrating=0};
     232    if ($nbvote==1){$userrating=$sommevote*2;};
     233    if ($nbvote==2){$userrating=$sommevote;};
     234       
     235
     236    # parse rating
     237    my $movierating;
     238    if (defined $opt_uk)
     239    {
     240        $movierating = parseBetween($response,">Rated UK : ","</h4>");
     241        if (!($movierating eq ""))
     242        {
     243            $movierating = "Rated UK: " . $movierating;
     244        }
     245    }
     246    else
     247    {   
     248        $movierating = parseBetween($response,"Interdit aux moins de ","ans");
    183249        if (!($movierating eq ""))
    184250        {
    185             $movierating = "Film pour enfants à partir de " . $movierating . "ans";
     251            $movierating = "Interdit aux moins de " . $movierating . "ans";
    186252        }
    187253        else
    188254        {
    189             $movierating = parseBetween($response,"Visible ","enfants");
    190             if (!($movierating eq "")){ $movierating = "Visible par des enfants";};
     255            $movierating = parseBetween($response,"Film pour enfants à partir de ","ans");
     256            if (!($movierating eq ""))
     257            {
     258                $movierating = "Film pour enfants à partir de " . $movierating . "ans";
     259            }
     260            else
     261            {
     262                $movierating = parseBetween($response,"Visible ","enfants");
     263                if (!($movierating eq "")){ $movierating = "Visible par des enfants";};
     264            }
    191265        }
    192266    }
    193    
    194267
    195    # parse movie length
    196    my $runtime = parseBetween($response,"Durée : ",".&nbsp;</h4>");
    197    my $heure;
    198    my $minutes;
    199    ($heure,$minutes)=($runtime=~/[^\d]*(\d+)[^\d]*(\d*)/);
    200    if (!$heure){ $heure = 0; }
    201    if (!$minutes){
    202       $runtime = $heure * 60;
    203    }else{
     268    # parse movie length
     269    my $runtime;
     270    if (defined $opt_uk)
     271    {
     272        $runtime = parseBetween($response,"Running time : ",".&nbsp;</h4>");
     273    }
     274    else
     275    {
     276        $runtime = parseBetween($response,"Durée : ",".&nbsp;</h4>");
     277    }
     278    my $heure;
     279    my $minutes;
     280    ($heure,$minutes)=($runtime=~/[^\d]*(\d+)[^\d]*(\d*)/);
     281    if (!$heure){ $heure = 0; }
     282    if (!$minutes)
     283    {
     284        $runtime = $heure * 60;
     285    }
     286    else
     287    {
    204288       $runtime = $heure * 60 + $minutes;
    205   }
    206 
    207  
    208 
    209 
    210    # parse cast
     289    }
    211290
    212    my $cast = parseBetween($response, "<h4>Avec "," &nbsp;&nbsp;<img src");
    213    $cast = removeTag($cast);
    214    if (defined $opt_casting){
    215       my $responsecasting = get "http://www.allocine.fr/film/casting_gen_cfilm=" . $movieid . ".html";
    216       my $fullcast = parseBetween($responsecasting, "Acteurs", "<table");
    217       print "debug:fullcast=$fullcast\n";
    218       my $oneactor;
    219       $fullcast = parseBetween($fullcast,"style=\"background-color", "</table>");
    220       my @listactor = split("style=\"background-color", $fullcast);
    221       my @cleanlistactor ;
    222       for $oneactor (@listactor ) {
    223         $oneactor = parseBetween($oneactor,"class=\"link1\">","</a>");
    224         $oneactor =  removeTag($oneactor );       
    225         push(@cleanlistactor,$oneactor);
     291    # parse cast
     292    my $cast;
     293    if (defined $opt_uk)
     294    {
     295        $cast = parseBetween($response, "<h4>cast "," &nbsp;&nbsp;<img src");
     296    }
     297    else
     298    {
     299        $cast = parseBetween($response, "<h4>Avec "," &nbsp;&nbsp;<img src");
     300    }
     301    $cast = removeTag($cast);
     302    if (defined $opt_casting)
     303    {
     304        my $responsecasting = get "http://" . $website
     305            . "/film/casting_gen_cfilm=" . $movieid . ".html";
     306        my $fullcast;
     307        if (defined $opt_uk)
     308        {
     309            $fullcast = parseBetween($responsecasting, "Actor(s)", "<table");
     310        }
     311        else
     312        {
     313            $fullcast = parseBetween($responsecasting, "Acteurs", "<table");
     314        }
     315        my $oneactor;
     316        $fullcast = parseBetween($fullcast,"style=\"background-color", "</table>");
     317        my @listactor = split("style=\"background-color", $fullcast);
     318        my @cleanlistactor ;
     319        for $oneactor (@listactor )
     320        {
     321            $oneactor = parseBetween($oneactor,"class=\"link1\">","</a>");
     322            $oneactor =  removeTag($oneactor );       
     323            push(@cleanlistactor,$oneactor);
    226324            }
    227325            my $finalcast = join (", ",@cleanlistactor);
    228326            if ($finalcast  ne "") {$cast = $finalcast;};
    229    }
    230    
    231    
     327    }
    232328
    233    #genres
    234    my $genres = parseBetween($response,"<h4>Genre : ","</h4>");
    235    $genres = removeTag($genres);
     329    #genres
     330    my $genres = parseBetween($response,"<h4>Genre : ","</h4>");
     331    $genres = removeTag($genres);
    236332   
    237    #countries
    238    my $countries = parseBetween($response,"<h4>Film ",".&nbsp;</h4>");
    239    $countries = removeTag($countries);
    240 
    241    # output fields (these field names must match what MythVideo is looking for)
    242    print "Title:$title\n";
    243    if (!(defined $opt_originaltitle)){
    244     print "OriginalTitle:$original_title\n";
    245    } 
    246    print "Year:$year\n";
    247    print "Director:$director\n";
    248    print "Plot:$plot\n";
    249    print "UserRating:$userrating\n";
    250    print "MovieRating:$movierating\n";
    251    print "Runtime:$runtime\n";
    252    print "Writers: $writer\n";
    253    print "Cast: $cast\n";
    254    print "Genres:$genres\n";
    255    print "Countries:$countries\n";
     333    #countries
     334    my $countries;
     335    if (defined $opt_uk)
     336    {
     337        $countries = parseBetween($response,"<h4>Nationality : ",".&nbsp;</h4>");
     338    }
     339    else
     340    {
     341        $countries = parseBetween($response,"<h4>Film ",".&nbsp;</h4>");
     342    }
     343    $countries = removeTag($countries);
     344
     345    # output fields (these field names must match what MythVideo is looking for)
     346    print "Title:$title\n";
     347    if (!(defined $opt_originaltitle))
     348    {
     349        print "OriginalTitle:$original_title\n";
     350    } 
     351    print "Year:$year\n";
     352    print "Director:$director\n";
     353    print "Plot:$plot\n";
     354    print "UserRating:$userrating\n";
     355    print "MovieRating:$movierating\n";
     356    print "Runtime:$runtime\n";
     357    print "Writers: $writer\n";
     358    print "Cast: $cast\n";
     359    print "Genres:$genres\n";
     360    print "Countries:$countries\n";
    256361}
    257362
    258363# dump Movie Poster
    259 sub getMoviePoster {
    260    my ($movieid)=@_; # grab movieid parameter
    261    if (defined $opt_d) { printf("# looking for movie id: '%s'\n", $movieid);}
     364sub getMoviePoster
     365{
     366    my ($movieid)=@_; # grab movieid parameter
     367    my $website = "www.allocine.fr";
     368    if (defined $opt_uk) { $website = "www.screenrush.co.uk";}
     369    if (defined $opt_d) { printf("# looking for movie id: '%s'\n", $movieid);}
    262370
    263    # get the search results  page
     371    # get the search results  page
    264372   
    265    my $request = "http://www.allocine.fr/film/galerie_gen_cfilm=" . $movieid . ".html";
    266    if (defined $opt_d) { printf("# request: '%s'\n", $request); }
    267    my $response = get $request;
    268    my $page=parseBetween($response,"&page=",".html\" class=\"link1\"><span class=\"text2\">>>");
    269    my @pages = split ("page=",$page);
    270    $request = "";
    271 
    272    my $uri = "";
    273    my $furi = "";
    274    my $first= 1;
    275    for $page (@pages ) {
     373    my $request = "http://".$website."/film/galerie_gen_cfilm=" . $movieid . ".html";
     374    if (defined $opt_d) { printf("# request: '%s'\n", $request); }
     375    my $response = get $request;
     376    my $page=parseBetween($response,"&page=",".html\" class=\"link1\"><span class=\"text2\">>>");
     377    my @pages = split ("page=",$page);
     378    $request = "";
     379
     380    my $uri = "";
     381    my $furi = "";
     382    my $first= 1;
     383    for $page (@pages )
     384    {
    276385        $request = $page;
    277386       
    278         #
    279         # get only the page number
    280         #
    281         $request = substr($request, 0, index($request, '.'));
     387        #
     388        # get only the page number
     389        #
     390        $request = substr($request, 0, index($request, '.'));
    282391 
    283         if (!($request eq "")) {
    284              $request = "http://www.allocine.fr/film/galerie_gen_cfilm=" . $movieid . "&page=" . $request . ".html";
    285              $response = get $request;
     392        if (!($request eq ""))
     393        {
     394            $request = "http://".$website."/film/galerie_gen_cfilm=".$movieid
     395                ."&page=" . $request . ".html";
     396            $response = get $request;
    286397       
    287              $uri = parseBetween($response,"<table style=\"padding:0 0 0 0\" border=\"0\" >","Ko\" />");
    288              $uri = parseBetween($uri ,"<img src=\"","\" border=\"0\" class=\"galerie\" ");
    289              if ($first && ! ($uri eq ""))
    290              {
    291                      $furi = $uri;
    292                      $first = 0;
    293              }
     398            $uri = parseBetween($response,
     399                "<table style=\"padding:0 0 0 0\" border=\"0\" >","Ko\" />");
     400            $uri = parseBetween($uri ,"<img src=\"","\" border=\"0\" class=\"galerie\" ");
     401            if ($first && ! ($uri eq ""))
     402            {
     403                $furi = $uri;
     404                $first = 0;
     405            }
     406        }
     407        #
     408            # stop when we have an poster...
     409            #
     410            last if (($uri =~ /affiche/) or ($uri =~ /_af/))
     411    }
    294412
     413    # if $uri =~ affiche or _af then get the first poster if exist
    295414
     415    if (($uri !~ /affiche/) or ($uri !~ /_af/))
     416    {
     417        if ($first == 0)
     418        {
     419            $uri = $furi;
    296420        }
    297         #
    298         # stop when we have an poster...
    299         #
    300         last if (($uri =~ /affiche/) or ($uri =~ /_af/))
    301    }
    302 
    303    # if $uri =~ affiche or _af then get the first poster if exist
    304 
    305    if (($uri !~ /affiche/) or ($uri !~ /_af/))
    306    {
    307            if ($first == 0)
    308            {
    309                    $uri = $furi;
    310            }
    311    }
    312 
    313    #
    314    # in case nothing was found fall back to the little poster...
    315    #
    316    if ($uri eq "")
    317    {
    318         $request = "http://www.allocine.fr/film/fichefilm_gen_cfilm=" . $movieid .".html";
    319         $response = get $request;
    320         $response = parseBetween($response, "sousnav_separe_droite2.gif","sortie");
    321         $uri = parseBetween($response, "<img src=\"","\"");
    322    
     421    }
     422
     423    #
     424    # in case nothing was found fall back to the little poster...
     425    #
     426    if ($uri eq "")
     427    {
     428            $request = "http://".$website."/film/fichefilm_gen_cfilm=".$movieid
     429            .".html";
     430        $response = get $request;
     431        $response = parseBetween($response, "sousnav_separe_droite2.gif","sortie");
     432        $uri = parseBetween($response, "<img src=\"","\"");
     433
    323434        #
    324         # in case no little poster was found get the small DVD poster
    325         # if exists !
     435        # in case no little poster was found get the small DVD poster
     436        # if exists !
    326437        #
    327438        if ($uri =~ /AffichetteAllocine/)
    328         {
    329                 $request = "http://www.allocine.fr/film/fichefilm_gen_cfilm=" . $movieid .".html";
    330                 $response = get $request;
    331                 $response = parseBetween($response, "Disponible en","Zone");
    332                 $uri = parseBetween($response, "<img src=\"","\"");
    333                 return if ($uri eq "");
    334         }
    335    }
     439        {
     440            $request = "http://".$website."/film/fichefilm_gen_cfilm=".$movieid
     441                .".html";
     442            $response = get $request;
     443            $response = parseBetween($response, "Disponible en","Zone");
     444            $uri = parseBetween($response, "<img src=\"","\"");
     445            return if ($uri eq "");
     446        }
     447    }
    336448 
    337    print "$uri\n";
     449    print "$uri\n";
    338450}
    339451
    340452# dump Movie list:  1 entry per line, each line as 'movieid:Movie Title'
    341453sub getMovieList {
    342    my ($filename, $options)=@_; # grab parameters
    343 
    344    # If we wanted to inspect the file for any reason we can do that now
     454    my ($filename, $options)=@_; # grab parameters
     455    my $website = "www.allocine.fr";
     456    if (defined $opt_uk) { $website = "www.screenrush.co.uk";}
     457
     458    # If we wanted to inspect the file for any reason we can do that now
     459
     460    #
     461    # Convert filename into a query string
     462    # (use same rules that Metadata::guesTitle does)
     463    my $query = $filename;
     464    $query = uri_unescape($query);  # in case it was escaped
     465    # Strip off the file extension
     466    if (rindex($query, '.') != -1)
     467    {
     468       $query = substr($query, 0, rindex($query, '.'));
     469    }
     470    # Strip off anything following '(' - people use this for general comments
     471    if (rindex($query, '(') != -1)
     472    {
     473       $query = substr($query, 0, rindex($query, '('));
     474    }
     475    # Strip off anything following '[' - people use this for general comments
     476    if (rindex($query, '[') != -1)
     477    {
     478       $query = substr($query, 0, rindex($query, '['));
     479    }
     480    # Strip off anything following '-' - people use this for general comments
     481    if (index($query, '-') != -1)
     482    {
     483       $query = substr($query, 0, index($query, '-'));
     484    }
    345485
    346    #
    347    # Convert filename into a query string
    348    # (use same rules that Metadata::guesTitle does)
    349    my $query = $filename;
    350    $query = uri_unescape($query);  # in case it was escaped
    351    # Strip off the file extension
    352    if (rindex($query, '.') != -1) {
    353       $query = substr($query, 0, rindex($query, '.'));
    354    }
    355    # Strip off anything following '(' - people use this for general comments
    356    if (rindex($query, '(') != -1) {
    357       $query = substr($query, 0, rindex($query, '('));
    358    }
    359    # Strip off anything following '[' - people use this for general comments
    360    if (rindex($query, '[') != -1) {
    361       $query = substr($query, 0, rindex($query, '['));
    362    }
    363    # Strip off anything following '-' - people use this for general comments
    364    if (index($query, '-') != -1) {
    365       $query = substr($query, 0, index($query, '-'));
    366    }
    367 
    368    # IMDB searches do better if any trailing ,The is left off
    369    $query =~ /(.*), The$/i;
    370    if ($1) { $query = $1; }
     486    # IMDB searches do better if any trailing ,The is left off
     487    $query =~ /(.*), The$/i;
     488    if ($1) { $query = $1; }
    371489   
    372    # prepare the url
    373    $query = uri_escape($query);
    374    if (!$options) { $options = "" ;}
    375    if (defined $opt_d) {
    376       printf("# query: '%s', options: '%s'\n", $query, $options);
    377    }
    378    my $count = 0;
    379    my $typerecherche = 3;
     490    # prepare the url
     491    $query = uri_escape($query);
     492    if (!$options) { $options = "" ;}
     493    if (defined $opt_d)
     494    {
     495        printf("# query: '%s', options: '%s'\n", $query, $options);
     496    }
     497    my $count = 0;
     498    my $typerecherche = 3;
    380499 
    381    while (($typerecherche <=5) && ($count ==0)){
    382            # get the search results  page
    383            my $request = "http://www.allocine.fr/recherche/?rub=1&motcle=$query";
    384            if (defined $opt_d) { printf("# request: '%s'\n", $request); }
    385            my $response = get $request;
    386 
    387            #
    388            # don't try to invent if it doesn't exist
    389            #
    390            return if $response =~ /Pas de résultats/;
    391        
    392            # extract possible matches
    393            #    possible matches are grouped in several catagories: 
    394            #        exact, partial, and approximate
    395            my $exact_matches = $response;
    396            # parse movie list from matches
    397            my $beg = "<h4><a href=\"/film/fichefilm_gen_cfilm=";
    398            my $end = "</a></h4>";
     500    while (($typerecherche <=5) && ($count ==0))
     501    {
     502        # get the search results  page
     503        my $request = "http://".$website."/recherche/?rub=1&motcle=$query";
     504        if (defined $opt_d) { printf("# request: '%s'\n", $request); }
     505        my $response = get $request;
     506
     507        #
     508        # don't try to invent if it doesn't exist
     509        #
     510        return if $response =~ /Pas de résultats/;
     511
     512        # extract possible matches
     513        #    possible matches are grouped in several catagories: 
     514        #        exact, partial, and approximate
     515        my $exact_matches = $response;
     516        # parse movie list from matches
     517        my $beg = "<h4><a href=\"/film/fichefilm_gen_cfilm=";
     518        my $end = "</a></h4>";
    399519           
    400            my @movies;
     520        my @movies;
    401521       
    402            my $data = $exact_matches;
    403            if ($data eq "") {
    404               if (defined $opt_d) { printf("# no results\n"); }
    405                 $typerecherche = $typerecherche +2 ;
    406            }else{
    407               my $start = index($data, $beg);
    408               my $finish = index($data, $end, $start);
     522        my $data = $exact_matches;
     523        if ($data eq "")
     524        {
     525            if (defined $opt_d) { printf("# no results\n"); }
     526            $typerecherche = $typerecherche +2 ;
     527        }
     528        else
     529        {
     530            my $start = index($data, $beg);
     531            my $finish = index($data, $end, $start);
    409532           
    410               my $title;
    411               while ($start != -1) {
    412                  $start += length($beg);
    413                  my $sub = substr($data, $start, $finish - $start);
    414                  my ($movienum, $moviename) = split(".html\" class=\"link1\">", $sub);
    415                  $title = removeTag($moviename);
    416                  $moviename = removeTag($moviename);
    417                  my ($movieyear)= $moviename =~/\((\d+)\)/;
    418                  if ($movieyear){$title = $title." (".$movieyear.")"; }
    419                  $moviename=$title ;
    420              
    421                  # advance data to next movie
    422                  $data = substr($data, - (length($data) - $finish));
    423                  $start = index($data, $beg);
    424                  $finish = index($data, $end, $start + 1);
    425              
    426                  # add to array
    427                  $movies[$count++] = $movienum . ":" . $moviename;
    428               }
    429              
    430               # display array of values
    431               for $movie (@movies) {
    432                 print "$movie\n";
    433               }
    434            }
    435       }
     533            my $title;
     534            while ($start != -1)
     535            {
     536                $start += length($beg);
     537                my $sub = substr($data, $start, $finish - $start);
     538                my ($movienum, $moviename) = split(".html\" class=\"link1\">",
     539                    $sub);
     540                $title = removeTag($moviename);
     541                $moviename = removeTag($moviename);
     542                my ($movieyear)= $moviename =~/\((\d+)\)/;
     543                if ($movieyear){$title = $title." (".$movieyear.")"; }
     544                $moviename=$title ;
     545
     546                # advance data to next movie
     547                $data = substr($data, - (length($data) - $finish));
     548                $start = index($data, $beg);
     549                $finish = index($data, $end, $start + 1);
     550
     551                # add to array
     552                $movies[$count++] = $movienum . ":" . $moviename;
     553            }
     554
     555            # display array of values
     556            for $movie (@movies)
     557            {
     558                print "$movie\n";
     559            }
     560        }
     561    }
    436562}
    437563
    438564#
     
    446572                "info" => \$opt_i,
    447573                "originaltitle" => \$opt_originaltitle,
    448574                "casting" => \$opt_casting,
     575                "uk" => \$opt_uk,
    449576                "Data" => \$opt_D,
    450577                "Movie" => \$opt_M,
    451578                "Poster" => \$opt_P
     
    459586# print out usage if needed
    460587if (defined $opt_h || $#ARGV<0) { help(); }
    461588
    462 if (defined $opt_D) {
    463    # take movieid from cmdline arg
    464    $movieid = shift || die "Usage : $0 -D <movieid>\n";
    465    getMovieData($movieid);
    466 }
    467 
    468 elsif (defined $opt_P) {
    469    # take movieid from cmdline arg
    470    $movieid = shift || die "Usage : $0 -P <movieid>\n";
    471    getMoviePoster($movieid);
    472 }
    473 
    474 elsif (defined $opt_M) {
    475    # take query from cmdline arg
    476    #$options = shift || die "Usage : $0 -M <query>\n";
    477    my $query;
    478    my $options = '';
    479    foreach $key (0 .. $#ARGV) {
    480         $query .= $ARGV[$key]. ' ';
    481    }
    482    getMovieList($query, $options);
     589if (defined $opt_D)
     590{
     591    # take movieid from cmdline arg
     592    $movieid = shift || die "Usage : $0 -D <movieid>\n";
     593    getMovieData($movieid);
     594}
     595elsif (defined $opt_P)
     596{
     597    # take movieid from cmdline arg
     598    $movieid = shift || die "Usage : $0 -P <movieid>\n";
     599    getMoviePoster($movieid);
     600}
     601elsif (defined $opt_M)
     602{
     603    # take query from cmdline arg
     604    #$options = shift || die "Usage : $0 -M <query>\n";
     605    my $query;
     606    my $options = '';
     607    foreach $key (0 .. $#ARGV)
     608    {
     609        $query .= $ARGV[$key]. ' ';
     610    }
     611    getMovieList($query, $options);
    483612}