Changes between Initial Version and Version 2 of Ticket #9074


Ignore:
Timestamp:
Oct 8, 2010, 3:42:54 PM (10 years ago)
Author:
robertm
Comment:

Legend:

Unmodified
Added
Removed
Modified
  • Ticket #9074

    • Property Status changed from new to assigned
    • Property Component changed from MythTV - General to Plugin - MythVideo
    • Property Version changed from Unspecified to Trunk Head
    • Property Milestone changed from unknown to 0.25
    • Property Owner set to robertm
  • Ticket #9074 – Description

    initial v2  
    44
    55I hope this helps
    6 
    7 
    8 
    9 #!/usr/bin/perl -w
    10 
    11 #
    12 # This perl script is intended to perform movie data lookups in french based on
    13 # the www.allocine.fr website
    14 #
    15 # For more information on MythVideo's external movie lookup mechanism, see
    16 # the README file in this directory.
    17 #
    18 # Original author: Xavier Hervy (maxpower44 AT tiscali DOT fr)
    19 
    20 # changes:
    21 #   20-10-2009: Geoffroy Geerseau ( http://www.soslinux.net : jamdess AT soslinux DOT net )
    22 #   Modified for the new allocine templates
    23 #   25-10-2009: Geoffroy Geerseau ( http://www.soslinux.net : jamdess AT soslinux DOT net )
    24 #   Poster download correction
    25 #   Userrating correction
    26 #   28-10-2009: Robert McNamara (Myth Dev)
    27 #   Fix issues in above patches-- files should never be downloaded to /tmp.
    28 #   Convert script to output in new grabber output format for .23.  Leave backwards compat.
    29 #   02-11-2009: Geoffroy Geerseau
    30 #   Allocine have, once again, changed their templates...
    31 #   06-08-2010: Alexandra Lepercq
    32 #   Allocine have, once again, changed their templates...
    33 #   Add some data from api.allocine.fr (thanks to http://wiki.gromez.fr/dev/api/allocine)
    34 #       http://api.allocine.fr/xml/movie?code=$movieid&partner=3
    35 
    36 use File::Basename;
    37 use File::Copy;
    38 use lib dirname($0);
    39 use Encode;
    40 use utf8;
    41 use Encode 'from_to';
    42 use MythTV::MythVideoCommon;
    43 
    44 use vars qw($opt_h $opt_r $opt_d $opt_i $opt_v $opt_D $opt_l $opt_M $opt_P $opt_originaltitle $opt_casting $opt_u_dummy);
    45 use Getopt::Long;
    46 
    # Script identity. version() prints these three values; the title and
    # version are also appended to @MythTV::MythVideoCommon::URL_get_extras
    # (presumably picked up by myth_url_get -- confirm in MythVideoCommon).
    # Declared with `our` so they stay package globals visible to the subs
    # below while remaining valid if `use strict` is ever enabled.
    our $title   = "Allocine Query";
    our $version = "v2.06";
    our $author  = "Xavier Hervy";
    push(@MythTV::MythVideoCommon::URL_get_extras, ($title, $version));

    # All output is written to STDOUT through the :utf8 layer.
    binmode(STDOUT, ":utf8");
    53 
    54 # display usage
    # Print the command-line usage summary to STDOUT, then terminate the
    # script by calling exit(-1). This sub never returns to its caller.
    sub usage {
       print "usage: $0 -hviocMPD [parameters]\n";
       print "       -h, --help                       help\n";
       print "       -v, --version                    display version\n";
       print "       -i, --info                       display info\n";
       print "       -o, --originaltitle              concatenate title and original title\n";
       print "       -c, --casting                    with -D option, grab the complete actor list (much slower)\n";
       print "\n";
       # The placeholder previously read "--movie query>" (missing '<').
       print "       -M <query>,   --movie <query>    get movie list\n";
       print "       -D <movieid>, --data <movieid>   get movie data\n";
       print "       -P <movieid>, --poster <movieid> get movie poster\n";
       exit(-1);
    }
    68 
    69 # display 1-line of info that describes the version of the program
    # Write a single line naming the program, its version and its author,
    # taken from the package globals $title, $version and $author.
    sub version {
       printf("%s (%s) by %s\n", $title, $version, $author);
    }
    73 
    74 # display 1-line of info that can describe the type of query used
    # Write a single line describing which website this grabber queries.
    sub info {
       my $blurb = "Performs queries using the www.allocine.fr website.\n";
       print $blurb;
    }
    78 
    79 # display detailed help
    # Full help: the version line, then the info line, then the usage text.
    # usage() ends the process, so help() does not return either.
    sub help {
       for my $step (\&version, \&info, \&usage) {
          $step->();
       }
    }
    85 
    86 # returns text within 'data' without tag
    # Return a copy of $data with every "<...>" span removed.
    #
    # Parameters:
    #   $data - text that may contain markup; the caller's value is not modified.
    #
    # Returns the stripped text. An undefined argument is returned unchanged.
    #
    # A '<' that is never closed by a '>' is left in place. The previous
    # index()-based loop tested "index(...)+1 != -1", which is always true, so
    # such input made it prepend text to the string forever.
    sub removeTag {
       my ($data) = @_; # grab parameters

       return $data unless defined $data;

       # [^>]* also matches newlines, so tags spanning several lines are removed.
       $data =~ s/<[^>]*>//g;
       return $data;
    }
    101 
    102 
    103 # get Movie Data
    # Look up one movie and print its metadata to STDOUT as a <metadata> XML
    # document (one <item>).
    #
    # Parameters:
    #   $movieid - Allocine movie code; interpolated as-is into the API URL, the
    #              film page URL and the poster page URLs.
    #
    # Reads the globals $opt_d (debug trace lines starting with '#') and
    # $opt_originaltitle. Calls myth_url_get(), parseBetween() and trim(), none
    # of which are defined in this file (presumably MythTV::MythVideoCommon).
    #
    # Differences from the previous revision:
    #   * myth_url_get() is called in list context for the API request, like
    #     every other call site in this script.
    #   * All working variables are lexical instead of leaking as globals.
    #   * The film page is no longer downloaded: it was only used to compute a
    #     movie rating that was never printed. Its URL is still emitted as the
    #     <homepage>.
    #   * The poster fallback reuses the poster index page already in memory
    #     instead of downloading the same URL a second time.
    #   * Rating and runtime are only converted when the API value is numeric,
    #     and the runtime is printed as a whole number.
    #   * --originaltitle is honoured again (title followed by the original
    #     title in parentheses), as described in the usage text.
    sub getMovieData {
       my ($movieid) = @_; # grab movieid parameter
       if (defined $opt_d) { printf("# looking for movie id: '%s'\n", $movieid); }

       # get Movie MetaData from api.allocine
       my $requestAPI = "http://api.allocine.fr/xml/movie?code=$movieid&partner=3";
       if (defined $opt_d) { printf("# request: '%s'\n", $requestAPI); }
       my (undef, $responseAPI) = myth_url_get($requestAPI);
       $responseAPI = "" unless defined $responseAPI;
       from_to($responseAPI, 'utf-8', 'iso-8859-1');

       # URL of the human-readable film page, emitted as <homepage>
       my $allocineurl = "http://www.allocine.fr/film/fichefilm_gen_cfilm=" . $movieid . ".html";

       # parse Title and Year
       my $titleApi         = parseBetween($responseAPI, "<title>", "</title>");
       my $originaltitleApi = parseBetween($responseAPI, "<originalTitle>", "</originalTitle>");
       my $yearApi          = parseBetween($responseAPI, "<productionYear>", "</productionYear>");
       if (defined $opt_originaltitle && defined $originaltitleApi && $originaltitleApi ne "") {
          $titleApi = $titleApi . " (" . $originaltitleApi . ")";
       }

       # parse Plot
       my $plotApi = parseBetween($responseAPI, "<synopsis>", "</synopsis>");

       # parse User Rating: the API value is multiplied by 2.5 and truncated.
       # NOTE(review): the source and target scales are not stated here -- confirm.
       my $userratingOrig = parseBetween($responseAPI, "<userRating>", "</userRating>");
       my $userratingApi  = 0;
       if (defined $userratingOrig && $userratingOrig =~ /^\s*(?:\d+\.?\d*|\.\d+)\s*$/) {
          $userratingApi = int($userratingOrig * 2.5);
       }

       # parse Movie length: the API value is divided by 60.
       # NOTE(review): presumably seconds converted to minutes -- confirm.
       my $runtimeOrig = parseBetween($responseAPI, "<runtime>", "</runtime>");
       my $runtimeApi  = 0;
       if (defined $runtimeOrig && $runtimeOrig =~ /^\s*\d+\s*$/) {
          $runtimeApi = int($runtimeOrig / 60);
       }

       # parse Cast: rewrite the API's <casting> markup in place into
       # <person .../> elements. The substitutions depend on their order: the
       # numeric codes are emptied by deleting every digit, then the leftover
       # '">' sequences are removed so the element text becomes the attribute
       # value. Deleting digits also wrecks picture URLs, which is why the
       # digit-less picture attribute is stripped afterwards.
       my $castApi = parseBetween($responseAPI, "<casting>", "</casting>");
       $castApi =~ s!<castMember>!\n!g;
       $castApi =~ s!</castMember>!/>!g;
       $castApi =~ s/person code/person name/g;
       $castApi =~ s!</person>!"!g;
       $castApi =~ s/<activity code/ job/g;
       $castApi =~ s!</activity>!"!g;
       $castApi =~ s/<role>/ character="/g;
       $castApi =~ s!</role>!"!g;
       $castApi =~ s![0-9]!!g;
       $castApi =~ s!">!!g;
       $castApi =~ s/<picture href/ picture/g;
       $castApi =~ s!</picture>!"!g;
       $castApi =~ s!picture="http://images.allocine.fr/medias/nmedia/////.jpg"!!g;
       $castApi =~ s!Réalisateur!director!g;
       $castApi =~ s!Acteur!actor!g;
       $castApi =~ s!Producteur!producer!g;
       $castApi =~ s!Compositeur!composer!g;

       # Genres: same rewriting trick, producing <category name="..."/> lines
       my $genreApi = parseBetween($responseAPI, "<genreList>", "</genreList>");
       $genreApi =~ s/genre code/category name/g;
       $genreApi =~ s!</genre>!"/>\n!g;
       $genreApi =~ s![0-9]!!g;
       $genreApi =~ s!">!!g;

       # Countries: same rewriting trick, producing <country name="..."/> lines
       my $countryApi = parseBetween($responseAPI, "<nationalityList>", "</nationalityList>");
       $countryApi =~ s/nationality code/country name/g;
       $countryApi =~ s!</nationality>!"/>\n!g;
       $countryApi =~ s![0-9]!!g;
       $countryApi =~ s!">!!g;

       # parse for Coverart: poster index page -> first poster detail page ->
       # link labelled "Agrandir"
       my $posters_request = "http://www.allocine.fr/film/fichefilm-" . $movieid . "/affiches/";
       my (undef, $posters_page) = myth_url_get($posters_request);
       my $mediafile = parseBetween($posters_page, "<a href=\"/film/fichefilm-" . $movieid . "/affiches/detail/?cmediafile=", "\" >");
       my $detail_request = "http://www.allocine.fr/film/fichefilm-" . $movieid . "/affiches/detail/?cmediafile=" . $mediafile;
       my (undef, $detail_page) = myth_url_get($detail_request);
       my $uri = trim(parseBetween($detail_page, "<a Target=\"_blank\" Class=\"fs11\" href=\"", "\">Agrandir</a>"));
       if ($uri eq "")
       {
          # fall back to the thumbnail shown on the poster index page
          my $tmp_uri = parseBetween($posters_page, "<a href=\"/film/fichefilm-" . $movieid . "/affiches/\">", " alt=");
          $tmp_uri =~ s/\n/ /gm;
          $uri = trim(parseBetween($tmp_uri, "<img src='h", "'"));
          if ($uri ne "")
          {
             # put back the leading 'h' consumed by the search marker
             $uri = "h$uri";
          }
       }
       # if no picture was found, just download the empty poster
       if ($uri eq "") {
          $uri = "http://images.allocine.fr/r_160_214/commons/emptymedia/AffichetteAllocine.gif";
       }

       # MetaData output
       print "<?xml version='1.0' encoding='UTF-8'?>\n";
       print "<metadata>\n";
       print "<item>\n";
       print "<inetref>$movieid</inetref>\n";
       print "<title>$titleApi</title>\n";
       print "<language>fr</language>\n";
       print "<description>$plotApi</description>\n";
       print "<countries>\n";
       print "$countryApi";
       print "</countries>\n";
       print "<categories>\n";
       print "$genreApi";
       print "</categories>\n";
       print "<userrating>$userratingApi</userrating>\n";
       print "<year>$yearApi</year>\n";
       print "<runtime>$runtimeApi</runtime>\n";
       print "<homepage>$allocineurl</homepage>\n";
       # no newline here: each rewritten cast entry starts with its own "\n"
       print "<people>";
       print "$castApi";
       print "</people>\n";
       print "<images>\n";
       print "<image type=\"coverart\" url=\"$uri\"/>\n";
       print "</images>\n";
       print "</item>\n";
       print "</metadata>\n";
    }
    357 
    358 # dump Movie Poster
    # Print the URL of a movie's poster to STDOUT, followed by a newline.
    #
    # Parameters:
    #   $movieid - Allocine movie code; interpolated as-is into the page URLs.
    #
    # Lookup order: the image on the first poster detail page, then the
    # thumbnail on the poster index page, then Allocine's "empty poster"
    # placeholder. Exactly one line is printed (plus '#' trace lines when
    # $opt_d is defined). The previous revision printed the URL twice whenever
    # the thumbnail fallback ran, and downloaded the index page a second time.
    #
    # Calls myth_url_get(), parseBetween() and trim(), which are not defined in
    # this file (presumably MythTV::MythVideoCommon).
    sub getMoviePoster {
       my ($movieid) = @_; # grab movieid parameter
       if (defined $opt_d) { printf("# looking for movie id: '%s'\n", $movieid); }

       # get the poster index page
       my $request = "http://www.allocine.fr/film/fichefilm-" . $movieid . "/affiches/";
       if (defined $opt_d) { printf("# request: '%s'\n", $request); }
       my (undef, $posters_page) = myth_url_get($request);
       my $mediafile = parseBetween($posters_page, "<a href=\"/film/fichefilm-" . $movieid . "/affiches/detail/?cmediafile=", "\" >");

       # get the detail page of the first poster and take its image
       my $detail_request = "http://www.allocine.fr/film/fichefilm-" . $movieid . "/affiches/detail/?cmediafile=" . $mediafile;
       my (undef, $detail_page) = myth_url_get($detail_request);
       my $uri = parseBetween(parseBetween($detail_page, "<div class=\"tac\" style=\"\">", "</div>"), "<img src=\"", "\" alt");
       if ($uri eq "")
       {
          # fall back to the thumbnail on the index page fetched above
          my $tmp_uri = parseBetween($posters_page, "<a href=\"/film/fichefilm-" . $movieid . "/affiches/\">", " alt=");
          $tmp_uri =~ s/\n/ /gm;
          $uri = trim(parseBetween($tmp_uri, "<img src='h", "'"));
          if ($uri ne "")
          {
             # put back the leading 'h' consumed by the search marker
             $uri = "h$uri";
          }
       }

       # if no picture was found, just download the empty poster
       if ($uri eq "") {
          $uri = "http://images.allocine.fr/r_160_214/commons/emptymedia/AffichetteAllocine.gif";
       }

       print "$uri\n";
    }
    394 
    # Search Allocine for a title and print one "<movieid>:<name>" line per hit.
    #
    # Parameters:
    #   $filename - raw title or file name; passed through cleanTitleQuery()
    #               before being placed in the search URL.
    #   $options  - optional; only echoed in the debug trace.
    #
    # Reads the global $opt_d (debug trace lines starting with '#'). Calls
    # cleanTitleQuery(), myth_url_get(), parseBetween() and trim(), which are
    # not defined in this file (presumably MythTV::MythVideoCommon), and
    # removeTag() from this file.
    #
    # Each hit is the text between the $beg and $end markers below. Scanning
    # stops as soon as a begin marker has no matching end marker; previously
    # that case left the search position unchanged and looped forever.
    sub getMovieList {
       my ($filename, $options) = @_; # grab parameters

       my $query = cleanTitleQuery($filename);
       if (!$options) { $options = ""; }
       if (defined $opt_d) {
          printf("# query: '%s', options: '%s'\n", $query, $options);
       }

       # get the search results page, flattened onto one line
       my $request = "http://www.allocine.fr/recherche/1/?q=$query";
       if (defined $opt_d) { printf("# request: '%s'\n", $request); }
       my (undef, $response) = myth_url_get($request);
       $response = "" unless defined $response;
       from_to($response, 'utf-8', 'iso-8859-1');
       $response =~ s/\n//g;

       if ($response eq "") {
          if (defined $opt_d) { printf("# no results\n"); }
          return;
       }

       # markers delimiting one search hit in the result page
       my $beg = "<div style=\"margin-top:-5px;\">";
       my $end = "<span class=\"fs11\">";

       my @movies;
       my $data  = $response;
       my $start = index($data, $beg);
       while ($start != -1) {
          $start += length($beg);
          my $finish = index($data, $end, $start);
          last if $finish == -1; # truncated hit: nothing usable follows

          my $sub1 = substr($data, $start, $finish - $start);
          # drop everything from the last '(' onwards
          $sub1 =~ s/(.*)\(.*$/$1/;
          my $moviename = trim(removeTag($sub1));
          my $movienum  = parseBetween($sub1, "<a href='/film/fichefilm_gen_cfilm=", ".html");

          # append a parenthesised number still present in the name, if any
          my ($movieyear) = $moviename =~ /\((\d+)\)/;
          if ($movieyear) {
             $moviename = $moviename . " (" . $movieyear . ")";
          }

          push(@movies, "$movienum:$moviename");

          # advance data to next movie
          $data  = substr($data, $finish);
          $start = index($data, $beg);
       }

       # display array of values
       for my $movie (@movies) {
          print "$movie\n";
       }
    }
    460 
    461 #
    462 # Main Program
    463 #
    464 
    465 # parse command line arguments
    466 
    # Every option is a plain flag; the values for -D/-P/-M are read from what
    # is left in @ARGV afterwards. "help" was missing from this table, so
    # $opt_h could never be set and -h was reported as an unknown option.
    # NOTE(review): $opt_d (debug) is tested by the subs but has no entry here.
    # Adding "debug" would make -D ambiguous under Getopt::Long's default
    # case-insensitive abbreviation matching, so it is left out.
    GetOptions( "utf8" => \$opt_u_dummy,
                "help" => \$opt_h,
                "version" => \$opt_v,
                "info" => \$opt_i,
                "language" => \$opt_l,
                "originaltitle" => \$opt_originaltitle,
                "casting" => \$opt_casting,
                "Data" => \$opt_D,
                "Movie" => \$opt_M,
                "Poster" => \$opt_P
                );

    # print out info
    if (defined $opt_v) { version(); exit 1; }
    if (defined $opt_i) { info(); exit 1; }
    if (defined $opt_l) {
       # the language flag takes no value of its own: discard the next argument
       shift @ARGV;
    }

    # print out usage if needed (help() does not return)
    if (defined $opt_h || $#ARGV < 0) { help(); }

    if (defined $opt_D) {
       # take movieid from cmdline arg
       my $movieid = shift @ARGV;
       die "Usage : $0 -D <movieid>\n" unless $movieid;
       getMovieData($movieid);
    }

    elsif (defined $opt_P) {
       # take movieid from cmdline arg
       my $movieid = shift @ARGV;
       die "Usage : $0 -P <movieid>\n" unless $movieid;
       getMoviePoster($movieid);
    }

    elsif (defined $opt_M) {
       # take query from cmdline args: every remaining word followed by a space
       my $options = '';
       my $query = join('', map { $_ . ' ' } @ARGV);
       getMovieList($query, $options);
    }
    510 }
    511 # vim: set expandtab ts=3 sw=3 :