Ticket #9074: allocine.2.pl

File allocine.2.pl, 17.0 KB (added by robertm, 10 years ago)

Allocine w/ Correct version output.

Line 
1#!/usr/bin/perl -w
2
3#
4# This perl script is intended to perform movie data lookups in french based on
5# the www.allocine.fr website
6#
7# For more information on MythVideo's external movie lookup mechanism, see
8# the README file in this directory.
9#
10# Original author: Xavier Hervy (maxpower44 AT tiscali DOT fr)
11
12# changes:
13#   20-10-2009: Geoffroy Geerseau ( http://www.soslinux.net : jamdess AT soslinux DOT net )
14#   Modified for the new allocine templates
15#   25-10-2009: Geoffroy Geerseau ( http://www.soslinux.net : jamdess AT soslinux DOT net )
16#   Poster download correction
17#   Userrating correction
18#   28-10-2009: Robert McNamara (Myth Dev)
19#   Fix issues in above patches-- files should never be downloaded to /tmp.
20#   Convert script to output in new grabber output format for .23.  Leave backwards compat.
21#   02-11-2009: Geoffroy Geerseau
22#   Allocine have, once again, changed their templates...
23#   06-08-2010: Alexandra Lepercq
24#   Allocine have, once again, changed their templates...
25#   Add some data from api.allocine.fr (thanks to http://wiki.gromez.fr/dev/api/allocine)
26#       http://api.allocine.fr/xml/movie?code=$movieid&partner=3
27
28use File::Basename;
29use File::Copy;
30use lib dirname($0);
31use Encode;
32use utf8;
33use Encode 'from_to';
34use MythTV::MythVideoCommon;
35
36use vars qw($opt_h $opt_r $opt_d $opt_i $opt_v $opt_D $opt_l $opt_M $opt_P $opt_originaltitle $opt_casting $opt_u_dummy);
37use Getopt::Long;
38
# Grabber self-description, printed verbatim by version().
$xmlout = join('',
   '<grabber>',
   '<name>Allocine</name>',
   '<author>Xavier Hervy</author>',
   '<thumbnail>allocine.png</thumbnail>',
   '<command>allocine.pl</command>',
   '<type>movie</type>',
   '<description>Search and metadata downloads from Allocine (France)</description>',
   '<version>0.25</version>',
   '</grabber>',
);

# Script title and version string.
($title, $version) = ('Allocine Query', 'v2.06');

# Append title and version to MythVideoCommon's URL_get_extras list
# (presumably used by myth_url_get to identify this script -- confirm there).
push @MythTV::MythVideoCommon::URL_get_extras, $title, $version;

# Everything this script prints is emitted as UTF-8.
binmode STDOUT, ':utf8';
46
# Print the command-line synopsis to STDOUT and terminate the script.
# Never returns: always exits with status -1.
sub usage {
   print
      "usage: $0 -hviocMPD [parameters]\n",
      "       -h, --help                       help\n",
      "       -v, --version                    display version\n",
      "       -i, --info                       display info\n",
      "       -o, --originaltitle              concatenate title and original title\n",
      "       -c, --casting                    with -D option, grab the complete actor list (much slower)\n",
      "\n",
      "       -M <query>,   --movie <query>    get movie list\n",
      "       -D <movieid>, --data <movieid>   get movie data\n",
      "       -P <movieid>, --poster <movieid> get movie poster\n";
   exit(-1);
}
61
# Print the grabber description held in $xmlout as a single line.
sub version {
   my $line = $xmlout . "\n";
   print $line;
}
66
# Print a one-line description of the kind of query this script performs.
sub info {
   my $summary = 'Performs queries using the www.allocine.fr website.';
   print "$summary\n";
}
71
# Detailed help: version line, info line, then the usage text.
# usage() exits the script, so this never returns to the caller.
sub help {
   for my $step (\&version, \&info, \&usage) {
      $step->();
   }
}
78
# Return $data with every "<...>" tag removed.
#
# A tag is everything from a "<" up to the next ">" (newlines included).
# A "<" that is never closed by a ">" is left in place; the previous
# index/substr loop never terminated on such input because its "not found"
# check compared index()+1 against -1.
#
# Parameters:
#   $data - text possibly containing markup; undef is returned unchanged
# Returns: the text without tags
sub removeTag {
   my ($data) = @_;
   return $data unless defined $data;

   $data =~ s/<[^>]*>//g;
   return $data;
}
94
95
# Return the number that starts $text (leading whitespace allowed), or 0 when
# $text is undefined or does not start with a number.  Used to avoid
# "isn't numeric" warnings when an API field is missing.
sub _leadingNumber {
   my ($text) = @_;
   return 0 unless defined $text;
   return $text =~ /^\s*(\d+\.?\d*|\.\d+)/ ? $1 : 0;
}

# get Movie Data
#
# Looks one movie up on api.allocine.fr, scrapes the poster URL from
# www.allocine.fr and prints a MythVideo <metadata> XML document on STDOUT.
#
# Parameters:
#   $movieid - Allocine movie code (the "cfilm" number)
#
# The certification ("Interdit aux moins de ...") was parsed from the HTML
# film page but never printed; that page is therefore no longer downloaded.
sub getMovieData {
   my ($movieid)=@_; # grab movieid parameter
   if (defined $opt_d) { printf("# looking for movie id: '%s'\n", $movieid);}

   # get Movie MetaData from api.allocine
   # myth_url_get is unpacked as (status, body), as everywhere else in this file.
   my $requestAPI = "http://api.allocine.fr/xml/movie?code=$movieid&partner=3";
   if (defined $opt_d) { printf("# request: '%s'\n", $requestAPI); }
   my (undef, $responseAPI) = myth_url_get($requestAPI);
   $responseAPI = "" unless defined $responseAPI;
   from_to($responseAPI,'utf-8','iso-8859-1');

   # public page of the movie, reported as <homepage>
   my $allocineurl = "http://www.allocine.fr/film/fichefilm_gen_cfilm=" . $movieid . ".html";

   # parse Title, Year and Plot
   my $titleApi = parseBetween($responseAPI,"<title>","</title>");
   my $yearApi = parseBetween($responseAPI,"<productionYear>","</productionYear>");
   my $plotApi = parseBetween($responseAPI,"<synopsis>","</synopsis>");

   # parse User Rating: the API value is multiplied by 2.5 and truncated
   my $userratingOrig = parseBetween($responseAPI,"<userRating>","</userRating>");
   my $userratingApi = int(_leadingNumber($userratingOrig) * 2.5);

   # parse Movie length: the API value is divided by 60 and truncated to whole units
   # (presumably seconds -> minutes; confirm against the API documentation)
   my $runtimeOrig = parseBetween($responseAPI,"<runtime>","</runtime>");
   my $runtimeApi = int(_leadingNumber($runtimeOrig) / 60);

   # parse Cast
   # The <casting> XML is rewritten in place into <person .../> elements.
   # The order of the substitutions matters: the digit and '">' removals strip
   # the numeric code="..." attributes so that the element text becomes the
   # attribute value.  Digits inside picture URLs are removed as well, which
   # is why the resulting (broken) picture attribute is dropped afterwards.
   my $castApi = parseBetween($responseAPI,"<casting>","</casting>");
   $castApi =~ s!<castMember>!\n!g;
   $castApi =~ s!</castMember>!/>!g;
   $castApi =~ s/person code/person name/g;
   $castApi =~ s!</person>!"!g;
   $castApi =~ s/<activity code/ job/g;
   $castApi =~ s!</activity>!"!g;
   $castApi =~ s/<role>/ character="/g;
   $castApi =~ s!</role>!"!g;
   $castApi =~ s![0-9]!!g;
   $castApi =~ s!">!!g;
   $castApi =~ s/<picture href/ picture/g;
   $castApi =~ s!</picture>!"!g;
   $castApi =~ s!picture="http://images.allocine.fr/medias/nmedia/////.jpg"!!g;
   $castApi =~ s!Réalisateur!director!g;
   $castApi =~ s!Acteur!actor!g;
   $castApi =~ s!Producteur!producer!g;
   $castApi =~ s!Compositeur!composer!g;

   # Genres: <genre code="N">X</genre> becomes <category name="X"/>
   my $genreApi = parseBetween($responseAPI,"<genreList>","</genreList>");
   $genreApi =~ s/genre code/category name/g;
   $genreApi =~ s!</genre>!"/>\n!g;
   $genreApi =~ s![0-9]!!g;
   $genreApi =~ s!">!!g;

   # Countries: <nationality code="N">X</nationality> becomes <country name="X"/>
   my $countryApi = parseBetween($responseAPI,"<nationalityList>","</nationalityList>");
   $countryApi =~ s/nationality code/country name/g;
   $countryApi =~ s!</nationality>!"/>\n!g;
   $countryApi =~ s![0-9]!!g;
   $countryApi =~ s!">!!g;

   # parse for Coverart
   # 1) poster list page -> id of the first poster
   my $posterlist = "http://www.allocine.fr/film/fichefilm-".$movieid."/affiches/";
   my (undef, $posterlisthtml) = myth_url_get($posterlist);
   my $mediafile = parseBetween($posterlisthtml,"<a href=\"/film/fichefilm-".$movieid."/affiches/detail/?cmediafile=","\" >");

   # 2) poster detail page -> target of the "Agrandir" (enlarge) link
   my $posterdetail = "http://www.allocine.fr/film/fichefilm-".$movieid."/affiches/detail/?cmediafile=".$mediafile;
   my (undef, $posterdetailhtml) = myth_url_get($posterdetail);
   my $uri = trim(parseBetween($posterdetailhtml,"<a Target=\"_blank\" Class=\"fs11\" href=\"","\">Agrandir</a>"));

   # 3) otherwise take the thumbnail from the list page already downloaded
   if ($uri eq "")
   {
        my $tmp_uri = parseBetween($posterlisthtml, "<a href=\"/film/fichefilm-".$movieid."/affiches/\">"," alt=");
        $tmp_uri =~ s/\n/ /gm;
        $uri = trim(parseBetween($tmp_uri,"<img src='h","'"));
        if($uri ne "")
        {
                # the search marker swallowed the leading "h" of "http"
                $uri = "h$uri";
        }
   }
   # if no picture was found, just download the empty poster
   if($uri eq ""){
        $uri = "http://images.allocine.fr/r_160_214/commons/emptymedia/AffichetteAllocine.gif";
   }

   # MetaData output
   # $countryApi, $genreApi and $castApi already contain complete child
   # elements; <people> intentionally has no newline because every cast
   # entry starts with one.
   print
      "<?xml version='1.0' encoding='UTF-8'?>\n",
      "<metadata>\n",
      "<item>\n",
      "<inetref>$movieid</inetref>\n",
      "<title>$titleApi</title>\n",
      "<language>fr</language>\n",
      "<description>$plotApi</description>\n",
      "<countries>\n",
      "$countryApi",
      "</countries>\n",
      "<categories>\n",
      "$genreApi",
      "</categories>\n",
      "<userrating>$userratingApi</userrating>\n",
      "<year>$yearApi</year>\n",
      "<runtime>$runtimeApi</runtime>\n",
      "<homepage>$allocineurl</homepage>\n",
      "<people>",
      "$castApi",
      "</people>\n",
      "<images>\n",
      "<image type=\"coverart\" url=\"$uri\"/>\n",
      "</images>\n",
      "</item>\n",
      "</metadata>\n";
}
350
# dump Movie Poster
#
# Prints exactly one line on STDOUT: the URL of a poster for the movie.
#
# Parameters:
#   $movieid - Allocine movie code (the "cfilm" number)
#
# Lookup order:
#   1. the <img> inside the <div class="tac"> of the poster detail page
#   2. the "Agrandir" (enlarge) link of that page, as getMovieData does
#   3. the thumbnail on the poster list page
#   4. Allocine's generic empty poster
#
# The URL used to be printed twice whenever step 3 ran; it is now printed once.
sub getMoviePoster {
   my ($movieid)=@_; # grab movieid parameter
   if (defined $opt_d) { printf("# looking for movie id: '%s'\n", $movieid);}

   # poster list page -> id of the first poster
   my $posterlist = "http://www.allocine.fr/film/fichefilm-".$movieid."/affiches/";
   if (defined $opt_d) { printf("# request: '%s'\n", $posterlist); }
   my (undef, $posterlisthtml) = myth_url_get($posterlist);
   my $mediafile = parseBetween($posterlisthtml,"<a href=\"/film/fichefilm-".$movieid."/affiches/detail/?cmediafile=","\" >");

   # poster detail page
   my $posterdetail = "http://www.allocine.fr/film/fichefilm-".$movieid."/affiches/detail/?cmediafile=".$mediafile;
   my (undef, $posterdetailhtml) = myth_url_get($posterdetail);

   my $uri = parseBetween(parseBetween($posterdetailhtml,"<div class=\"tac\" style=\"\">","</div>"),"<img src=\"","\" alt");
   if ($uri eq "")
   {
        $uri = trim(parseBetween($posterdetailhtml,"<a Target=\"_blank\" Class=\"fs11\" href=\"","\">Agrandir</a>"));
   }
   if ($uri eq "")
   {
        # thumbnail from the list page that was already downloaded
        my $tmp_uri = parseBetween($posterlisthtml, "<a href=\"/film/fichefilm-".$movieid."/affiches/\">"," alt=");
        $tmp_uri =~ s/\n/ /gm;
        $uri = trim(parseBetween($tmp_uri,"<img src='h","'"));
        if($uri ne "")
        {
                # the search marker swallowed the leading "h" of "http"
                $uri = "h$uri";
        }
   }

   # if no picture was found, just download the empty poster
   if($uri eq ""){
        $uri = "http://images.allocine.fr/r_160_214/commons/emptymedia/AffichetteAllocine.gif";
   }

   print "$uri\n";
}
387
# Search www.allocine.fr for a title and print one "movieid:title" line per
# result on STDOUT.
#
# Parameters:
#   $filename - title or file name to search for; passed through cleanTitleQuery
#   $options  - optional, only echoed in debug output
#
# Each result is the text between the $beg and $end markers below.  A result
# whose end marker is missing stops the scan, and results without a movie id
# are skipped because they cannot be looked up with -D or -P.
sub getMovieList {
   my ($filename, $options) = @_; # grab parameters

   # NOTE(review): the query is inserted into the URL as-is; this assumes
   # cleanTitleQuery returns a URL-safe string -- confirm in MythVideoCommon.
   my $query = cleanTitleQuery($filename);
   if (!$options) { $options = ""; }
   if (defined $opt_d) {
      printf("# query: '%s', options: '%s'\n", $query, $options);
   }

   # get the search results  page
   my $request = "http://www.allocine.fr/recherche/1/?q=$query";
   if (defined $opt_d) { printf("# request: '%s'\n", $request); }
   my (undef, $response) = myth_url_get($request);
   $response = "" unless defined $response;
   from_to($response,'utf-8','iso-8859-1');
   $response =~ s/\n//g;

   if ($response eq "") {
      if (defined $opt_d) { printf("# no results\n"); }
      return;
   }

   # markers surrounding one search result
   my $beg = "<div style=\"margin-top:-5px;\">";
   my $end = "<span class=\"fs11\">";

   my @movies;
   my $pos = index($response, $beg);
   while ($pos != -1) {
      my $start = $pos + length($beg);
      my $finish = index($response, $end, $start);
      last if $finish == -1;   # truncated result, nothing usable follows

      my $chunk = substr($response, $start, $finish - $start);
      # drop everything from the last "(" onwards
      $chunk =~ s/(.*)\(.*$/$1/;

      my $moviename = trim(removeTag($chunk));
      my $movienum = parseBetween($chunk,"<a href='/film/fichefilm_gen_cfilm=",".html");

      # a "(digits)" group still present in the name is appended as the year
      my ($movieyear) = $moviename =~ /\((\d+)\)/;
      if ($movieyear) {
         $moviename = $moviename." (".$movieyear.")";
      }

      if (defined $movienum && $movienum ne "") {
         push(@movies, "$movienum:$moviename");
      }

      # advance to next movie
      $pos = index($response, $beg, $finish);
   }

   # display array of values
   for my $movie (@movies) {
      print "$movie\n";
   }
}
453
#
# Main Program
#

# parse command line arguments
#
# "help" is registered so that -h/--help really sets $opt_h.
# NOTE(review): $opt_d is tested by the lookup subs but no option sets it.
# Adding a "debug" option here would, with Getopt::Long's default
# case-insensitive abbreviation matching, presumably make "-D" ambiguous
# between "Data" and "debug" -- confirm before adding it.
GetOptions( "utf8" => \$opt_u_dummy,
            "help" => \$opt_h,
            "version" => \$opt_v,
            "info" => \$opt_i,
            "language" => \$opt_l,
            "originaltitle" => \$opt_originaltitle,
            "casting" => \$opt_casting,
            "Data" => \$opt_D,
            "Movie" => \$opt_M,
            "Poster" => \$opt_P
            );

# print out info
if (defined $opt_v) { version(); exit 1; }
if (defined $opt_i) { info(); exit 1; }

# "language" is declared as a flag, so its value is still in @ARGV: drop it
if (defined $opt_l) {
   shift @ARGV;
}

# print out usage if needed (help() exits)
if (defined $opt_h || $#ARGV<0) { help(); }

if (defined $opt_D) {
   # take movieid from cmdline arg
   my $movieid = shift @ARGV;
   die "Usage : $0 -D <movieid>\n" unless defined $movieid && $movieid ne "";
   getMovieData($movieid);
}
elsif (defined $opt_P) {
   # take movieid from cmdline arg
   my $movieid = shift @ARGV;
   die "Usage : $0 -P <movieid>\n" unless defined $movieid && $movieid ne "";
   getMoviePoster($movieid);
}
elsif (defined $opt_M) {
   # the query is every remaining argument, each followed by one space
   my $query = join('', map { $_ . ' ' } @ARGV);
   my $options = '';
   getMovieList($query, $options);
}
504# vim: set expandtab ts=3 sw=3 :