Opened 13 years ago

Last modified 13 years ago

#9074 closed enhancement

allocine script for new metadata schema and allocine API — at Initial Version

Reported by: Alexandra Lepercq <alexandra@…> Owned by:
Priority: minor Milestone: 0.25
Component: Plugin - MythVideo Version: Master Head
Severity: medium Keywords:
Cc: Ticket locked: no

Description

Allocine has released an API for the metadata (thanks to http://wiki.gromez.fr/dev/api/allocine): http://api.allocine.fr/xml/movie?code=$movieid&partner=3

I have made some modifications, based on the allocine.pl script from Xavier Hervy, to make it consistent with the allocine API and the mythtv 0.24 metadata schema.

I hope this helps

#!/usr/bin/perl -w

#
# This perl script is intended to perform movie data lookups in French based on
# the www.allocine.fr website
#
# For more information on MythVideo's external movie lookup mechanism, see
# the README file in this directory.
#
# Original author: Xavier Hervy (maxpower44 AT tiscali DOT fr)

# changes:
# 20-10-2009: Geoffroy Geerseau ( http://www.soslinux.net : jamdess AT soslinux DOT net )
#   Modified for the new allocine templates
# 25-10-2009: Geoffroy Geerseau ( http://www.soslinux.net : jamdess AT soslinux DOT net )
#   Poster download correction
#   Userrating correction
# 28-10-2009: Robert McNamara (Myth Dev)
#   Fix issues in above patches-- files should never be downloaded to /tmp.
#   Convert script to output in new grabber output format for .23. Leave backwards compat.
# 02-11-2009: Geoffroy Geerseau
#   Allocine have, once again, changed their templates...
# 06-08-2010: Alexandra Lepercq
#   Allocine have, once again, changed their templates...
#   Add some data from api.allocine.fr (thanks to http://wiki.gromez.fr/dev/api/allocine)
#   http://api.allocine.fr/xml/movie?code=$movieid&partner=3

# Module imports.  "use lib dirname($0)" adds the directory containing this
# script to @INC so that the MythTV::MythVideoCommon module can be found
# next to it.
use File::Basename;
use File::Copy;
use lib dirname($0);
use Encode;
use utf8;                       # this file contains UTF-8 literals (e.g. "Réalisateur")
use Encode 'from_to';
use MythTV::MythVideoCommon;    # the ticket rendering showed a stray '?' after the name

# Flags filled in by GetOptions() in the main program.
use vars qw($opt_h $opt_r $opt_d $opt_i $opt_v $opt_D $opt_l $opt_M $opt_P $opt_originaltitle $opt_casting $opt_u_dummy);
use Getopt::Long;

# Script identification, printed by version().
$title = "Allocine Query";
$version = "v2.06";
$author = "Xavier Hervy";

# Hand the script name and version to the shared MythVideo helper module.
# NOTE(review): presumably used by myth_url_get to identify this grabber in
# its HTTP requests -- confirm in MythTV::MythVideoCommon.
push(@MythTV::MythVideoCommon::URL_get_extras, ($title, $version));

# Everything this script prints is emitted as UTF-8.
binmode(STDOUT, ":utf8");

# display usage
#
# Prints the command line synopsis to STDOUT and terminates the script.
# Never returns: the process exits with status -1 (seen as 255 by the caller).
sub usage {
    print "usage: $0 -hviocMPD [parameters]\n";
    print " -h, --help help\n";
    print " -v, --version display version\n";
    print " -i, --info display info\n";
    print " -o, --originaltitle concatenate title and original title\n";
    print " -c, --casting with -D option, grab the complete actor list (much slower)\n";
    print "\n";
    print " -M <query>, --movie <query> get movie list\n";
    print " -D <movieid>, --data <movieid> get movie data\n";
    print " -P <movieid>, --poster <movieid> get movie poster\n";
    exit(-1);
}

# display 1-line of info that describes the version of the program
#
# Prints "<title> (<version>) by <author>" using the script-level globals
# $title, $version and $author.
sub version {
    print "$title ($version) by $author\n";
}

# display 1-line of info that can describe the type of query used
sub info {
    print "Performs queries using the www.allocine.fr website.\n";
}

# display detailed help
#
# Prints the version line, the info line and the usage text, in that order.
# Does not return, because usage() terminates the script.
sub help {
    version();
    info();
    usage();
}

# returns text within 'data' without tag
#
# Every span running from a '<' up to and including the next '>' is removed.
# A '<' that is never closed by a '>' is left untouched.  (The previous
# index()-based loop could not detect the missing '>' because it tested
# index(...)+1 against -1, and kept growing the string forever on such input.)
#
# Parameter: $data - the text to clean (may be undef).
# Returns:   the text with all tags removed; undef is passed through as is.
sub removeTag {
    my ($data) = @_;    # grab parameters

    return $data unless defined $data;

    # [^>] also matches newlines and further '<' characters, so each match
    # runs from the leftmost '<' to the first '>' that follows it.
    $data =~ s/<[^>]*>//g;

    return $data;
}

# get Movie Data
#
# Looks up one movie and prints its metadata to STDOUT as a
# <metadata><item>...</item></metadata> XML document.
#
# Title, synopsis, year, rating, runtime, cast, genres and countries are taken
# from the allocine XML API; the cover art URL is scraped from the
# www.allocine.fr poster pages.
#
# Parameter: $movieid - allocine movie code.
#
# Relies on myth_url_get(), parseBetween() and trim(), which are not defined
# in this file (NOTE(review): presumably exported by MythTV::MythVideoCommon).
# As used here, myth_url_get() yields ($rc, $content) and parseBetween()
# yields "" when nothing is found -- confirm against that module.
sub getMovieData {
    my ($movieid) = @_;    # grab movieid parameter
    if (defined $opt_d) { printf("# looking for movie id: '%s'\n", $movieid); }

    # get Movie MetaData from api.allocine
    # myth_url_get is called in list context like everywhere else in this
    # file; the old scalar assignment depended on how the helper builds its
    # return value.
    my $requestAPI = "http://api.allocine.fr/xml/movie?code=$movieid&partner=3";
    my ($rcAPI, $responseAPI) = myth_url_get($requestAPI);
    $responseAPI = "" unless defined $responseAPI;
    from_to($responseAPI, 'utf-8', 'iso-8859-1');

    # get the movie page (only needed for the age rating below)
    my $request = "http://www.allocine.fr/film/fichefilm_gen_cfilm=" . $movieid . ".html";
    my $allocineurl = $request;
    if (defined $opt_d) { printf("# request: '%s'\n", $request); }
    my ($rc, $response) = myth_url_get($request);
    $response = "" unless defined $response;
    from_to($response, 'utf-8', 'iso-8859-1');

    # parse Title and Year
    my $titleApi = parseBetween($responseAPI, "<title>", "</title>");
    my $yearApi = parseBetween($responseAPI, "<productionYear>", "</productionYear>");

    # parse Plot
    my $plotApi = parseBetween($responseAPI, "<synopsis>", "</synopsis>");

    # parse User Rating
    # The API value is multiplied by 2.5 and truncated.  A missing or
    # non-numeric value yields 0 instead of a "non-numeric" warning.
    my $userratingOrig = parseBetween($responseAPI, "<userRating>", "</userRating>");
    my ($ratingValue) = $userratingOrig =~ /^\s*(\d+(?:\.\d+)?)/;
    my $userratingApi = int(($ratingValue || 0) * 2.5);

    # parse Rating
    # NOTE: $movierating is computed but not emitted yet; the <movierating>
    # line in the output section below is disabled.
    my $movierating = parseBetween($response, "Interdit aux moins de ", "ans");
    if ($movierating ne "") {
        $movierating = "Interdit -" . $movierating . "ans";
    }
    elsif (parseBetween($response, "Visible ", "enfants") ne "") {
        $movierating = "Enfants";
    }

    # parse Movie length
    # The API value is divided by 60; the result is truncated to a whole
    # number so that <runtime> never carries a fraction.
    my $runtimeOrig = parseBetween($responseAPI, "<runtime>", "</runtime>");
    my ($runtimeValue) = $runtimeOrig =~ /^\s*(\d+)/;
    my $runtimeApi = int(($runtimeValue || 0) / 60);

    # parse Cast
    # Rewrites each <castMember> of the API answer into one
    # <person name=".." job=".." character=".."/> element.
    my $castApi = parseBetween($responseAPI, "<casting>", "</casting>");
    $castApi =~ s!<castMember>!\n!g;
    $castApi =~ s!</castMember>!/>!g;
    $castApi =~ s/person code/person name/g;
    $castApi =~ s!</person>!"!g;
    $castApi =~ s/<activity code/ job/g;
    $castApi =~ s!</activity>!"!g;
    $castApi =~ s/<role>/ character="/g;
    $castApi =~ s!</role>!"!g;
    # Drop the numeric code so that name="1234">Foo becomes name="Foo.
    # The ticket shows this step as the wiki-mangled "s[0-9]!!g", i.e. a
    # removal of *every* digit, which also damaged names and roles that
    # contain digits.  Only the numeric attribute values are removed now.
    $castApi =~ s!="[0-9]*">!="!g;
    $castApi =~ s!">!!g;
    $castApi =~ s/<picture href/ picture/g;
    $castApi =~ s!</picture>!"!g;
    # Same set of picture URLs the digit-stripped pattern
    # ".../nmedia/////.jpg" used to remove.
    $castApi =~ s!picture="http://images\.allocine\.fr/medias/nmedia/(?:[0-9]*/){4}[0-9]*\.jpg"!!g;
    $castApi =~ s!Réalisateur!director!g;
    $castApi =~ s!Acteur!actor!g;
    $castApi =~ s!Producteur!producer!g;
    $castApi =~ s!Compositeur!composer!g;

    # Genres: <genre code="N">X</genre>  ->  <category name="X"/>
    my $genreApi = parseBetween($responseAPI, "<genreList>", "</genreList>");
    $genreApi =~ s/genre code/category name/g;
    $genreApi =~ s!</genre>!"/>\n!g;
    $genreApi =~ s!="[0-9]*">!="!g;
    $genreApi =~ s!">!!g;

    # Countries: <nationality code="N">X</nationality>  ->  <country name="X"/>
    my $countryApi = parseBetween($responseAPI, "<nationalityList>", "</nationalityList>");
    $countryApi =~ s/nationality code/country name/g;
    $countryApi =~ s!</nationality>!"/>\n!g;
    $countryApi =~ s!="[0-9]*">!="!g;
    $countryApi =~ s!">!!g;

    # parse for Coverart
    my $postersUrl = "http://www.allocine.fr/film/fichefilm-" . $movieid . "/affiches/";
    my ($rcPosters, $postersPage) = myth_url_get($postersUrl);
    my $mediafile = parseBetween($postersPage, "<a href=\"/film/fichefilm-" . $movieid . "/affiches/detail/?cmediafile=", "\" >");

    my $detailUrl = "http://www.allocine.fr/film/fichefilm-" . $movieid . "/affiches/detail/?cmediafile=" . $mediafile;
    my ($rcDetail, $detailPage) = myth_url_get($detailUrl);
    my $uri = trim(parseBetween($detailPage, "<a Target=\"_blank\" Class=\"fs11\" href=\"", "\">Agrandir</a>"));

    if ($uri eq "") {
        # Fall back to the thumbnail on the poster index page.  The page
        # fetched above is reused instead of being downloaded a second time.
        my $tmp_uri = parseBetween($postersPage, "<a href=\"/film/fichefilm-" . $movieid . "/affiches/\">", " alt=");
        $tmp_uri =~ s/\n/ /gm;
        $uri = trim(parseBetween($tmp_uri, "<img src='h", "'"));
        if ($uri ne "") {
            $uri = "h$uri";    # put back the 'h' consumed by the start marker
        }
    }

    # if no picture was found, just download the empty poster
    if ($uri eq "") {
        $uri = "http://images.allocine.fr/r_160_214/commons/emptymedia/AffichetteAllocine.gif";
    }

    # MetaData output
    print "<?xml version='1.0' encoding='UTF-8'?>\n";
    print "<metadata>\n";
    print "<item>\n";
    print "<inetref>$movieid</inetref>\n";
    print "<title>$titleApi</title>\n";
    print "<language>fr</language>\n";
    print "<description>$plotApi</description>\n";
    print "<countries>\n";
    print "$countryApi";
    print "</countries>\n";
    print "<categories>\n";
    print "$genreApi";
    print "</categories>\n";
    print "<userrating>$userratingApi</userrating>\n";
    # print "<movierating>$movierating</movierating>\n";
    print "<year>$yearApi</year>\n";
    print "<runtime>$runtimeApi</runtime>\n";
    print "<homepage>$allocineurl</homepage>\n";
    print "<people>";
    print "$castApi";
    print "</people>\n";
    print "<images>\n";
    print "<image type=\"coverart\" url=\"$uri\"/>\n";
    print "</images>\n";
    print "</item>\n";
    print "</metadata>\n";
}

# dump Movie Poster
#
# Prints exactly one line to STDOUT: the URL of the poster for the given
# movie, or the URL of allocine's placeholder image when no poster is found.
#
# Parameter: $movieid - allocine movie code.
#
# Relies on myth_url_get(), parseBetween() and trim(), which are not defined
# in this file (NOTE(review): presumably exported by MythTV::MythVideoCommon).
sub getMoviePoster {
    my ($movieid) = @_;    # grab movieid parameter
    if (defined $opt_d) { printf("# looking for movie id: '%s'\n", $movieid); }

    # get the poster index page
    my $postersUrl = "http://www.allocine.fr/film/fichefilm-" . $movieid . "/affiches/";
    if (defined $opt_d) { printf("# request: '%s'\n", $postersUrl); }
    my ($rc, $postersPage) = myth_url_get($postersUrl);
    my $mediafile = parseBetween($postersPage, "<a href=\"/film/fichefilm-" . $movieid . "/affiches/detail/?cmediafile=", "\" >");

    # get the detail page of the first poster and take its image
    my $detailUrl = "http://www.allocine.fr/film/fichefilm-" . $movieid . "/affiches/detail/?cmediafile=" . $mediafile;
    my ($rcDetail, $detailPage) = myth_url_get($detailUrl);
    my $uri = parseBetween(parseBetween($detailPage, "<div class=\"tac\" style=\"\">", "</div>"), "<img src=\"", "\" alt");

    if ($uri eq "") {
        # Fall back to the thumbnail on the poster index page.  The page
        # fetched above is reused instead of being downloaded a second time.
        my $tmp_uri = parseBetween($postersPage, "<a href=\"/film/fichefilm-" . $movieid . "/affiches/\">", " alt=");
        $tmp_uri =~ s/\n/ /gm;
        $uri = trim(parseBetween($tmp_uri, "<img src='h", "'"));
        if ($uri ne "") {
            $uri = "h$uri";    # put back the 'h' consumed by the start marker
        }
        # The URL used to be printed here as well, so the caller received it
        # twice, or an empty line followed by the placeholder.  It is now
        # printed once, at the end.
    }

    # if no picture was found, just download the empty poster
    if ($uri eq "") {
        $uri = "http://images.allocine.fr/r_160_214/commons/emptymedia/AffichetteAllocine.gif";
    }

    print "$uri\n";
}

# get Movie List
#
# Searches www.allocine.fr for a title and prints one "<movieid>:<title>"
# line per result to STDOUT.
#
# Parameters: $filename - the title (or file name) to search for
#             $options  - optional; only echoed in the debug output
#
# Relies on cleanTitleQuery(), myth_url_get(), parseBetween() and trim(),
# which are not defined in this file (NOTE(review): presumably exported by
# MythTV::MythVideoCommon).
sub getMovieList {
    my ($filename, $options) = @_;    # grab parameters

    my $query = cleanTitleQuery($filename);
    if (!$options) { $options = ""; }
    if (defined $opt_d) {
        printf("# query: '%s', options: '%s'\n", $query, $options);
    }

    # get the search results page
    my $request = "http://www.allocine.fr/recherche/1/?q=$query";
    if (defined $opt_d) { printf("# request: '%s'\n", $request); }
    my ($rc, $response) = myth_url_get($request);
    $response = "" unless defined $response;    # treat a missing page as "no results"
    from_to($response, 'utf-8', 'iso-8859-1');
    # Join the page into one line.  (The ticket rendering shows this
    # statement as the wiki-mangled "s/\ng;".)
    $response =~ s/\n//g;

    if ($response eq "") {
        if (defined $opt_d) { printf("# no results\n"); }
        return;
    }

    # each result sits between these two markers
    my $beg = "<div style=\"margin-top:-5px;\">";
    my $end = "<span class=\"fs11\">";

    my @movies;
    my $data = $response;
    my $start = index($data, $beg);
    my $finish = index($data, $end, $start);

    # Stop as soon as either marker is missing: without an end marker the
    # entry cannot be delimited and substr() would be given a bogus length.
    while ($start != -1 && $finish != -1) {
        $start += length($beg);
        my $entry = substr($data, $start, $finish - $start);
        $entry =~ s/(.*)\(.*$/$1/;    # cut everything from the last '(' on

        my $moviename = trim(removeTag($entry));
        my $movienum = parseBetween($entry, "<a href='/film/fichefilm_gen_cfilm=", ".html");

        # append the year when one is still present in the name
        my $title = $moviename;
        my ($movieyear) = $moviename =~ /\((\d+)\)/;
        if ($movieyear) {
            $title = $title . " (" . $movieyear . ")";
        }

        # add to array
        push(@movies, "$movienum:$title");

        # advance data to next movie
        $data = substr($data, $finish);
        $start = index($data, $beg);
        $finish = index($data, $end, $start);
    }

    # display array of values
    for my $movie (@movies) {
        print "$movie\n";
    }
}

#
# Main Program
#

# parse command line arguments
# Only long option names are registered.  Getopt::Long accepts unique
# abbreviations and ignores case by default, which is what makes the short
# forms shown by usage() (-D, -M, -P, -v, -i, ...) work.
# "help" was documented by usage() but missing here, so -h was reported as an
# unknown option.
GetOptions( "utf8" => \$opt_u_dummy,
            "help" => \$opt_h,
            "version" => \$opt_v,
            "info" => \$opt_i,
            "language" => \$opt_l,
            "originaltitle" => \$opt_originaltitle,
            "casting" => \$opt_casting,
            "Data" => \$opt_D,
            "Movie" => \$opt_M,
            "Poster" => \$opt_P
            );

# print out info
if (defined $opt_v) { version(); exit 1; }
if (defined $opt_i) { info(); exit 1; }
if (defined $opt_l) {
    # the language argument is consumed but not used
    my $lang = shift;
}

# print out usage if needed
if (defined $opt_h || $#ARGV<0) { help(); }

if (defined $opt_D) {
    # take movieid from cmdline arg
    my $movieid = shift || die "Usage : $0 -D <movieid>\n";
    getMovieData($movieid);
}
elsif (defined $opt_P) {
    # take movieid from cmdline arg
    my $movieid = shift || die "Usage : $0 -P <movieid>\n";
    getMoviePoster($movieid);
}
elsif (defined $opt_M) {
    # take query from cmdline arg
    #$options = shift || die "Usage : $0 -M <query>\n";
    my $options = '';

    # all remaining arguments form the query, each followed by a space
    my $query = '';
    foreach my $key (0 .. $#ARGV) {
        $query .= $ARGV[$key] . ' ';
    }
    getMovieList($query, $options);
}
# vim: set expandtab ts=3 sw=3 :

Change History (1)

Changed 13 years ago by Alexandra Lepercq <alexandra@…>

Attachment: allocine.pl added

allocine script

Note: See TracTickets for help on using tickets.