#!/usr/bin/perl -w
#
# This Perl script performs movie data lookups in German, using the
# www.ofdb.de website.
#
# For more information on MythVideo's external movie lookup mechanism, see
# the README file in this directory.
#
# Author: Xavier Hervy (maxpower44 AT tiscali DOT fr)
#
use LWP::Simple; # libwww-perl providing simple HTML get actions
use HTML::Entities;
use URI::Escape;
#use utf8;
use vars qw($opt_h $opt_r $opt_d $opt_i $opt_v $opt_D $opt_M $opt_P);
use Getopt::Std;
# Program identity strings: name, version and author of this script.
($title, $version, $author) = ("Ofdb Query", "v1.00", "Xavier Hervy");
# Print the command-line synopsis and the option summary, then end the
# script with exit(-1).  This sub never returns to its caller.
sub usage {
    my @lines = (
        "usage: $0 -hdrviMPD [parameters]\n",
        " -h help\n",
        " -d debug\n",
        " -r dump raw query result data only\n",
        " -v display version\n",
        " -i display info\n",
        "\n",
        " -M get movie list\n",
        " -D get movie data\n",
        " -P get movie poster\n",
    );
    for my $line (@lines) {
        print $line;
    }
    exit(-1);
}
# Print a one-line banner naming the program, its version and its author.
# Reads the package globals $title, $version and $author.
sub version {
    my $banner = "$title ($version) by $author\n";
    print $banner;
}
# Print a one-line description of the kind of query this script performs.
sub info {
    return print("Performs queries using the www.ofdb.de website.\n");
}
# Show the detailed help: the version banner, the query description and the
# usage text, in that order.  usage() ends the script, so this sub does not
# return.
sub help {
    for my $step (\&version, \&info, \&usage) {
        $step->();
    }
}
# Return the text found in $data between the first occurrence of $beg and the
# next occurrence of $end after it.  Both delimiters are matched
# case-insensitively; the returned text keeps its original case and is passed
# through removenbsp() before being returned.
#
# Parameters:
#   $data - text to search; undef (e.g. the result of a failed
#           LWP::Simple::get) is accepted and yields ""
#   $beg  - opening delimiter
#   $end  - closing delimiter
# Returns the enclosed text, or "" when either delimiter cannot be found.
sub parseBetween {
    my ($data, $beg, $end) = @_;    # grab parameters

    # Nothing to search in an undefined page; returning early also avoids
    # "uninitialized value" warnings when the script runs under -w.
    return "" unless defined $data;
    $beg = "" unless defined $beg;
    $end = "" unless defined $end;

    # Search a lower-cased copy so the match ignores case, but cut the result
    # out of the original text.
    my $ldata  = lc($data);
    my $beg_at = index($ldata, lc($beg));
    return "" if $beg_at == -1;

    my $start  = $beg_at + length($beg);
    my $finish = index($ldata, lc($end), $start);
    return "" if $finish == -1;

    # decode_entities() is deliberately not used on the result: it turns the
    # non-breaking-space entity into a special character (possibly a bug in
    # HTML::Entities), so removenbsp() is called instead.
    return removenbsp(substr($data, $start, $finish - $start));
}
# Replace every "&nbsp;" entity in $data (matched case-insensitively) with a
# plain space.  Used instead of decode_entities().
#
# Parameters:
#   $data - text to clean; undef is returned unchanged
# Returns the cleaned text.
sub removenbsp {
    my ($data) = @_;    # grab parameters
    return $data unless defined $data;

    # Match the six-character entity itself, not a bare space: replacing six
    # characters at every space would eat real text and never terminate,
    # because the space written back is found again on the next scan.
    $data =~ s/&nbsp;/ /gi;
    return $data;
}
# Strip markup tags from $data: every span from a "<" up to the next ">" is
# deleted and the remaining text is returned.
#
# Parameters:
#   $data - text possibly containing tags; undef is returned unchanged
# Returns the text with all complete tags removed.  A "<" that has no closing
# ">" after it is left in place.
sub removeTag {
    my ($data) = @_;    # grab parameters
    return $data unless defined $data;

    # [^>]* stops at the first ">", so each tag is removed on its own (tags
    # spanning several lines included).  An unterminated "<" simply does not
    # match; an index()-based loop that adds 1 to the ">" position cannot
    # tell "not found" (0) from a real offset and would grow the string
    # forever on such input.
    $data =~ s/<[^>]*>//g;
    return $data;
}
# Look up one movie on www.ofdb.de and print its details.
#
# Parameters:
#   $movieid - value appended to the "fid=" parameter of the ofdb.de film URL
#
# Prints one "Name:value" line per field (Title, Year, Director, Plot,
# UserRating, MovieRating, Runtime, Writers, Cast, Genres, Countries).
# With -d the movie id and the request URL are echoed as "# ..." lines; with
# -r the raw film page is dumped.
#
# NOTE(review): several statements in this sub look damaged -- e.g.
# parseBetween() calls with empty or missing delimiters, substitutions that
# are never closed, and $genres, $countries, $ploturl and $plot being used
# without a visible assignment.  Presumably HTML delimiter strings were lost
# from the file; restore it from a pristine copy before relying on it.
sub getMovieData {
my ($movieid)=@_; # grab movieid parameter
if (defined $opt_d) { printf("# looking for movie id: '%s'\n", $movieid);}
# fetch the ofdb.de film page for this id
my $request = "http://www.ofdb.de/view.php?page=film&fid=" . $movieid;
if (defined $opt_d) { printf("# request: '%s'\n", $request); }
my $response = get $request;
#print "$response\n";
if (defined $opt_r) { printf("%s", $response); }
# parse title and year
my $title = parseBetween($response, "","");
#print "titre = $title\n";
my $year = parseBetween($response,"");
# $year = parseBetween($year,"(",")");
# parse director
my $director = parseBetween($response,"Regie:","");
$director = parseBetween($director,"\">","");
#print "Director $director";
$director = removeTag($director);
# parse user rating
my $userrating = parseBetween($response, "Note: ", " ");
# parse cast
my $cast = parseBetween($response,"Darsteller:","");
#$cast = parseBetween($cast,"Daten\">","...");
$cast =~ s/
");
$genres = parseBetween($genres,"class=\"Daten\">","");
$countries = parseBetween($countries,"Daten\">","");
$countries =~ s/
[mehr]");
my $runtime = 0;
my $movierating = "";
my $writer = "";
# runtime, movie rating and writer are read from german.imdb.com, and only
# when the ofdb.de page contains a link to it
my $urlimdb = parseBetween($response,"http://german.imdb.com/Title?","\" target");
if ($urlimdb eq ""){
}else{
$request = "http://german.imdb.com/Title?".$urlimdb;
$response = get $request;
#parse movie length
$runtime = parseBetween($response,"Länge:\n"," min ");
#parse movie rating
$movierating = parseBetween($response,"Altersfreigabe:\n"," \n
");
$movierating = removeTag($movierating);
#parse writer (only the first)
$writer = parseBetween($response,"\n\n
\n","
");
$writer = parseBetween($writer,">","");
}
# parse plot
if ($ploturl eq ""){
}
else{
$request = "http://www.ofdb.de/view.php?page=inhalt" . $ploturl;
$response = get $request;
$response = parseBetween($response,"
","
");
if ($response eq ""){
}else{
$plot=$response;
}
}
# output fields (these field names must match what MythVideo is looking for)
print "Title:$title\n";
print "Year:$year\n";
print "Director:$director\n";
print "Plot:$plot\n";
print "UserRating:$userrating\n";
print "MovieRating:$movierating\n";
print "Runtime:$runtime\n";
print "Writers: $writer\n";
print "Cast: $cast\n";
print "Genres:$genres\n";
print "Countries:$countries\n";
}
# Look up a poster image for one movie.
#
# Parameters:
#   $movieid - value appended to the "fid=" parameter of the ofdb.de film URL
#
# The visible code fetches the ofdb.de film page, extracts an image path from
# it (treating "images/film/na.gif" as "no poster") and, when the page links
# to german.imdb.com, requests the IMDb posters page and follows a reference
# to www.impawards.com.  With -d, progress lines prefixed with "# " are
# printed; with -r, the fetched pages are dumped.
#
# NOTE(review): the text from the impawards lookup onward looks damaged: the
# parseBetween() call there runs into fragments that use $sub, $movienum,
# $moviename and @movies, and the main program calls getMovieList(), whose
# "sub" line is not visible.  Presumably the end of this sub and the start of
# getMovieList were lost; restore the file from a pristine copy.
sub getMoviePoster {
my ($movieid)=@_; # grab movieid parameter
if (defined $opt_d) { printf("# looking for movie id: '%s'\n", $movieid);}
# fetch the ofdb.de film page for this id
my $request = "http://www.ofdb.de/view.php?page=film&fid=" . $movieid;
if (defined $opt_d) { printf("# request: '%s'\n", $request); }
my $response = get $request;
if (defined $opt_r) { printf("%s", $response); }
my $uriofdb = "";
# narrow the page to the text between "Linke" and "Aufgerufen", then take
# what sits between src=" and " alt inside it
$uriofdb = parseBetween($response, "Linke", "Aufgerufen");
$uriofdb = parseBetween($uriofdb,"src=\"","\" alt");
# na.gif counts as "no poster"; anything else becomes an absolute ofdb.de
# URL (note the newline appended inside the string)
if ($uriofdb eq "images/film/na.gif") {
$uriofdb = "";
}else{
$uriofdb = "http://www.ofdb.de/$uriofdb\n";
}
my $urlimdb = parseBetween($response,"http://german.imdb.com/Title?","\" target");
my $uri="";
if ($urlimdb eq ""){
}else{
# fetch the IMDb posters page for the linked title
my $request = "http://www.imdb.com/title/tt" . $urlimdb . "/posters";
if (defined $opt_d) { printf("# request: '%s'\n", $request); }
my $response = get $request;
if (defined $opt_r) { printf("%s", $response); }
# look for references to impawards.com posters - they are high quality
my $site = "http://www.impawards.com";
my $impsite = parseBetween($response, "".$site);
if ($impsite) {
$impsite = $site . $impsite;
if (defined $opt_d)
{ print "# Searching for poster at: ".$impsite."\n"; }
my $impres = get $impsite;
if (defined $opt_d) { printf("# got %i bytes\n", length($impres)); }
if (defined $opt_r) { printf("%s", $impres); }
$uri = parseBetween($impres, " |
1.",
"
");
#print "$exact_matches\n";
# parse movie list from matches
my $beg = "");
$title = parseBetween($sub,">","");
$title = removeTag($title);
$moviename = removeTag($sub);
my ($movieyear)= $moviename =~/\((\d+)\)/;
if ($movieyear){$title = $title." (".$movieyear.")"; }
$moviename=$title ;
# advance data to next movie
$data = substr($data, - (length($data) - $finish));
$start = index($data, $beg);
$finish = index($data, $end, $start + 1);
# add to array
$movies[$count++] = $movienum . ":" . $moviename;
}
# display array of values
for $movie (@movies) { print "$movie\n"; }
# }
# }
}
#
# Main Program
#
# Parse the single-letter switches; getopts stores them in the $opt_*
# globals and removes them from @ARGV.
getopts('ohrdivDMP');

# -v and -i each print one line and end the run with exit status 1.
if (defined $opt_v) {
    version();
    exit 1;
}
if (defined $opt_i) {
    info();
    exit 1;
}

# Show the full help when it was requested or when no argument is left.
help() if (defined($opt_h) || @ARGV < 1);

# Run the first requested query mode, checked in the order -D, -P, -M.
if (defined $opt_D) {
    # the movie id is the next command-line argument
    $movieid = shift || die "Usage : $0 -D \n";
    getMovieData($movieid);
}
elsif (defined $opt_P) {
    # the movie id is the next command-line argument
    $movieid = shift || die "Usage : $0 -P \n";
    getMoviePoster($movieid);
}
elsif (defined $opt_M) {
    # one argument is the query; with two, the first holds the options
    $options = shift || die "Usage : $0 -M [options] \n";
    $query = shift;
    unless ($query) {
        ($query, $options) = ($options, "");
    }
    getMovieList($query, $options);
}
|