MythTV  master
topdocumentaryfilm.pl
Go to the documentation of this file.
1 #!/usr/bin/perl
2 # @(#)$Header: /home/mythtv/mythtvrep/scripts/topdocumentaryfilm.pl,v 1.17 2010/07/24 23:28:11 mythtv Exp $
3 # Auric 2010/01/10 http://web.aanet.com.au/auric/
4 #
5 # MythNetvision Grabber Script for topdocumentaryfilm site.
6 #
7 # To alter any of the default settings, create or edit
8 # $HOME/.mythtv/MythNetvision/userGrabberPrefs/topdocumentaryfilm.cfg
9 # Format of file
10 # player=mplayer
11 # playerargs=-fs -zoom %MEDIAURL%
12 #
13 # Some settings you can have in this are
14 # Print info/progress message: 0 - off, 1 - low ,2 - high
15 # mnvinfo
16 # Info messages go to: 0 = stderr, filename = filename
17 # mnvinfoop
18 # External player to use
19 # player
20 # Args to external player %MEDIAURL% will be replaced with content url
21 # playerargs
22 # External download to use
23 # download
24 # Args to external download %MEDIAURL% will be replaced with content url
25 # downloadargs
26 # A network player like a flash or html5 html. TODO 0.24 May not be approved
27 # netplayer
28 # Type flash or html5
29 # netplayertype
30 # Seconds to cache results Default 72000
31 # cachetime
32 #
33 ################################################################################
34 use strict;
35 use warnings;
36 use Getopt::Std;
37 use LWP::Simple;
38 use HTML::TreeBuilder;
39 use HTML::Entities;
40 use Data::Dumper;
41 use Date::Parse;
42 use Date::Format;
43 use Encode;
44 use Storable;
45 use File::stat;
46 use File::Basename;
47 use FindBin '$Bin', '$Script';
48 use lib "$Bin/nv_perl_libs";
49 use mnvcommonsubs;
50 
51 #################################### Settings #################################
# Read the user's grabber configuration. The config name is this script's
# own name with the '.pl' suffix removed.
# NOTE(review): fileparse() is called in list context here, so it hands
# mnvloadconfig the name, directory and suffix ahead of "notused" --
# confirm mnvloadconfig only relies on its first argument.
mnvloadconfig(fileparse($Script, '.pl'), "notused");

# Per-host autoplay parameter, consulted by addautoplay().
# A value beginning with '?', '&' or ',' is appended to the url verbatim;
# any other value is joined with '?' or '&' as appropriate.
# The special value SKIP drops videos from that host altogether.
my %autoplay = (
    'youtube.com'               => '&autoplay=1',
    '220.ro'                    => '&aplay=true',
    'megavideo.com'             => 'SKIP',
    'veoh.com'                  => 'videoAutoPlay=1',
    'crunchyroll.com'           => 'auto_play=true',
    'mediaservices.myspace.com' => ',AutoPlay=true',
);
63 
64 #################################### Globals ##################################
# Script revision: the digits-and-dots part of the RCS keyword below.
(my $version = '$Revision: 1.17 $') =~ s/\D*([\d\.]+)\D*/$1/;

# Identity of this grabber, reported by the -v option.
my $command          = "topdocumentaryfilm.pl";
my $commandthumbnail = "topdocumentaryfilm.png";
my $author           = "Auric";
my $site             = 'TopDocumentaryFilms';
my $description      = 'Great collection of documentary movies';

# Page the crawl starts from, and the icon used when a video page has none.
my $baseurl  = 'http://topdocumentaryfilms.com/';
my $baseicon = 'http://www.danaroc.com/ezine_pics_031510_websites.jpg';

# Storable file holding the results of the previous crawl.
# NOTE(review): fixed name under /tmp, which is normally world-writable,
# and the file is later read back with Storable -- consider a per-user location.
my $store = "/tmp/.${site}.diritemsref.store";

# Command line flags filled in by getopts().
our ($opt_v, $opt_T, $opt_p, $opt_S);

# { "directory name" => [ item hashes ] }, populated by builddiritems().
my %diritems;
74 
75 #################################### Site Specific Subs ##########################
76 # Build all vid items for all directories
77 # input hash ref to { "directory name" => [array of anonymous hashes] }
78 # anonymous hash {
79 # 'dirthumbnail' => $icon,
80 # 'title' => $title,
81 # 'mythtv:subtitle' => "",
82 # 'author' => $author,
83 # 'pubDate' => $pubDate,
84 # 'description' => $description,
85 # 'link' => $url,
86 # 'player' => $player,
87 # 'playerargs' => $playerargs,
88 # 'download' => $download,
89 # 'downloadargs' => $downloadargs,
90 # 'media:thumbnailurl' => "",
91 # 'media:contenturl' => $contenturl,
92 # 'media:contentlength' => $length,
93 # 'media:contentduration' => "",
94 # 'media:contentwidth' => "",
95 # 'media:contentheight' => "",
96 # 'media:contentlanguage' => $language,
97 # 'rating' => ""
98 # 'mythtv:country' => ""
99 # 'mythtv:season' => ""
100 # 'mythtv:episode' => ""
101 # 'mythtv:customhtml' => ""
102 # }
103 # Basically this hash ref is what you need to build.
104 # input base url
105 # output items found
106 
# Crawl the site and fill the directory/items structure.
# input hash ref to { "directory name" => [array of anonymous hashes] }
# input base url
# output total number of items found
sub builddiritems {
    my ($diritemsref, $baseurl) = @_;

    my $dirurlsref = builddirurls($baseurl);
    my $vidurlsref = buildvidurls($dirurlsref);

    my $total = 0;
    for my $dir (keys %{$vidurlsref}) {
        # Count per directory first, then fold into the grand total.
        my $dircount = 0;
        for my $pair (@{ $vidurlsref->{$dir} }) {
            my ($url, $title) = @{$pair};
            $dircount += builditems($diritemsref, $dir, $url, $title);
        }
        $total += $dircount;
        mnvinfomsg(1, "$dir Items found $dircount");
    }
    return $total;
}
126 
# Switch on autoplay in an embed url.
# input embed url (may contain HTML entities)
# return entity-encoded url with autoplay enabled,
#        or 0 when the url's host is marked SKIP in %autoplay
sub addautoplay {
    my $link = shift @_;

    $link = decode_entities($link);

    # An autoplay parameter that is present but switched off is flipped in
    # place. The patterns are greedy, so the last occurrence is the one changed.
    my @switches = (
        [ qr/(.*[?&]autoplay=)false(.*)/i,  'true' ],
        [ qr/(.*[?&]autostart=)false(.*)/i, 'true' ],
        [ qr/(.*[?&]aplay=)false(.*)/i,     'true' ],
        [ qr/(.*[?&]autoplay=)0(.*)/i,      '1' ],
        [ qr/(.*[?&]autostart=)0(.*)/i,     '1' ],
        [ qr/(.*[?&]aplay=)0(.*)/i,         '1' ],
    );
    foreach my $switch (@switches) {
        my ($pattern, $on) = @{$switch};
        # Return explicitly: falling off the end of the sub would hand the
        # caller the substitution result instead of the rewritten url.
        ($link =~ s/$pattern/${1}${on}${2}/) and return encode_entities($link);
    }

    # No switchable parameter in the url: use the per-host table.
    # Keys are sorted so the result does not depend on hash ordering, and
    # quoted so that '.' in a host name only matches a literal dot.
    foreach my $host (sort(keys(%autoplay))) {
        ($link =~ /\Q$host\E/) or next;
        my $param = $autoplay{$host};
        ($param) or return encode_entities($link);
        ($param eq 'SKIP') and return 0;
        # Values that start with their own separator are appended verbatim.
        unless ($param =~ /^[\?\&,]/) {
            $link .= ($link =~ /\?/) ? '&' : '?';
        }
        $link .= $param;
        return encode_entities($link);
    }

    # Unknown host: fall back to the configured default parameter, if any.
    # An unset or empty default leaves the url untouched rather than
    # appending a dangling '?' or '&'.
    my $default = mnvgetconfig('defaultautoplay');
    if (defined($default) && length($default)) {
        $link .= (($link =~ /\?/) ? '&' : '?') . $default;
    }
    return encode_entities($link);
}
166 
# Collect the urls of all category pages linked from the base page.
# Each category becomes one "directory", named after the link text.
# input base url
# return hash ref to { "directory name" => "url" }
# dies if the base page cannot be fetched or parsed, or has no category links
sub builddirurls {
    my $baseurl = shift @_;

    my %dirurls;

    mnvinfomsg(1, "Getting $baseurl");
    my $content = get($baseurl);
    unless ($content) {
        die "Could not retrieve $baseurl";
    }
    my $tree = HTML::TreeBuilder->new;
    eval { $tree->parse($content); };
    if ($@) {
        my $err = $@;
        $tree->delete;
        die "$baseurl parse failed, $err";
    }
    $tree->eof();

    foreach my $anchor ($tree->find_by_tag_name('a')) {
        my $href = $anchor->attr('href');
        # Anchors without an href attribute give undef; skip them instead
        # of matching against an undefined value.
        (defined($href)) or next;
        if ($href =~ m{topdocumentaryfilms\.com/category/}) {
            my $dir = $anchor->as_trimmed_text();
            $dirurls{$dir} = mnvcleantext($href);
        }
    }
    # Everything needed has been copied out as plain strings, so release
    # the parse tree explicitly.
    $tree->delete;

    (keys(%dirurls)) or die "No urls found";

    return \%dirurls;
}
198 
# Collect the urls and titles of all video pages in every directory.
# A directory page that cannot be fetched or parsed is reported with a
# warning and skipped.
# input hash ref to { "directory name" => "url" }
# return hash ref to { "directory name" => [[url,title]] }
sub buildvidurls {
    my $dirurls = shift @_;

    my %vidurls;

    foreach my $dir (sort(keys(%$dirurls))) {
        mnvinfomsg(1, "Getting $dir $dirurls->{$dir}");
        my $content = get($dirurls->{$dir});
        unless ($content) {
            warn "Could not retrieve $dirurls->{$dir}";
            next;
        }
        my $tree = HTML::TreeBuilder->new;
        eval { $tree->parse($content); };
        if ($@) {
            warn "$dirurls->{$dir} parse failed, $@";
            $tree->delete;
            next;
        }
        $tree->eof();

        # Each video is listed as a link inside an <h2> heading.
        foreach my $heading ($tree->find_by_tag_name('h2')) {
            my $anchor = $heading->find_by_tag_name('a');
            ($anchor) or next;
            my $href = $anchor->attr('href');
            # A heading link without an href cannot be followed.
            (defined($href)) or next;
            my $url = mnvcleantext($href);
            my $title = mnvcleantext($anchor->as_trimmed_text());
            push(@{$vidurls{$dir}}, [$url, $title]);
        }
        # Only plain strings were kept, so the tree can be released now.
        $tree->delete;
    }
    return \%vidurls;
}
235 
# Build the items for one video page and append them to its directory.
# One item is added per embedded player found in the page's postContent
# element; when there is more than one, " Pt N" is appended to each title.
# input hash ref to { "directory name" => [array of anonymous hashes] }
# input "directory name"
# input url
# input title
# output number of items added
sub builditems {
    my ($diritemsref, $dir, $url, $title) = @_;

    mnvinfomsg(2, "Getting $dir Episode $url");
    my $content = get($url);
    unless ($content) {
        warn "Could not retrieve $url";
        return 0;
    }
    my $tree = HTML::TreeBuilder->new;
    eval { $tree->parse($content); };
    if ($@) {
        warn "$url parse failed, $@";
        $tree->delete;
        return 0;
    }
    $tree->eof();

    my $desc = "";
    my $icon = $baseicon;
    my @links;
    my $pc = $tree->look_down('class', 'postContent');
    if ($pc) {
        # Description: text of the first paragraph.
        my $para = $pc->find_by_tag_name('p');
        ($para) and $desc = mnvcleantext($para->as_trimmed_text());

        # Icon: first image, if it has a src; otherwise keep the site default.
        my $img = $pc->find_by_tag_name('img');
        my $imgsrc = ($img) ? $img->attr('src') : undef;
        (defined($imgsrc)) and $icon = mnvcleantext($imgsrc);

        # One link per embedded player; addautoplay returns false for
        # videos that must be skipped.
        foreach my $embed ($pc->find_by_tag_name('embed')) {
            my $src = $embed->attr('src');
            (defined($src)) or next;
            my $cleaned = mnvcleantext($src);
            ($cleaned) or next;
            my $autolink = addautoplay($cleaned);
            if ($autolink) {
                push(@links, $autolink);
            } else {
                mnvinfomsg(2, "Skipped $cleaned");
            }
        }
    }
    # All data has been copied out as plain strings; release the parse tree
    # on every path, including the "nothing found" returns below.
    $tree->delete;
    (@links) or return 0;

    my $country = "";
    my $addpart = 1;
    my $oldtitle = $title;
    foreach my $link (@links) {
        if (@links > 1) {
            $title = "$oldtitle Pt $addpart";
            $addpart++;
        }
        push(@{$diritemsref->{$dir}}, {
            'dirthumbnail' => $icon,
            'title' => $title,
            'mythtv:subtitle' => "",
            'author' => "",
            'pubDate' => "",
            'description' => $desc,
            'link' => $link,
            'player' => mnvgetconfig('player'),
            'playerargs' => mnvgetconfig('playerargs'),
            'download' => mnvgetconfig('download'),
            'downloadargs' => mnvgetconfig('downloadargs'),
            'media:thumbnailurl' => $icon,
            'media:contenturl' => $link,
            'media:contentlength' => "",
            'media:contentduration' => "",
            'media:contentwidth' => "",
            'media:contentheight' => "",
            'media:contentlanguage' => "",
            'rating' => "",
            'mythtv:country' => $country,
            'mythtv:season' => "",
            'mythtv:episode' => "",
            'mythtv:customhtml' => "no"
        });

        mnvinfomsg(2, "Added $title");
    }
    return scalar(@links);
}
320 
321 #################################### Main #####################################
322 # If you copy this for another site, hopefully these won't need to be changed
# Options: -v version info, -T tree view, -S <text> search, -p <n> search page.
getopts('vtTp:S:');

if ($opt_v) {
    # Netvision 23 expects a short capability line; later versions get the
    # full version block.
    ($mnvcommonsubs::netvisionver == 23) and print "$site|TS\n";
    ($mnvcommonsubs::netvisionver > 23) and mnvprintversion($site, $command, $author, $commandthumbnail, $version, $description);
    exit 0;
}

my $type;
my $page = 1;
my $search = "";
if ($opt_T) {
    $type = "tree";
} elsif ($opt_S) {
    $type = "search";
    $search = $opt_S;
    ($opt_p) and $page = $opt_p;
} else {
    print STDERR "Must have -T or -S option\n";
    exit 1;
}

foreach my $sig (qw(INT HUP TERM QUIT)) {
    $SIG{$sig} = \&mnvcleanexit;
}

my $diritemsref = \%diritems;
my $totalitems = 0;
my $filtereditems = 0;

# Use the stored results of the previous run while they are younger than
# the configured cache time.
my $cached;
my $ss = stat($store);
if (($ss) && (time() - $ss->mtime) < mnvgetconfig('cachetime')) {
    # retrieve() dies on serious errors but returns undef on I/O errors,
    # so check the value as well as $@. An unusable store is not fatal:
    # the data is simply rebuilt from the site.
    $cached = eval { retrieve($store) };
    unless (ref($cached) eq 'HASH') {
        warn "Could not load store, rebuilding: " . ($@ || "unreadable or unexpected content") . "\n";
        $cached = undef;
    }
}

if ($cached) {
    $diritemsref = $cached;
    $totalitems = mnvnumresults($diritemsref);
    mnvinfomsg(1, "Using previous run data");
} else {
    $totalitems = builddiritems($diritemsref, $baseurl);
    # store() likewise reports I/O errors by returning undef.
    my $saved = eval { store($diritemsref, $store) };
    unless ($saved) {
        warn "Could not save store, " . ($@ || $!);
    }
}

mnvrssheader();
print '<channel>
 <title>'.$site.'</title>
 <link>'.$baseurl.'</link>
 <description>'.$description.'</description>'."\n";
if ($type eq "search") {
    $filtereditems = mnvfilter($diritemsref, $search);
    mnvprintsearch($diritemsref, $page);
    mnvinfomsg(1, "Total Items match $filtereditems of $totalitems");
} else {
    mnvprinttree($diritemsref);
    mnvinfomsg(1, "Total Items found $totalitems");
}
print "</channel>\n";
mnvrssfooter();

mnvcleanexit(0);