Ticket #3683: patch_imdb.pl

File patch_imdb.pl, 3.2 KB (added by jmortensen@…, 17 years ago)

patch

Line 
1Index: scripts/imdb.pl
2===================================================================
3--- scripts/imdb.pl     (revision 13165)
4+++ scripts/imdb.pl     (working copy)
5@@ -94,6 +94,9 @@
6    my $ldata = lc($data);
7    my $start = index($ldata, lc($beg)) + length($beg);
8    my $finish = index($ldata, lc($end), $start);
9+
10+   # if (defined $opt_d) { printf("ParseBetween -> $beg ($start) $end ($finish)\n"); }
11+
12    if ($start != (length($beg) -1) && $finish != -1) {
13       my $result = substr($data, $start, $finish - $start);
14       # return w/ decoded numeric character references
15@@ -130,13 +133,12 @@
16       $title = $1;
17    }
18 
19-   # parse director
20-   my $director = parseBetween($response, ">Directed by</h5>", "/a><br/>");
21-   $director = parseBetween($director, "/\">", "<");
22+   # parse director(s)
23+   my $director = parseBetween($response, ">Director", ">Writer");
24+   $director = join(",", ($director =~ m/$name_link_pat/g));
25 
26-   # parse writer
27-   # (Note: this takes the 'first' writer, may want to include others)
28-   my $data = parseBetween($response, ">Writing credits <a href=\"/wga\">(WGA)</a></h5>", "\n<br/>");
29+   # parse writer(s)
30+   my $data = parseBetween($response, ">Writer", ">Release");
31    my $writer = join(",", ($data =~ m/$name_link_pat/g));
32 
33    # parse plot
34@@ -417,16 +419,23 @@
35    #    possible matches are grouped in several categories: 
36    #        exact, partial, and approximate
37    my $popular_results = parseBetween($response, "<b>Popular Titles</b>",
38-                                              "</ol>");
39+                                              "<p>");
40    my $exact_matches = parseBetween($response, "<b>Titles (Exact Matches)</b>",
41-                                              "</ol>");
42+                                              "<p>");
43    my $partial_matches = parseBetween($response, "<b>Titles (Partial Matches)</b>",
44-                                              "</ol>");
45+                                              "<p>");
46 #   my $approx_matches = parseBetween($response, "<b>Approximate Matches</b>",
47 #                                               "</ol>");
48+
49+
50+
51+#print "--------------------------\n";
52+#print $popular_results, "\n";
53+#print "EXACT $exact_matches\n";
54+
55    # parse movie list from matches
56-   my $beg = "<li>";
57-   my $end = "</li>";
58+   my $beg = "<td";
59+   my $end = "</td>";
60    my $count = 0;
61    my @movies;
62 
63@@ -445,7 +454,14 @@
64    my $year;
65    my $type;
66    my $title;
67+   
68+   if (defined $opt_d) {print "data = $data\n";};
69+   if (defined $opt_d) {print "Item start tag = $beg\n";};
70+   if (defined $opt_d) {print "start = $start finish = $finish\n";};
71+   
72    while ($start != -1 && $start < length($data)) {
73+   
74+      if (defined $opt_d) { print "Item start = $start ";};
75       $start += length($beg);
76       my $entry = substr($data, $start, $finish - $start);
77       $start = index($data, $beg, $finish + 1);
78@@ -465,6 +481,7 @@
79       if ($lhs =~ m/<a href="\/title\/tt(\d+)\/.*\">(.+)<\/a>/i) {
80           $movienum = $1;
81           $title = $2;
82+          if (defined $opt_d) { print "Movie $movienum, Title $title";};
83       } else {
84            if (defined $opt_d) {
85                print("Unrecognized entry format\n");