1 | Index: scripts/imdb.pl |
---|
2 | =================================================================== |
---|
3 | --- scripts/imdb.pl (revision 13165) |
---|
4 | +++ scripts/imdb.pl (working copy) |
---|
5 | @@ -94,6 +94,9 @@ |
---|
6 | my $ldata = lc($data); |
---|
7 | my $start = index($ldata, lc($beg)) + length($beg); |
---|
8 | my $finish = index($ldata, lc($end), $start); |
---|
9 | + |
---|
10 | + # if (defined $opt_d) { printf("ParseBetween -> $beg ($start) $end ($finish)\n"); } |
---|
11 | + |
---|
12 | if ($start != (length($beg) -1) && $finish != -1) { |
---|
13 | my $result = substr($data, $start, $finish - $start); |
---|
14 | # return w/ decoded numeric character references |
---|
15 | @@ -130,13 +133,12 @@ |
---|
16 | $title = $1; |
---|
17 | } |
---|
18 | |
---|
19 | - # parse director |
---|
20 | - my $director = parseBetween($response, ">Directed by</h5>", "/a><br/>"); |
---|
21 | - $director = parseBetween($director, "/\">", "<"); |
---|
22 | + # parse director(s) |
---|
23 | + my $director = parseBetween($response, ">Director", ">Writer"); |
---|
24 | + $director = join(",", ($director =~ m/$name_link_pat/g)); |
---|
25 | |
---|
26 | - # parse writer |
---|
27 | - # (Note: this takes the 'first' writer, may want to include others) |
---|
28 | - my $data = parseBetween($response, ">Writing credits <a href=\"/wga\">(WGA)</a></h5>", "\n<br/>"); |
---|
29 | + # parse writer(s) |
---|
30 | + my $data = parseBetween($response, ">Writer", ">Release"); |
---|
31 | my $writer = join(",", ($data =~ m/$name_link_pat/g)); |
---|
32 | |
---|
33 | # parse plot |
---|
34 | @@ -417,16 +419,23 @@ |
---|
35 | # possible matches are grouped in several catagories: |
---|
36 | # exact, partial, and approximate |
---|
37 | my $popular_results = parseBetween($response, "<b>Popular Titles</b>", |
---|
38 | - "</ol>"); |
---|
39 | + "<p>"); |
---|
40 | my $exact_matches = parseBetween($response, "<b>Titles (Exact Matches)</b>", |
---|
41 | - "</ol>"); |
---|
42 | + "<p>"); |
---|
43 | my $partial_matches = parseBetween($response, "<b>Titles (Partial Matches)</b>", |
---|
44 | - "</ol>"); |
---|
45 | + "<p>"); |
---|
46 | # my $approx_matches = parseBetween($response, "<b>Approximate Matches</b>", |
---|
47 | # "</ol>"); |
---|
48 | + |
---|
49 | + |
---|
50 | + |
---|
51 | +#print "--------------------------\n"; |
---|
52 | +#print $popular_results, "\n"; |
---|
53 | +#print "EXACT $exact_matches\n"; |
---|
54 | + |
---|
55 | # parse movie list from matches |
---|
56 | - my $beg = "<li>"; |
---|
57 | - my $end = "</li>"; |
---|
58 | + my $beg = "<td"; |
---|
59 | + my $end = "</td>"; |
---|
60 | my $count = 0; |
---|
61 | my @movies; |
---|
62 | |
---|
63 | @@ -445,7 +454,14 @@ |
---|
64 | my $year; |
---|
65 | my $type; |
---|
66 | my $title; |
---|
67 | + |
---|
68 | + if (defined $opt_d) {print "data = $data\n";}; |
---|
69 | + if (defined $opt_d) {print "Item start tag = $beg\n";}; |
---|
70 | + if (defined $opt_d) {print "start = $start finish = $finish\n";}; |
---|
71 | + |
---|
72 | while ($start != -1 && $start < length($data)) { |
---|
73 | + |
---|
74 | + if (defined $opt_d) { print "Item start = $start ";}; |
---|
75 | $start += length($beg); |
---|
76 | my $entry = substr($data, $start, $finish - $start); |
---|
77 | $start = index($data, $beg, $finish + 1); |
---|
78 | @@ -465,6 +481,7 @@ |
---|
79 | if ($lhs =~ m/<a href="\/title\/tt(\d+)\/.*\">(.+)<\/a>/i) { |
---|
80 | $movienum = $1; |
---|
81 | $title = $2; |
---|
82 | + if (defined $opt_d) { print "Movie $movienum, Title $title";}; |
---|
83 | } else { |
---|
84 | if (defined $opt_d) { |
---|
85 | print("Unrecognized entry format\n"); |
---|