MythTV master
thewb_api.py
Go to the documentation of this file.
1# -*- coding: UTF-8 -*-
2
3# ----------------------
4# Name: thewb_api - Simple-to-use Python interface to the The WB RSS feeds (http://www.thewb.com/)
5# Python Script
6# Author: R.D. Vaughan
7# Purpose: This python script is intended to perform a variety of utility functions to search and
8# access text metadata, video and image URLs from The WB.
9#
10# License:Creative Commons GNU GPL v2
11# (http://creativecommons.org/licenses/GPL/2.0/)
12#-------------------------------------
13__title__ ="thewb_api - Simple-to-use Python interface to the The WB RSS feeds (http://www.thewb.com/)"
14__author__="R.D. Vaughan"
15__purpose__='''
16This python script is intended to perform a variety of utility functions to search and access text
17meta data, video and image URLs from thewb. These routines process RSS feeds provided by The WB
18(http://www.thewb.com/). The specific "The WB" RSS feeds that are processed are controled through
19a user XML preference file usually found at "~/.mythtv/MythNetvision/userGrabberPrefs/thewb.xml"
20'''
21
22__version__="v0.1.3"
23# 0.1.0 Initial development
24# 0.1.1 Changed the logger to only output to stderr rather than a file
25# 0.1.2 Removed the need for python MythTV bindings and added "%SHAREDIR%" to icon directory path
26# 0.1.3 Fixes to accommodate changes to TheWB web site.
27
28import os, struct, sys, re, time, datetime, urllib.request, urllib.parse, urllib.error
29import logging
30from socket import gethostname, gethostbyname
31from threading import Thread
32from copy import deepcopy
33
34from .thewb_exceptions import (TheWBUrlError, TheWBHttpError, TheWBRssError, TheWBVideoNotFound, TheWBConfigFileError, TheWBUrlDownloadError)
35import io
36
class OutStreamEncoder(object):
    """Wrap an output stream so that both ``str`` and ``bytes`` can be written.

    ``str`` objects are encoded with the configured encoding and written to
    the wrapped stream's underlying binary ``buffer``.  ``bytes`` objects are
    written to that buffer unchanged.  Every attribute other than ``write``
    is delegated to the wrapped stream.
    """

    def __init__(self, outstream, encoding=None):
        """Remember the wrapped stream and the encoding used for ``str`` data.

        outstream: the stream to wrap (for example ``sys.stdout``)
        encoding:  codec name; when empty or None the value of
                   ``sys.getfilesystemencoding()`` is used
        """
        self.out = outstream
        self.encoding = encoding or sys.getfilesystemencoding()

    def write(self, obj):
        """Write ``obj`` (``str`` or ``bytes``) to the wrapped stream.

        When the wrapped stream exposes a binary ``buffer`` the data is
        written there as bytes.  Text-only streams without a ``buffer``
        (``io.StringIO``, some replacement stdout objects) receive text
        instead of failing with an AttributeError.
        """
        buffer = getattr(self.out, 'buffer', None)
        if buffer is None:
            # No binary layer available: hand text to the stream itself
            text = obj if isinstance(obj, str) else obj.decode(self.encoding)
            self.out.write(text)
            return
        data = obj.encode(self.encoding) if isinstance(obj, str) else obj
        buffer.write(data)

    def __getattr__(self, attr):
        """Delegate everything but write to the stream"""
        if attr == 'out':
            # 'out' itself is missing (instance not initialised); looking it
            # up through self.out would recurse without end
            raise AttributeError(attr)
        return getattr(self.out, attr)
55
56sys.stdout = OutStreamEncoder(sys.stdout, 'utf8')
57sys.stderr = OutStreamEncoder(sys.stderr, 'utf8')
58
59
60try:
61 from io import StringIO
62 from lxml import etree
63except Exception as e:
64 sys.stderr.write('\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
65 sys.exit(1)
66
67
def can_int(x):
    """Return True when ``x`` can be converted with ``int()``, otherwise False.

    None and any value that ``int()`` rejects (wrong type, non-numeric text,
    infinities) yield False instead of raising.

    >>> can_int("2")
    True
    >>> can_int("A test")
    False
    >>> can_int(None)
    False
    """
    if x is None:
        return False
    try:
        int(x)
    except (TypeError, ValueError, OverflowError):
        return False
    return True
# end can_int
84
85
86class Videos(object):
87 """Main interface to http://www.thewb.com/
88 This is done to support a common naming framework for all python Netvision plugins no matter their site
89 target.
90
91 Supports search methods
92 The apikey is a not required to access http://www.thewb.com/
93 """
def __init__(self,
             apikey,
             mythtv = True,
             interactive = False,
             select_first = False,
             debug = False,
             custom_ui = None,
             language = None,
             search_all_languages = False,
             ):
    """apikey (str/unicode):
        Specify the target site API key. Applications need their own key in some cases.
        The WB does not require one; it is only stored when it is not None.

    mythtv (True/False):
        When True, the returned meta data has its keys and values massaged to match MythTV
        When False, the returned meta data matches what the target site returned
        (accepted for interface compatibility; not read by this constructor)

    interactive (True/False): (This option is not supported by all target site apis)
        When True, the built-in console UI is used to select the correct show.
        When False, the first search result is used.

    select_first (True/False): (This option is not currently implemented in any grabbers)
        Automatically selects the first series search result (rather
        than showing the user a list of more than one series).
        Is overridden by interactive = False, or specifying a custom_ui

    debug (True/False):
        shows verbose debugging information

    custom_ui (xx_ui.BaseUI subclass): (This option is not currently implemented in any grabbers)
        A callable subclass of interactive class (overrides interactive option)

    language (2 character language abbreviation): (This option is not supported by all target site apis)
        The language of the returned data. Is also the language search uses.
        (accepted for interface compatibility; not read by this constructor)

    search_all_languages (True/False): (This option is not supported by all target site apis)
        By default, a Netvision grabber will only search in the language specified using
        the language option. When this is True, it will search for the
        show in any language

    """
    self.config = {}

    # TheWB does not require an apikey, so it is only recorded when supplied
    if apikey is not None:
        self.config['apikey'] = apikey

    self.config['debug_enabled'] = debug # show debugging messages
    # NOTE(review): 'common' is a module-level name that is not defined in the
    # visible part of this file - presumably injected by the grabber script
    # before this class is instantiated; confirm against the caller.
    self.common = common
    self.common.debug = debug # Set the common function debug level

    self.log_name = 'TheWB_Grabber'
    self.common.logger = self.common.initLogger(path=sys.stderr, log_name=self.log_name)
    self.logger = self.common.logger # Sets up the logger (self.log.debug() etc)

    self.config['custom_ui'] = custom_ui
    self.config['interactive'] = interactive
    self.config['select_first'] = select_first
    self.config['search_all_languages'] = search_all_languages

    # Message templates keyed by the name of the exception they describe
    self.error_messages = {
        'TheWBUrlError': "! Error: The URL (%s) cause the exception error (%s)\n",
        'TheWBHttpError': "! Error: An HTTP communications error with The WB was raised (%s)\n",
        'TheWBRssError': "! Error: Invalid RSS meta data\nwas received from The WB error (%s). Skipping item.\n",
        'TheWBVideoNotFound': "! Error: Video search with The WB did not return any results (%s)\n",
        'TheWBConfigFileError': "! Error: thewb_config.xml file missing\nit should be located in and named as (%s).\n",
        'TheWBUrlDownloadError': "! Error: Downloading a RSS feed or Web page (%s).\n",
        }

    # Channel details and search results
    self.channel = {
        'channel_title': 'The WB',
        'channel_link': 'http://www.thewb.com/',
        'channel_description': "Watch full episodes of your favorite shows on The WB.com, like Friends, The O.C., Veronica Mars, Pushing Daisies, Smallville, Buffy The Vampire Slayer, One Tree Hill and Gilmore Girls.",
        'channel_numresults': 0,
        'channel_returned': 1,
        'channel_startindex': 0,
        }

    # Season and Episode detection regex patterns.
    # Order matters: getSeasonEpisode() returns the groups of the first
    # pattern that matches, so the most specific forms come first.
    self.s_e_Patterns = [
        # Season 3: Ep. 13 (01:04:30)
        re.compile('''Season\\ (?P<seasno>[0-9]+)\\:\\ Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<hours>[0-9]+)\\:(?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''', re.UNICODE),
        # Season 3: Ep. 13 (04:30)
        re.compile('''Season\\ (?P<seasno>[0-9]+)\\:\\ Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''', re.UNICODE),
        # Season 3: Ep. 13
        re.compile('''Season\\ (?P<seasno>[0-9]+)\\:\\ Ep.\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
        # Ep. 13 (01:04:30)
        re.compile('''Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<hours>[0-9]+)\\:(?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''', re.UNICODE),
        # Ep. 13 (04:30)
        re.compile('''Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''', re.UNICODE),
        # Ep. 13
        re.compile('''Ep.\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
        ]

    self.channel_icon = '%SHAREDIR%/mythnetvision/icons/thewb.png'
# end __init__()
183
184
189
def getSeasonEpisode(self, title):
    ''' Check whether an item's title carries season and/or episode number information.
    The patterns in self.s_e_Patterns are tried in order against the start of the title.
    return the tuple of captured groups of the first pattern that matches
    return an empty list when no pattern matches
    '''
    for pattern in self.s_e_Patterns:
        found = pattern.match(title)
        if found:
            return found.groups()
    return []
# end getSeasonEpisode()
203
def getTheWBConfig(self):
    ''' Read the MNV The WB grabber "thewb_config.xml" configuration file and store the
    parsed tree in self.thewb_config
    raise TheWBConfigFileError when the file does not exist
    raise TheWBUrlError when the file cannot be parsed
    return nothing
    '''
    # NOTE(review): 'baseProcessingDir' is a module-level name that is not defined in
    # the visible part of this file - presumably set by the grabber script; confirm.
    url = 'file://%s/nv_python_libs/configs/XML/thewb_config.xml' % (baseProcessingDir, )
    configPath = url[7:] # the URL without its 'file://' prefix
    if not os.path.isfile(configPath):
        raise TheWBConfigFileError(self.error_messages['TheWBConfigFileError'] % (configPath, ))

    if self.config['debug_enabled']:
        print(url)
        print()
    try:
        self.thewb_config = etree.parse(url)
    except Exception as errormsg:
        # The handler previously formatted an undefined name, which raised a
        # NameError and hid the real parsing problem
        raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (url, errormsg)) from errormsg
    return
# end getTheWBConfig()
222
223
def getUserPreferences(self):
    '''Read the thewb_config.xml and user preference thewb.xml file.
    If the thewb.xml file does not exist then create it.
    If the thewb.xml file is too old then update it.
    raise TheWBUrlError when an existing thewb.xml cannot be parsed
    return nothing
    '''
    # Get thewb_config.xml
    self.getTheWBConfig()

    # Expand a leading '~' once and store the absolute path back in the config tree
    prefElement = self.thewb_config.find('userPreferenceFile')
    userPreferenceFile = prefElement.text
    if userPreferenceFile.startswith('~'):
        prefElement.text = "%s%s" % (os.path.expanduser("~"), userPreferenceFile[1:])
    prefFile = prefElement.text

    # Without an existing thewb.xml file one has to be created
    if not os.path.isfile(prefFile):
        self.updateTheWB(True)
        return

    # Read the user preference thewb.xml file
    url = 'file://%s' % (prefFile, )
    if self.config['debug_enabled']:
        print(url)
        print()
    try:
        self.userPrefs = etree.parse(url)
    except Exception as errormsg:
        # The handler previously formatted an undefined name (NameError)
        raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (url, errormsg)) from errormsg

    # Check if the thewb.xml file is too old. Epoch seconds are compared
    # directly so the result does not depend on local time/DST fields.
    nextUpdateSecs = int(self.userPrefs.find('updateDuration').text)*86400 # seconds in a day
    if os.path.getmtime(prefFile)+nextUpdateSecs > time.time():
        return

    # The file exists but is stale so refresh it
    self.updateTheWB(False)
    return
# end getUserPreferences()
261
def updateTheWB(self, create=False):
    ''' Create or update the thewb.xml user preferences file from the default preferences
    and the show links currently published on the TheWB web site.
    create: True when no thewb.xml exists yet, False to refresh an existing one while
            keeping the user's settings (updateDuration, globalmax, disabled urls)
    raise TheWBConfigFileError when the default preference file is missing
    raise TheWBUrlError when a preference file cannot be parsed
    return self.userPrefs unchanged when refreshing and 10 or fewer show links were found
    return nothing otherwise
    '''
    # Read the default user preferences file
    url = 'file://%s/nv_python_libs/configs/XML/defaultUserPrefs/thewb.xml' % (baseProcessingDir, )
    if not os.path.isfile(url[7:]):
        raise TheWBConfigFileError(self.error_messages['TheWBConfigFileError'] % (url[7:], ))

    if self.config['debug_enabled']:
        print('updateTheWB url(%s)' % url)
        print()
    try:
        userTheWB = etree.parse(url)
    except Exception as errormsg:
        # The handler previously formatted an undefined name (NameError)
        raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (url, errormsg)) from errormsg

    # Get the current show links from the TheWB web site
    linksTree = self.common.getUrlData(self.thewb_config.find('treeviewUrls'))

    if self.config['debug_enabled']:
        print("create(%s)" % create)
        print("linksTree:")
        sys.stdout.write(etree.tostring(linksTree, encoding='UTF-8', pretty_print=True))
        print()

    # Check that at least several show directories were returned, otherwise
    # keep using the existing preferences
    if not create and len(linksTree.xpath('//results//a')) <= 10:
        return self.userPrefs

    # Assemble the feeds and formats
    root = etree.XML('<xml></xml>')
    for directory in linksTree.xpath('//results'):
        tmpDirectory = etree.SubElement(root, 'showDirectories')
        tmpDirectory.attrib['name'] = directory.find('name').text
        for show in directory.xpath('.//a'):
            showName = show.text
            href = show.attrib['href']
            # Skip any DVD references as they are not on-line videos
            if 'dvd' in showName.lower() or 'dvd' in href.lower():
                continue
            tmpShow = etree.XML('<url></url>')
            tmpShow.attrib['enabled'] = 'true'
            tmpShow.attrib['name'] = self.common.massageText(showName.strip())
            tmpShow.text = self.common.ampReplace(href.replace('/shows/', '').replace('/', '').strip())
            tmpDirectory.append(tmpShow)

    if self.config['debug_enabled']:
        print("Before any merging userTheWB:")
        sys.stdout.write(etree.tostring(userTheWB, encoding='UTF-8', pretty_print=True))
        print()

    # If there was an existing thewb.xml file then add any relevant user settings to
    # this new thewb.xml
    if not create:
        userTheWB.find('updateDuration').text = self.userPrefs.find('updateDuration').text
        oldDirectory = self.userPrefs.find('showDirectories')
        newDirectory = root.find('showDirectories')
        if oldDirectory is not None and newDirectory is not None and oldDirectory.get('globalmax'):
            newDirectory.attrib['globalmax'] = oldDirectory.attrib['globalmax']
        for rss in self.userPrefs.xpath("//url[@enabled='false']"):
            elements = root.xpath("//url[text()=$URL]", URL=rss.text.strip())
            if len(elements):
                elements[0].attrib['enabled'] = 'false'
                if rss.get('max'):
                    elements[0].attrib['max'] = rss.attrib['max']

    if self.config['debug_enabled']:
        print("After any merging userTheWB:")
        sys.stdout.write(etree.tostring(userTheWB, encoding='UTF-8', pretty_print=True))
        print()

    # Save the thewb.xml file
    prefFile = self.thewb_config.find('userPreferenceFile').text
    prefDir = os.path.dirname(prefFile)
    if prefDir and not os.path.isdir(prefDir):
        os.makedirs(prefDir)
    # etree.tostring() returns bytes for a named encoding, so decode before
    # slicing/joining with str. The slice drops the closing root tag and the
    # trailing newline so the show directories can be spliced in before it.
    closingTag = '</userTheWB>'
    header = etree.tostring(userTheWB, encoding='UTF-8', pretty_print=True).decode('UTF-8')[:-len(closingTag)-1]
    directories = ''.join(etree.tostring(element, encoding='UTF-8', pretty_print=True).decode('UTF-8') for element in root.xpath('/xml/*'))
    with open(prefFile, 'w', encoding='UTF-8') as fd:
        fd.write(header+directories+closingTag)

    # Input the refreshed user preference data
    try:
        self.userPrefs = etree.parse(prefFile)
    except Exception as errormsg:
        # Report the file that was actually parsed (and a defined exception name)
        raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (prefFile, errormsg)) from errormsg
    return
# end updateTheWB()
347
348
353
354
def searchTitle(self, title, pagenumber, pagelen, ignoreError=False):
    '''Key word video search of the TheWB web site
    title:       the search text
    pagenumber:  accepted for interface compatibility; not used by this method
    pagelen:     accepted for interface compatibility; not used by this method
    ignoreError: when True "nothing found" is reported as [None, None] instead of an exception
    raise TheWBUrlDownloadError when the search page cannot be downloaded
    raise TheWBVideoNotFound when nothing was found and ignoreError is False
    return [dictionary of MNV item elements keyed by lower case title, text of the pageInfo element]
    '''
    hrefElement = self.thewb_config.find('searchURLS').xpath(".//href")[0]
    orgURL = hrefElement.text

    # urllib.parse.quote() UTF-8 encodes str input itself and turns blanks into %20
    url = orgURL+'?q=%s' % (urllib.parse.quote(title))

    if self.config['debug_enabled']:
        print("Search url(%s)" % url)
        print()

    # Perform a search. The configuration element temporarily carries the full
    # search URL and is always restored, whatever the outcome of the download.
    hrefElement.text = url
    try:
        resultTree = self.common.getUrlData(self.thewb_config.find('searchURLS'), pageFilter=None)
    except Exception as errormsg:
        raise TheWBUrlDownloadError(self.error_messages['TheWBUrlDownloadError'] % (errormsg)) from errormsg
    finally:
        hrefElement.text = orgURL

    if resultTree is None:
        if ignoreError:
            return [None, None]
        raise TheWBVideoNotFound("No TheWB.com Video matches found for search value (%s)" % title)

    # Only inspect the tree once it is known to exist
    if self.config['debug_enabled']:
        print("resultTree count(%s)" % len(resultTree))
        sys.stdout.write(etree.tostring(resultTree, encoding='UTF-8', pretty_print=True))
        print()

    searchResults = resultTree.xpath('//result/div')
    if not len(searchResults):
        if ignoreError:
            return [None, None]
        raise TheWBVideoNotFound("No TheWB.com Video matches found for search value (%s)" % title)

    # Set the number of search results returned
    self.channel['channel_numresults'] = len(searchResults)

    # TheWB search results do not have a pubDate so use the current date and time
    # e.g. "Sun, 06 Jan 2008 21:44:36 GMT"
    pubDate = datetime.datetime.now().strftime(self.common.pubDateFormat)

    # Translate the search results into MNV RSS item format
    mnvNamespace = "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}"
    thumbNailFilter = etree.XPath('.//div[@class="overlay_thumb_area"]//img')
    textFilter = etree.XPath('.//div[@class="overlay-bg-middle"]/p')
    titleFilter = etree.XPath('.//div[@class="overlay_thumb_area"]//a[@title!=""]/@title')
    descFilter = etree.XPath('.//div[@class="overlay-bg-middle"]/p[@class="overlay_extra overlay_spacer_top"]/text()')
    linkFilter = etree.XPath('.//div[@class="overlay_thumb_area"]//a[@title!=""]/@href')
    itemThumbNail = etree.XPath('.//media:thumbnail', namespaces=self.common.namespaces)
    itemDwnLink = etree.XPath('.//media:content', namespaces=self.common.namespaces)
    itemDict = {}
    for result in searchResults:
        # Make sure that this result actually has a video. An XPath node-set
        # result is a list, so it has to be tested for emptiness, not for None.
        if not linkFilter(result):
            continue
        thewbItem = etree.XML(self.common.mnvItem)
        # These videos are only viewable in the US so add a country indicator
        etree.SubElement(thewbItem, mnvNamespace+"country").text = 'us'
        # Extract and massage data
        thumbNail = self.common.ampReplace(thumbNailFilter(result)[0].attrib['src'])
        # A separate name keeps the caller's search text intact for error messages
        itemTitle = titleFilter(result)[0].strip()
        link = 'file://%s/nv_python_libs/configs/HTML/thewb.html?videocode=%s' % (baseProcessingDir, result.attrib['id'].replace('video_', ''))
        etree.SubElement(thewbItem, mnvNamespace+"customhtml").text = 'true'
        descriptionElement = textFilter(result)[0]
        tmptitle = None
        seasonNum = None
        episodeNum = None
        for e in descriptionElement.xpath('./*'):
            # The text following each child element may hold "Season n: Ep. n (hh:mm:ss)".
            # On Python 3 the tail is already str, so it must not be decoded again.
            eText = e.tail
            if isinstance(eText, bytes):
                eText = eText.decode('UTF-8', 'replace')
            if not eText:
                continue
            eText = eText.strip()
            # NOTE(review): the patterns in self.s_e_Patterns spell the episode-only
            # form "Ep." while this test looks for "EP" - confirm which is intended.
            if not (eText.startswith('Season ') or eText.startswith('EP')):
                continue
            sed = self.getSeasonEpisode(eText)
            if len(sed) < 2: # nothing matched, or no season/episode pair to format
                continue
            infoList = 'S%02dE%02d' % (int(sed[0]), int(sed[1]))
            seasonNum = '%d' % int(sed[0])
            episodeNum = '%d' % int(sed[1])
            if len(sed) == 5: # hours, minutes, seconds
                videoSeconds = int(sed[2])*3600+int(sed[3])*60+int(sed[4])
                itemDwnLink(thewbItem)[0].attrib['duration'] = str(videoSeconds)
            elif len(sed) == 4: # minutes, seconds
                videoSeconds = int(sed[2])*60+int(sed[3])
                itemDwnLink(thewbItem)[0].attrib['duration'] = str(videoSeconds)

            index = itemTitle.find(':')
            if index != -1:
                tmptitle = '%s: %s %s' % (itemTitle[:index].strip(), infoList, itemTitle[index+1:].strip())
            else:
                tmptitle = '%s: %s' % (itemTitle.strip(), infoList)
        if tmptitle:
            itemTitle = tmptitle
        itemTitle = self.common.massageText(itemTitle.strip())
        description = self.common.massageText(descFilter(result)[0].strip())

        # Insert data into a new item element
        thewbItem.find('title').text = itemTitle
        thewbItem.find('author').text = "The WB.com"
        thewbItem.find('pubDate').text = pubDate
        thewbItem.find('description').text = description
        thewbItem.find('link').text = link
        itemThumbNail(thewbItem)[0].attrib['url'] = thumbNail
        itemDwnLink(thewbItem)[0].attrib['url'] = link
        if seasonNum:
            etree.SubElement(thewbItem, mnvNamespace+"season").text = seasonNum
        if episodeNum:
            etree.SubElement(thewbItem, mnvNamespace+"episode").text = episodeNum
        itemDict[itemTitle.lower()] = thewbItem

    if not itemDict:
        if ignoreError:
            return [None, None]
        raise TheWBVideoNotFound("No TheWB Video matches found for search value (%s)" % title)

    return [itemDict, resultTree.xpath('//pageInfo')[0].text]
# end searchTitle()
480
481
def searchForVideos(self, title, pagenumber):
    """Common name for a video search. Used to interface with MythTV plugin NetVision

    Runs searchTitle() for the given title, writes the matches to stdout as an
    MNV RSS document and then ends the process through sys.exit(): status 0 after
    output or when no video was found, status 1 for any other error (the reason
    is written to stderr).
    """
    # Get thewb_config.xml
    self.getTheWBConfig()

    if self.config['debug_enabled']:
        print("self.thewb_config:")
        sys.stdout.write(etree.tostring(self.thewb_config, encoding='UTF-8', pretty_print=True))
        print()

    try:
        data = self.searchTitle(title, pagenumber, self.page_limit)
    except TheWBVideoNotFound as msg:
        sys.stderr.write("%s\n" % msg)
        sys.exit(0)
    except TheWBUrlError as msg:
        sys.stderr.write('%s\n' % msg)
        sys.exit(1)
    except TheWBHttpError as msg:
        sys.stderr.write(self.error_messages['TheWBHttpError'] % msg)
        sys.exit(1)
    except TheWBRssError as msg:
        sys.stderr.write(self.error_messages['TheWBRssError'] % msg)
        sys.exit(1)
    except Exception as e:
        sys.stderr.write("! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (title, e))
        sys.exit(1)

    foundItems, morePages = data[0], data[1]

    # Create RSS element tree
    rssTree = etree.XML(self.common.mnvRSS+'</rss>')

    # Set the paging values; only the total differs when more pages are available
    itemCount = len(foundItems)
    self.channel['channel_returned'] = itemCount
    self.channel['channel_startindex'] = itemCount
    if morePages == 'true':
        self.channel['channel_numresults'] = itemCount+(self.page_limit*(int(pagenumber)-1)+1)
    else:
        self.channel['channel_numresults'] = itemCount

    # Add the Channel element tree
    channelTree = self.common.mnvChannelElement(self.channel)
    rssTree.append(channelTree)

    # Dictionary keys are unique, so every item is added exactly once in title order
    for key in sorted(foundItems):
        channelTree.append(foundItems[key])

    # Output the MNV search results
    sys.stdout.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    sys.stdout.write(etree.tostring(rssTree, encoding='UTF-8', pretty_print=True))
    sys.exit(0)
# end searchForVideos()
546
def displayTreeView(self):
    '''Gather the The WB feeds then get a max page of videos meta data in each of them
    Display the results and exit
    The process always ends through sys.exit(): status 1 when the user preferences cannot
    be read or no feed is enabled, status 0 otherwise.
    raise TheWBUrlDownloadError when the enabled RSS feeds cannot be downloaded
    '''
    # Get the user preferences that specify which shows and formats they want to be in the treeview
    try:
        self.getUserPreferences()
    except Exception as e:
        sys.stderr.write('%s\n' % e)
        sys.exit(1)

    # Verify that there is at least one RSS feed that user wants to download
    showFeeds = self.userPrefs.xpath("//showDirectories//url[@enabled='true']")
    totalFeeds = self.userPrefs.xpath("//url[@enabled='true']")

    if self.config['debug_enabled']:
        print("self.userPrefs show count(%s) total feed count(%s):" % (len(showFeeds), len(totalFeeds)))
        sys.stdout.write(etree.tostring(self.userPrefs, encoding='UTF-8', pretty_print=True))
        print()

    if not len(totalFeeds):
        sys.stderr.write('There are no show or treeviewURLS elements "enabled" in your "thewb.xml" user preferences\nfile (%s)\n' % self.thewb_config.find('userPreferenceFile').text)
        sys.exit(1)

    # Massage channel icon
    self.channel_icon = self.common.ampReplace(self.channel_icon)

    # Create RSS element tree
    rssTree = etree.XML(self.common.mnvRSS+'</rss>')

    # Add the Channel element tree
    channelTree = self.common.mnvChannelElement(self.channel)
    rssTree.append(channelTree)

    # Process any user specified searches. A failing search only skips that show.
    showItems = {}
    for searchDetails in showFeeds:
        searchText = searchDetails.text.strip()
        try:
            data = self.searchTitle(searchText, 1, self.page_limit, ignoreError=True)
        except TheWBVideoNotFound as msg:
            sys.stderr.write("%s\n" % msg)
            continue
        except TheWBUrlError as msg:
            sys.stderr.write('%s\n' % msg)
            continue
        except TheWBHttpError as msg:
            sys.stderr.write(self.error_messages['TheWBHttpError'] % msg)
            continue
        except TheWBRssError as msg:
            sys.stderr.write(self.error_messages['TheWBRssError'] % msg)
            continue
        except Exception as e:
            sys.stderr.write("! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (searchText, e))
            continue
        if data[0] is None:
            continue
        # Each entry becomes [item dictionary, page info, display name]
        data.append(searchDetails.attrib['name'])
        showItems[self.common.massageText(searchText)] = data

    if self.config['debug_enabled']:
        print("After searches count(%s):" % len(showItems))
        for key in showItems:
            print("Show(%s) name(%s) item count(%s)" % (key, showItems[key][2], len(showItems[key][0])))
        print()

    # Filter out any items that are not specifically for the show
    for showNameKey in list(showItems.keys()):
        searchHref = self.thewb_config.find('searchURLS').xpath(".//href")[0].text
        tmpList = {}
        for key, item in showItems[showNameKey][0].items():
            tmpLink = item.find('link').text.replace(searchHref, '')
            if tmpLink.startswith(showNameKey):
                tmpList[key] = item
        showItems[showNameKey][0] = tmpList

    if self.config['debug_enabled']:
        print("After search filter of non-show items count(%s):" % len(showItems))
        for key in showItems:
            print("Show(%s) name(%s) item count(%s)" % (key, showItems[key][2], len(showItems[key][0])))
        print()

    # Create a structure of feeds that concurrently have videos.
    # The XPath is absolute and already covers every showDirectories element, so it
    # is evaluated once; wrapping it in a loop over the directories queued (and
    # downloaded) each feed once per directory.
    rssData = etree.XML('<xml></xml>')
    rssFeedsUrl = 'http://www.thewb.com/shows/feed/'
    for rssFeed in self.userPrefs.xpath("//showDirectories/url[@enabled='true']"):
        link = rssFeedsUrl+rssFeed.text
        urlName = rssFeed.attrib.get('name')
        if urlName:
            uniqueName = '%s;%s' % (urlName, link)
        else:
            uniqueName = 'RSS;%s' % (link)
        url = etree.XML('<url></url>')
        etree.SubElement(url, "name").text = uniqueName
        etree.SubElement(url, "href").text = link
        etree.SubElement(url, "filter").text = "//channel/title"
        etree.SubElement(url, "filter").text = "//item"
        etree.SubElement(url, "parserType").text = 'xml'
        rssData.append(url)

    if self.config['debug_enabled']:
        print("rssData:")
        sys.stdout.write(etree.tostring(rssData, encoding='UTF-8', pretty_print=True))
        print()

    # Get the RSS Feed data. The filters are stored on the instance because
    # createItems() reads them.
    self.channelLanguage = 'en'
    self.itemAuthor = 'The WB.com'
    self.itemFilter = etree.XPath('.//item', namespaces=self.common.namespaces)
    self.titleFilter = etree.XPath('.//title', namespaces=self.common.namespaces)
    self.linkFilter = etree.XPath('.//link', namespaces=self.common.namespaces)
    self.descFilter1 = etree.XPath('.//description', namespaces=self.common.namespaces)
    self.descFilter2 = etree.XPath("//text()")
    self.pubdateFilter = etree.XPath('.//pubDate', namespaces=self.common.namespaces)
    self.thumbNailFilter = etree.XPath('.//media:thumbnail', namespaces=self.common.namespaces)
    self.itemThumbNail = etree.XPath('.//media:thumbnail', namespaces=self.common.namespaces)
    self.itemDwnLink = etree.XPath('.//media:content', namespaces=self.common.namespaces)
    self.rssName = etree.XPath('title', namespaces=self.common.namespaces)
    self.feedFilter = etree.XPath('//url[text()=$url]')
    self.HTMLparser = etree.HTMLParser()
    if rssData.find('url') is not None:
        try:
            resultTree = self.common.getUrlData(rssData)
        except Exception as errormsg:
            raise TheWBUrlDownloadError(self.error_messages['TheWBUrlDownloadError'] % (errormsg)) from errormsg

        if self.config['debug_enabled']:
            print("resultTree:")
            sys.stdout.write(etree.tostring(resultTree, encoding='UTF-8', pretty_print=True))
            print()

        # Process each directory of the user preferences that have an enabled rss feed
        for result in resultTree.findall('results'):
            # The name was built above as "<display name>;<feed link>"
            names = result.find('name').text.split(';')
            names[0] = self.common.massageText(names[0])
            if names[0] == 'RSS':
                names[0] = self.common.massageText(self.rssName(result.find('result'))[0].text.strip())
                urlName = names[0]
            else:
                urlName = result.find('url').text.replace(rssFeedsUrl, '').strip()

            # A per url "max" wins over the directory wide "globalmax";
            # a missing, zero or non-numeric value means "no limit"
            urlMax = None
            url = self.feedFilter(self.userPrefs, url=names[1])
            if len(url):
                if url[0].attrib.get('max'):
                    try:
                        urlMax = int(url[0].attrib.get('max'))
                    except ValueError:
                        pass
                elif url[0].getparent().attrib.get('globalmax'):
                    try:
                        urlMax = int(url[0].getparent().attrib.get('globalmax'))
                    except ValueError:
                        pass
                if urlMax == 0:
                    urlMax = None
            if self.config['debug_enabled']:
                print("Results: #Items(%s) for (%s)" % (len(self.itemFilter(result)), names))
                print()
            self.createItems(showItems, result, urlName, names[0], urlMax=urlMax)

    # Add all the shows and rss items to the channel
    for key in sorted(showItems.keys()):
        if not len(showItems[key][0]):
            continue
        # Create a new directory and/or subdirectory if required
        directoryElement = etree.SubElement(channelTree, 'directory')
        directoryElement.attrib['name'] = showItems[key][2]
        directoryElement.attrib['thumbnail'] = self.channel_icon

        if self.config['debug_enabled']:
            print("Results: #Items(%s) for (%s)" % (len(showItems[key][0]), showItems[key][2]))
            print()

        # Copy all the items into the MNV RSS directory
        for itemKey in sorted(showItems[key][0].keys()):
            directoryElement.append(showItems[key][0][itemKey])

    if self.config['debug_enabled']:
        print("Final results: #Items(%s)" % len(rssTree.xpath('//item')))
        print()

    # Check that there was at least some items
    if len(rssTree.xpath('//item')):
        # Output the MNV search results
        sys.stdout.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        sys.stdout.write(etree.tostring(rssTree, encoding='UTF-8', pretty_print=True))

    sys.exit(0)
# end displayTreeView()
739
def createItems(self, showItems, result, urlName, showName, urlMax=None):
    '''Create a dictionary of MNV compliant RSS items from the results of a RSS feed show search.
    Also merge with any items that were found by using the Web search. Identical items use the RSS
    feed item data over the search item as RSS provides better results.
    showItems: dictionary of shows, each value being [item dictionary, page info, display name];
               it is updated in place
    result:    element holding the downloaded RSS feed items of one show
    urlName:   key of the show within showItems
    showName:  display name stored when the show has no showItems entry yet
    urlMax:    maximum number of feed items to process, None (or 0) for no limit
    return nothing as the show item dictionary will have all the results
    '''
    mnvNamespace = "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}"

    # Initialize show if it has not already had a search result
    if urlName not in showItems:
        showItems[urlName] = [{}, None, showName]

    # Convert each RSS item into a MNV item
    count = 0
    for thewbItem in self.itemFilter(result):
        newItem = etree.XML(self.common.mnvItem)
        # These videos are only viewable in the US so add a country indicator
        etree.SubElement(newItem, mnvNamespace+"country").text = 'us'
        # Extract and massage data
        tmpLink = self.linkFilter(thewbItem)[0].text.strip()
        link = self.common.ampReplace('file://%s/nv_python_libs/configs/HTML/thewb.html?videocode=%s' % (baseProcessingDir, tmpLink[tmpLink.rfind('/')+1:]))
        etree.SubElement(newItem, mnvNamespace+"customhtml").text = 'true'
        # Convert the pubDate '2010-05-02T11:23:25-07:00' to a MNV pubdate format
        pubdate = self.pubdateFilter(thewbItem)
        if len(pubdate):
            pubdate = pubdate[0].text[:-6] # drop the trailing UTC offset
            pubdate = time.strptime(pubdate, '%Y-%m-%dT%H:%M:%S')
            pubdate = time.strftime(self.common.pubDateFormat, pubdate)
        else:
            pubdate = datetime.datetime.now().strftime(self.common.pubDateFormat)
        title = self.common.massageText(self.titleFilter(thewbItem)[0].text.strip())
        tmptitle = None
        # The RSS description is an HTML fragment; collect all of its text nodes
        descList = self.descFilter2(etree.parse(StringIO(self.descFilter1(thewbItem)[0].text), self.HTMLparser))
        description = None
        seasonNum = None
        episodeNum = None
        for eText in descList:
            if eText == '\n\t':
                continue
            # Stay with str: encoding to bytes here made the str prefix tests
            # below raise a TypeError on Python 3
            eText = eText.strip()
            if not description:
                # The first non-empty text node is the description
                description = eText
                continue
            if eText.startswith('Season: ') or eText.startswith('EP: '):
                s_e = eText.replace('Season:','').replace(', Episode:','').replace('EP:','').strip().split(' ')
                if len(s_e) == 1 and can_int(s_e[0].strip()):
                    eText = 'Ep(%02d)' % int(s_e[0].strip())
                    episodeNum = s_e[0].strip()
                elif len(s_e) == 2 and can_int(s_e[0].strip()) and can_int(s_e[1].strip()):
                    eText = 'S%02dE%02d' % (int(s_e[0].strip()), int(s_e[1].strip()))
                    seasonNum = s_e[0].strip()
                    episodeNum = s_e[1].strip()
                title = title.replace('-', '–')
                index = title.find('–')
                if index != -1:
                    tmptitle = '%s: %s %s' % (title[:index].strip(), eText.strip(), title[index:].strip())
                else:
                    tmptitle = '%s %s' % (title, eText.strip())
            elif eText.startswith('Running Time: '):
                # "ss", "mm:ss" or "hh:mm:ss"; anything else is ignored
                videoDuration = eText.replace('Running Time: ', '').strip().split(':')
                if len(videoDuration) > 3:
                    continue
                try:
                    parts = [int(part) for part in videoDuration]
                except ValueError:
                    continue
                videoSeconds = 0
                for part in parts:
                    videoSeconds = videoSeconds*60+part
                if videoSeconds:
                    self.itemDwnLink(newItem)[0].attrib['duration'] = str(videoSeconds)

        if tmptitle:
            title = tmptitle
        title = self.common.massageText(title.strip())
        # An item without any description text gets an empty description
        description = self.common.massageText((description or '').strip())
        # Insert data into a new item element
        newItem.find('title').text = title
        newItem.find('author').text = self.itemAuthor
        newItem.find('pubDate').text = pubdate
        newItem.find('description').text = description
        newItem.find('link').text = link
        self.itemDwnLink(newItem)[0].attrib['url'] = link
        try:
            self.itemThumbNail(newItem)[0].attrib['url'] = self.common.ampReplace(self.itemThumbNail(thewbItem)[0].attrib['url'])
        except IndexError:
            pass # the feed item has no thumbnail; keep the template's value
        self.itemDwnLink(newItem)[0].attrib['lang'] = self.channelLanguage
        if seasonNum:
            etree.SubElement(newItem, mnvNamespace+"season").text = seasonNum
        if episodeNum:
            etree.SubElement(newItem, mnvNamespace+"episode").text = episodeNum
        # Merge RSS results with search results and override any duplicates with the RSS item
        showItems[urlName][0][title.lower()] = newItem
        if urlMax: # Check if the maximum items to process has been met
            count += 1
            # '>=' stops after exactly urlMax items; '>' let one extra item through
            if count >= urlMax:
                break
    return
# end createItems()
845# end Videos() class
def __init__(self, outstream, encoding=None)
Definition: thewb_api.py:39
def updateTheWB(self, create=False)
Definition: thewb_api.py:262
def searchForVideos(self, title, pagenumber)
Definition: thewb_api.py:482
def searchTitle(self, title, pagenumber, pagelen, ignoreError=False)
End of Utility functions.
Definition: thewb_api.py:355
def __init__(self, apikey, mythtv=True, interactive=False, select_first=False, debug=False, custom_ui=None, language=None, search_all_languages=False)
Definition: thewb_api.py:103
def createItems(self, showItems, result, urlName, showName, urlMax=None)
Definition: thewb_api.py:740
def getSeasonEpisode(self, title)
Start - Utility functions.
Definition: thewb_api.py:190
static pid_list_t::iterator find(const PIDInfoMap &map, pid_list_t &list, pid_list_t::iterator begin, pid_list_t::iterator end, bool find_open)
static void print(const QList< uint > &raw_minimas, const QList< uint > &raw_maximas, const QList< float > &minimas, const QList< float > &maximas)