MythTV  master
thewb_api.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # -*- coding: UTF-8 -*-
3 # ----------------------
4 # Name: thewb_api - Simple-to-use Python interface to the The WB RSS feeds (http://www.thewb.com/)
5 # Python Script
6 # Author: R.D. Vaughan
7 # Purpose: This python script is intended to perform a variety of utility functions to search and
8 # access text metadata, video and image URLs from The WB.
9 #
10 # License:Creative Commons GNU GPL v2
11 # (http://creativecommons.org/licenses/GPL/2.0/)
12 #-------------------------------------
13 __title__ ="thewb_api - Simple-to-use Python interface to the The WB RSS feeds (http://www.thewb.com/)"
14 __author__="R.D. Vaughan"
15 __purpose__='''
16 This python script is intended to perform a variety of utility functions to search and access text
17 meta data, video and image URLs from thewb. These routines process RSS feeds provided by The WB
18 (http://www.thewb.com/). The specific "The WB" RSS feeds that are processed are controled through
19 a user XML preference file usually found at "~/.mythtv/MythNetvision/userGrabberPrefs/thewb.xml"
20 '''
21 
22 __version__="v0.1.3"
23 # 0.1.0 Initial development
24 # 0.1.1 Changed the logger to only output to stderr rather than a file
25 # 0.1.2 Removed the need for python MythTV bindings and added "%SHAREDIR%" to icon directory path
# 0.1.3 Fixes to accommodate changes to TheWB web site.
27 
28 import os, struct, sys, re, time, datetime, urllib
29 import logging
30 from socket import gethostname, gethostbyname
31 from threading import Thread
32 from copy import deepcopy
33 
34 from thewb_exceptions import (TheWBUrlError, TheWBHttpError, TheWBRssError, TheWBVideoNotFound, TheWBConfigFileError, TheWBUrlDownloadError)
35 
class OutStreamEncoder(object):
    """Stream proxy that encodes unicode text before handing it to the wrapped stream."""

    def __init__(self, outstream, encoding=None):
        """Remember the wrapped stream and the encoding to apply.

        outstream: the stream object that receives the output
        encoding:  codec name; when empty or None the file system encoding is used
        """
        self.out = outstream
        self.encoding = encoding if encoding else sys.getfilesystemencoding()

    def write(self, obj):
        """Write obj to the wrapped stream, encoding it first when it is a unicode string.

        An IOError raised by the wrapped stream is deliberately ignored.
        """
        payload = obj.encode(self.encoding) if isinstance(obj, unicode) else obj
        try:
            self.out.write(payload)
        except IOError:
            pass

    def __getattr__(self, attr):
        """Forward every attribute that is not found on the wrapper to the wrapped stream"""
        return getattr(self.out, attr)
# Replace the process-wide stdout and stderr with OutStreamEncoder wrappers so that
# unicode strings written anywhere in this module are encoded as UTF-8 first.
sys.stdout = OutStreamEncoder(sys.stdout, 'utf8')
sys.stderr = OutStreamEncoder(sys.stderr, 'utf8')
63 
64 
65 try:
66  from StringIO import StringIO
67  from lxml import etree
68 except Exception, e:
69  sys.stderr.write(u'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
70  sys.exit(1)
71 
# Check that the libxml library used by lxml is current enough
# From the lxml documents it states: (http://codespeak.net/lxml/installation.html)
# "If you want to use XPath, do not use libxml2 2.6.27. We recommend libxml2 2.7.2 or later"
# Testing was performed with the Ubuntu 9.10 "python-lxml" version "2.1.5-1ubuntu2" repository package
#
# etree.LIBXML_VERSION is a tuple of integers. It is compared numerically because comparing
# the dotted string form ranks e.g. "2.10.0" below "2.7.2" and would reject newer libraries.
version = '.'.join(str(digit) for digit in etree.LIBXML_VERSION)
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write(u'''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
          At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
86 
87 
def can_int(x):
    """Report whether a value can be converted to an integer with int().

    Returns True when int(x) succeeds, False when x is None or int(x) rejects it.

    >>> can_int("2")
    True
    >>> can_int("A test")
    False
    >>> can_int(None)
    False
    """
    if x is None:
        return False
    try:
        int(x)
    except (ValueError, TypeError):
        # ValueError: text that is not a number
        # TypeError: an object int() cannot accept at all (e.g. a list)
        return False
    return True
# end can_int
104 
105 
106 class Videos(object):
107  """Main interface to http://www.thewb.com/
108  This is done to support a common naming framework for all python Netvision plugins no matter their site
109  target.
110 
111  Supports search methods
    The apikey is not required to access http://www.thewb.com/
113  """
114  def __init__(self,
115  apikey,
116  mythtv = True,
117  interactive = False,
118  select_first = False,
119  debug = False,
120  custom_ui = None,
121  language = None,
122  search_all_languages = False,
123  ):
124  """apikey (str/unicode):
125  Specify the target site API key. Applications need their own key in some cases
126 
127  mythtv (True/False):
128  When True, the returned meta data is being returned has the key and values massaged to match MythTV
129  When False, the returned meta data is being returned matches what target site returned
130 
131  interactive (True/False): (This option is not supported by all target site apis)
132  When True, uses built-in console UI is used to select the correct show.
133  When False, the first search result is used.
134 
135  select_first (True/False): (This option is not supported currently implemented in any grabbers)
136  Automatically selects the first series search result (rather
137  than showing the user a list of more than one series).
138  Is overridden by interactive = False, or specifying a custom_ui
139 
140  debug (True/False):
141  shows verbose debugging information
142 
143  custom_ui (xx_ui.BaseUI subclass): (This option is not supported currently implemented in any grabbers)
144  A callable subclass of interactive class (overrides interactive option)
145 
146  language (2 character language abbreviation): (This option is not supported by all target site apis)
147  The language of the returned data. Is also the language search
148  uses. Default is "en" (English). For full list, run..
149 
150  search_all_languages (True/False): (This option is not supported by all target site apis)
151  By default, a Netvision grabber will only search in the language specified using
152  the language option. When this is True, it will search for the
153  show in any language
154 
155  """
156  self.config = {}
157 
158  if apikey is not None:
159  self.config['apikey'] = apikey
160  else:
161  pass # TheWB does not require an apikey
162 
163  self.config['debug_enabled'] = debug # show debugging messages
164  self.common = common
165  self.common.debug = debug # Set the common function debug level
166 
167  self.log_name = u'TheWB_Grabber'
168  self.common.logger = self.common.initLogger(path=sys.stderr, log_name=self.log_name)
169  self.logger = self.common.logger # Setups the logger (self.log.debug() etc)
171  self.config['custom_ui'] = custom_ui
172 
173  self.config['interactive'] = interactive
174 
175  self.config['select_first'] = select_first
176 
177  self.config['search_all_languages'] = search_all_languages
178 
179  self.error_messages = {'TheWBUrlError': u"! Error: The URL (%s) cause the exception error (%s)\n", 'TheWBHttpError': u"! Error: An HTTP communications error with The WB was raised (%s)\n", 'TheWBRssError': u"! Error: Invalid RSS meta data\nwas received from The WB error (%s). Skipping item.\n", 'TheWBVideoNotFound': u"! Error: Video search with The WB did not return any results (%s)\n", 'TheWBConfigFileError': u"! Error: thewb_config.xml file missing\nit should be located in and named as (%s).\n", 'TheWBUrlDownloadError': u"! Error: Downloading a RSS feed or Web page (%s).\n", }
180 
181  # Channel details and search results
182  self.channel = {'channel_title': u'The WB', 'channel_link': u'http://www.thewb.com/', 'channel_description': u"Watch full episodes of your favorite shows on The WB.com, like Friends, The O.C., Veronica Mars, Pushing Daisies, Smallville, Buffy The Vampire Slayer, One Tree Hill and Gilmore Girls.", 'channel_numresults': 0, 'channel_returned': 1, u'channel_startindex': 0}
183 
184 
185  # Season and Episode detection regex patterns
186  self.s_e_Patterns = [
187  # Season 3: Ep. 13 (01:04:30)
188  re.compile(u'''Season\\ (?P<seasno>[0-9]+)\\:\\ Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<hours>[0-9]+)\\:(?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''', re.UNICODE),
189  # Season 3: Ep. 13 (04:30)
190  re.compile(u'''Season\\ (?P<seasno>[0-9]+)\\:\\ Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''', re.UNICODE),
191  # Season 3: Ep. 13
192  re.compile(u'''Season\\ (?P<seasno>[0-9]+)\\:\\ Ep.\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
193  # Ep. 13 (01:04:30)
194  re.compile(u'''Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<hours>[0-9]+)\\:(?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''', re.UNICODE),
195  # Ep. 13 (04:30)
196  re.compile(u'''Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''', re.UNICODE),
197  # Ep. 13
198  re.compile(u'''Ep.\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
199  ]
200 
201  self.channel_icon = u'%SHAREDIR%/mythnetvision/icons/thewb.png'
202  # end __init__()
203 
204 
209 
210  def getSeasonEpisode(self, title):
211  ''' Check is there is any season or episode number information in an item's title
212  return array of season and/or episode numbers plus any duration in minutes and seconds
213  return array with None values
214  '''
215  s_e = []
216  for index in range(len(self.s_e_Patterns)):
217  match = self.s_e_Patterns[index].match(title)
218  if not match:
219  continue
220  return match.groups()
221  return s_e
222  # end getSeasonEpisode()
223 
224  def getTheWBConfig(self):
225  ''' Read the MNV The WB grabber "thewb_config.xml" configuration file
226  return nothing
227  '''
228  # Read the grabber thewb_config.xml configuration file
229  url = u'file://%s/nv_python_libs/configs/XML/thewb_config.xml' % (baseProcessingDir, )
230  if not os.path.isfile(url[7:]):
231  raise TheWBConfigFileError(self.error_messages['TheWBConfigFileError'] % (url[7:], ))
232 
233  if self.config['debug_enabled']:
234  print url
235  print
236  try:
237  self.thewb_config = etree.parse(url)
238  except Exception, e:
239  raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (url, errormsg))
240  return
241  # end getTheWBConfig()
242 
243 
245  '''Read the thewb_config.xml and user preference thewb.xml file.
246  If the thewb.xml file does not exist then create it.
247  If the thewb.xml file is too old then update it.
248  return nothing
249  '''
250  # Get thewb_config.xml
251  self.getTheWBConfig()
252 
253  # Check if the thewb.xml file exists
254  userPreferenceFile = self.thewb_config.find('userPreferenceFile').text
255  if userPreferenceFile[0] == '~':
256  self.thewb_config.find('userPreferenceFile').text = u"%s%s" % (os.path.expanduser(u"~"), userPreferenceFile[1:])
257  if os.path.isfile(self.thewb_config.find('userPreferenceFile').text):
258  # Read the grabber thewb_config.xml configuration file
259  url = u'file://%s' % (self.thewb_config.find('userPreferenceFile').text, )
260  if self.config['debug_enabled']:
261  print url
262  print
263  try:
264  self.userPrefs = etree.parse(url)
265  except Exception, e:
266  raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (url, errormsg))
267  # Check if the thewb.xml file is too old
268  nextUpdateSecs = int(self.userPrefs.find('updateDuration').text)*86400 # seconds in a day
269  nextUpdate = time.localtime(os.path.getmtime(self.thewb_config.find('userPreferenceFile').text)+nextUpdateSecs)
270  now = time.localtime()
271  if nextUpdate > now:
272  return
273  create = False
274  else:
275  create = True
276 
277  # If required create/update the thewb.xml file
278  self.updateTheWB(create)
279  return
280  # end getUserPreferences()
281 
282  def updateTheWB(self, create=False):
283  ''' Create or update the thewb.xml user preferences file
284  return nothing
285  '''
286  # Read the default user preferences file
287  url = u'file://%s/nv_python_libs/configs/XML/defaultUserPrefs/thewb.xml' % (baseProcessingDir, )
288  if not os.path.isfile(url[7:]):
289  raise TheWBConfigFileError(self.error_messages['TheWBConfigFileError'] % (url[7:], ))
290 
291  if self.config['debug_enabled']:
292  print 'updateTheWB url(%s)' % url
293  print
294  try:
295  userTheWB = etree.parse(url)
296  except Exception, e:
297  raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (url, errormsg))
298 
299  # Get the current show links from the TheWB web site
300  linksTree = self.common.getUrlData(self.thewb_config.find('treeviewUrls'))
301 
302  if self.config['debug_enabled']:
303  print "create(%s)" % create
304  print "linksTree:"
305  sys.stdout.write(etree.tostring(linksTree, encoding='UTF-8', pretty_print=True))
306  print
307 
308  # Check that at least several show directories were returned
309  if not create:
310  if not len(linksTree.xpath('//results//a')) > 10:
311  return self.userPrefs
312 
313  # Assemble the feeds and formats
314  root = etree.XML(u'<xml></xml>')
315  for directory in linksTree.xpath('//results'):
316  tmpDirectory = etree.SubElement(root, u'showDirectories')
317  tmpDirectory.attrib['name'] = directory.find('name').text
318  for show in directory.xpath('.//a'):
319  showName = show.text
320  # Skip any DVD references as they are not on-line videos
321  if showName.lower().find('dvd') != -1 or show.attrib['href'].lower().find('dvd') != -1:
322  continue
323  tmpShow = etree.XML(u'<url></url>')
324  tmpShow.attrib['enabled'] = u'true'
325  tmpShow.attrib['name'] = self.common.massageText(showName.strip())
326  tmpShow.text = self.common.ampReplace(show.attrib['href'].replace(u'/shows/', u'').replace(u'/', u'').strip())
327  tmpDirectory.append(tmpShow)
328 
329  if self.config['debug_enabled']:
330  print "Before any merging userTheWB:"
331  sys.stdout.write(etree.tostring(userTheWB, encoding='UTF-8', pretty_print=True))
332  print
333 
334  # If there was an existing thewb.xml file then add any relevant user settings to
335  # this new thewb.xml
336  if not create:
337  userTheWB.find('updateDuration').text = self.userPrefs.find('updateDuration').text
338  if self.userPrefs.find('showDirectories').get('globalmax'):
339  root.find('showDirectories').attrib['globalmax'] = self.userPrefs.find('showDirectories').attrib['globalmax']
340  for rss in self.userPrefs.xpath("//url[@enabled='false']"):
341  elements = root.xpath("//url[text()=$URL]", URL=rss.text.strip())
342  if len(elements):
343  elements[0].attrib['enabled'] = u'false'
344  if rss.get('max'):
345  elements[0].attrib['max'] = rss.attrib['max']
346 
347  if self.config['debug_enabled']:
348  print "After any merging userTheWB:"
349  sys.stdout.write(etree.tostring(userTheWB, encoding='UTF-8', pretty_print=True))
350  print
351 
352  # Save the thewb.xml file
353  prefDir = self.thewb_config.find('userPreferenceFile').text.replace(u'/thewb.xml', u'')
354  if not os.path.isdir(prefDir):
355  os.makedirs(prefDir)
356  fd = open(self.thewb_config.find('userPreferenceFile').text, 'w')
357  fd.write(etree.tostring(userTheWB, encoding='UTF-8', pretty_print=True)[:-len(u'</userTheWB>')-1]+u''.join(etree.tostring(element, encoding='UTF-8', pretty_print=True) for element in root.xpath('/xml/*'))+u'</userTheWB>')
358  fd.close()
359 
360  # Input the refreshed user preference data
361  try:
362  self.userPrefs = etree.parse(self.thewb_config.find('userPreferenceFile').text)
363  except Exception, e:
364  raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (url, errormsg))
365  return
366  # end updateTheWB()
367 
368 
373 
374 
375  def searchTitle(self, title, pagenumber, pagelen, ignoreError=False):
376  '''Key word video search of the TheWB web site
377  return an array of matching item elements
378  return
379  '''
380  orgURL = self.thewb_config.find('searchURLS').xpath(".//href")[0].text
381 
382  try:
383  searchVar = u'?q=%s' % (urllib.quote(title.encode("utf-8")).replace(u' ', u'+'))
384  except UnicodeDecodeError:
385  searchVar = u'?q=%s' % (urllib.quote(title).replace(u' ', u'+'))
386  url = self.thewb_config.find('searchURLS').xpath(".//href")[0].text+searchVar
387 
388  if self.config['debug_enabled']:
389  print "Search url(%s)" % url
390  print
391 
392  self.thewb_config.find('searchURLS').xpath(".//href")[0].text = url
393 
394  # Perform a search
395  try:
396  resultTree = self.common.getUrlData(self.thewb_config.find('searchURLS'), pageFilter=None)
397  except Exception, errormsg:
398  self.thewb_config.find('searchURLS').xpath(".//href")[0].text = orgURL
399  raise TheWBUrlDownloadError(self.error_messages['TheWBUrlDownloadError'] % (errormsg))
400 
401  self.thewb_config.find('searchURLS').xpath(".//href")[0].text = orgURL
402 
403  if self.config['debug_enabled']:
404  print "resultTree count(%s)" % len(resultTree)
405  print etree.tostring(resultTree, encoding='UTF-8', pretty_print=True)
406  print
407 
408  if resultTree is None:
409  if ignoreError:
410  return [None, None]
411  raise TheWBVideoNotFound(u"No TheWB.com Video matches found for search value (%s)" % title)
412 
413  searchResults = resultTree.xpath('//result/div')
414  if not len(searchResults):
415  if ignoreError:
416  return [None, None]
417  raise TheWBVideoNotFound(u"No TheWB.com Video matches found for search value (%s)" % title)
418 
419  # Set the number of search results returned
420  self.channel['channel_numresults'] = len(searchResults)
421 
422  # TheWB search results fo not have a pubDate so use the current data time
423  # e.g. "Sun, 06 Jan 2008 21:44:36 GMT"
424  pubDate = datetime.datetime.now().strftime(self.common.pubDateFormat)
425 
426  # Translate the search results into MNV RSS item format
427  thumbNailFilter = etree.XPath('.//div[@class="overlay_thumb_area"]//img')
428  textFilter = etree.XPath('.//div[@class="overlay-bg-middle"]/p')
429  titleFilter = etree.XPath('.//div[@class="overlay_thumb_area"]//a[@title!=""]/@title')
430  descFilter = etree.XPath('.//div[@class="overlay-bg-middle"]/p[@class="overlay_extra overlay_spacer_top"]/text()')
431  linkFilter = etree.XPath('.//div[@class="overlay_thumb_area"]//a[@title!=""]/@href')
432  itemThumbNail = etree.XPath('.//media:thumbnail', namespaces=self.common.namespaces)
433  itemDwnLink = etree.XPath('.//media:content', namespaces=self.common.namespaces)
434  itemDict = {}
435  for result in searchResults:
436  if linkFilter(result) != None: # Make sure that this result actually has a video
437  thewbItem = etree.XML(self.common.mnvItem)
438  # These videos are only viewable in the US so add a country indicator
439  etree.SubElement(thewbItem, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}country").text = u'us'
440  # Extract and massage data
441  thumbNail = self.common.ampReplace(thumbNailFilter(result)[0].attrib['src'])
442  title = titleFilter(result)[0].strip()
443  link = u'file://%s/nv_python_libs/configs/HTML/thewb.html?videocode=%s' % (baseProcessingDir, result.attrib['id'].replace(u'video_', u''))
444  etree.SubElement(thewbItem, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}customhtml").text = 'true'
445  descriptionElement = textFilter(result)[0]
446  description = u''
447  tmptitle = None
448  seasonNum = None
449  episodeNum = None
450  for e in descriptionElement.xpath('./*'):
451  try:
452  eText = unicode(e.tail, 'UTF-8').strip()
453  except:
454  continue
455  if eText.startswith(u'Season ') or eText.startswith(u'EP'):
456  sed = self.getSeasonEpisode(eText)
457  if not len(sed):
458  continue
459  infoList = u'S%02dE%02d' % (int(sed[0]), int(sed[1]))
460  seasonNum = u'%d' % int(sed[0])
461  episodeNum = u'%d' % int(sed[1])
462  if len(sed) == 5:
463  videoSeconds = int(sed[2])*3600+int(sed[3])*60+int(sed[4])
464  itemDwnLink(thewbItem)[0].attrib['duration'] = unicode(videoSeconds)
465  elif len(sed) == 4:
466  videoSeconds = int(sed[2])*60+int(sed[3])
467  itemDwnLink(thewbItem)[0].attrib['duration'] = unicode(videoSeconds)
468 
469  index = title.find(u':')
470  if index != -1:
471  tmptitle = u'%s: %s %s' % (title[:index].strip(), infoList, title[index+1:].strip())
472  else:
473  tmptitle = u'%s: %s' % (title.strip(), infoList)
474  if tmptitle:
475  title = tmptitle
476  title = self.common.massageText(title.strip())
477  description = self.common.massageText(descFilter(result)[0].strip())
478 
479  # Insert data into a new item element
480  thewbItem.find('title').text = title
481  thewbItem.find('author').text = "The WB.com"
482  thewbItem.find('pubDate').text = pubDate
483  thewbItem.find('description').text = description
484  thewbItem.find('link').text = link
485  itemThumbNail(thewbItem)[0].attrib['url'] = thumbNail
486  itemDwnLink(thewbItem)[0].attrib['url'] = link
487  if seasonNum:
488  etree.SubElement(thewbItem, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}season").text = seasonNum
489  if episodeNum:
490  etree.SubElement(thewbItem, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}episode").text = episodeNum
491  itemDict[title.lower()] = thewbItem
492 
493  if not len(itemDict.keys()):
494  if ignoreError:
495  return [None, None]
496  raise TheWBVideoNotFound(u"No TheWB Video matches found for search value (%s)" % title)
497 
498  return [itemDict, resultTree.xpath('//pageInfo')[0].text]
499  # end searchTitle()
500 
501 
502  def searchForVideos(self, title, pagenumber):
503  """Common name for a video search. Used to interface with MythTV plugin NetVision
504  """
505  # Get thewb_config.xml
506  self.getTheWBConfig()
507 
508  if self.config['debug_enabled']:
509  print "self.thewb_config:"
510  sys.stdout.write(etree.tostring(self.thewb_config, encoding='UTF-8', pretty_print=True))
511  print
512 
513  # Easier for debugging
514 # print self.searchTitle(title, pagenumber, self.page_limit)
515 # print
516 # sys.exit()
517 
518  try:
519  data = self.searchTitle(title, pagenumber, self.page_limit)
520  except TheWBVideoNotFound, msg:
521  sys.stderr.write(u"%s\n" % msg)
522  sys.exit(0)
523  except TheWBUrlError, msg:
524  sys.stderr.write(u'%s\n' % msg)
525  sys.exit(1)
526  except TheWBHttpError, msg:
527  sys.stderr.write(self.error_messages['TheWBHttpError'] % msg)
528  sys.exit(1)
529  except TheWBRssError, msg:
530  sys.stderr.write(self.error_messages['TheWBRssError'] % msg)
531  sys.exit(1)
532  except Exception, e:
533  sys.stderr.write(u"! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (title, e))
534  sys.exit(1)
535 
536  # Create RSS element tree
537  rssTree = etree.XML(self.common.mnvRSS+u'</rss>')
538 
539  # Set the paging values
540  itemCount = len(data[0].keys())
541  if data[1] == 'true':
542  self.channel['channel_returned'] = itemCount
543  self.channel['channel_startindex'] = itemCount
544  self.channel['channel_numresults'] = itemCount+(self.page_limit*(int(pagenumber)-1)+1)
545  else:
546  self.channel['channel_returned'] = itemCount
547  self.channel['channel_startindex'] = self.channel['channel_returned']
548  self.channel['channel_numresults'] = self.channel['channel_returned']
549 
550  # Add the Channel element tree
551  channelTree = self.common.mnvChannelElement(self.channel)
552  rssTree.append(channelTree)
553 
554  lastKey = None
555 
556  for key in sorted(data[0].keys()):
557  if lastKey != key:
558  channelTree.append(data[0][key])
559  lastKey = key
560 
561  # Output the MNV search results
562  sys.stdout.write(u'<?xml version="1.0" encoding="UTF-8"?>\n')
563  sys.stdout.write(etree.tostring(rssTree, encoding='UTF-8', pretty_print=True))
564  sys.exit(0)
565  # end searchForVideos()
566 
567  def displayTreeView(self):
568  '''Gather the The WB feeds then get a max page of videos meta data in each of them
569  Display the results and exit
570  '''
571  # Get the user preferences that specify which shows and formats they want to be in the treeview
572  try:
573  self.getUserPreferences()
574  except Exception, e:
575  sys.stderr.write(u'%s\n' % e)
576  sys.exit(1)
577 
578  # Verify that there is at least one RSS feed that user wants to download
579  showFeeds = self.userPrefs.xpath("//showDirectories//url[@enabled='true']")
580  totalFeeds = self.userPrefs.xpath("//url[@enabled='true']")
581 
582  if self.config['debug_enabled']:
583  print "self.userPrefs show count(%s) total feed count(%s):" % (len(showFeeds), len(totalFeeds))
584  sys.stdout.write(etree.tostring(self.userPrefs, encoding='UTF-8', pretty_print=True))
585  print
586 
587  if not len(totalFeeds):
588  sys.stderr.write(u'There are no show or treeviewURLS elements "enabled" in your "thewb.xml" user preferences\nfile (%s)\n' % self.thewb_config.find('userPreferenceFile').text)
589  sys.exit(1)
590 
591  # Massage channel icon
592  self.channel_icon = self.common.ampReplace(self.channel_icon)
593 
594  # Create RSS element tree
595  rssTree = etree.XML(self.common.mnvRSS+u'</rss>')
596 
597  # Add the Channel element tree
598  channelTree = self.common.mnvChannelElement(self.channel)
599  rssTree.append(channelTree)
600 
601  # Process any user specified searches
602  showItems = {}
603  if len(showFeeds) != None:
604  for searchDetails in showFeeds:
605  try:
606  data = self.searchTitle(searchDetails.text.strip(), 1, self.page_limit, ignoreError=True)
607  if data[0] == None:
608  continue
609  except TheWBVideoNotFound, msg:
610  sys.stderr.write(u"%s\n" % msg)
611  continue
612  except TheWBUrlError, msg:
613  sys.stderr.write(u'%s\n' % msg)
614  continue
615  except TheWBHttpError, msg:
616  sys.stderr.write(self.error_messages['TheWBHttpError'] % msg)
617  continue
618  except TheWBRssError, msg:
619  sys.stderr.write(self.error_messages['TheWBRssError'] % msg)
620  continue
621  except Exception, e:
622  sys.stderr.write(u"! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (searchDetails.text.strip(), e))
623  continue
624  data.append(searchDetails.attrib['name'])
625  showItems[self.common.massageText(searchDetails.text.strip())] = data
626  continue
627 
628  if self.config['debug_enabled']:
629  print "After searches count(%s):" % len(showItems)
630  for key in showItems.keys():
631  print "Show(%s) name(%s) item count(%s)" % (key, showItems[key][2], len(showItems[key][0]))
632  print
633 
634  # Filter out any items that are not specifically for the show
635  for showNameKey in showItems.keys():
636  tmpList = {}
637  for key in showItems[showNameKey][0].keys():
638  tmpLink = showItems[showNameKey][0][key].find('link').text.replace(self.thewb_config.find('searchURLS').xpath(".//href")[0].text, u'')
639  if tmpLink.startswith(showNameKey):
640  tmpList[key] = showItems[showNameKey][0][key]
641  showItems[showNameKey][0] = tmpList
642 
643  if self.config['debug_enabled']:
644  print "After search filter of non-show items count(%s):" % len(showItems)
645  for key in showItems.keys():
646  print "Show(%s) name(%s) item count(%s)" % (key, showItems[key][2], len(showItems[key][0]))
647  print
648 
649  # Create a structure of feeds that concurrently have videos
650  rssData = etree.XML(u'<xml></xml>')
651  rssFeedsUrl = u'http://www.thewb.com/shows/feed/'
652  for feedType in self.userPrefs.findall('showDirectories'):
653  for rssFeed in self.userPrefs.xpath("//showDirectories/url[@enabled='true']"):
654  link = rssFeedsUrl+rssFeed.text
655  urlName = rssFeed.attrib.get('name')
656  if urlName:
657  uniqueName = u'%s;%s' % (urlName, link)
658  else:
659  uniqueName = u'RSS;%s' % (link)
660  url = etree.XML(u'<url></url>')
661  etree.SubElement(url, "name").text = uniqueName
662  etree.SubElement(url, "href").text = link
663  etree.SubElement(url, "filter").text = u"//channel/title"
664  etree.SubElement(url, "filter").text = u"//item"
665  etree.SubElement(url, "parserType").text = u'xml'
666  rssData.append(url)
667 
668  if self.config['debug_enabled']:
669  print "rssData:"
670  sys.stdout.write(etree.tostring(rssData, encoding='UTF-8', pretty_print=True))
671  print
672 
673  # Get the RSS Feed data
674  self.channelLanguage = u'en'
675  self.itemAuthor = u'The WB.com'
676  self.itemFilter = etree.XPath('.//item', namespaces=self.common.namespaces)
677  self.titleFilter = etree.XPath('.//title', namespaces=self.common.namespaces)
678  self.linkFilter = etree.XPath('.//link', namespaces=self.common.namespaces)
679  self.descFilter1 = etree.XPath('.//description', namespaces=self.common.namespaces)
680  self.descFilter2 = etree.XPath("//text()")
681  self.pubdateFilter = etree.XPath('.//pubDate', namespaces=self.common.namespaces)
682  self.thumbNailFilter = etree.XPath('.//media:thumbnail', namespaces=self.common.namespaces)
683  self.itemThumbNail = etree.XPath('.//media:thumbnail', namespaces=self.common.namespaces)
684  self.itemDwnLink = etree.XPath('.//media:content', namespaces=self.common.namespaces)
685  self.rssName = etree.XPath('title', namespaces=self.common.namespaces)
686  self.feedFilter = etree.XPath('//url[text()=$url]')
687  self.HTMLparser = etree.HTMLParser()
688  if rssData.find('url') != None:
689  try:
690  resultTree = self.common.getUrlData(rssData)
691  except Exception, errormsg:
692  raise TheWBUrlDownloadError(self.error_messages['TheWBUrlDownloadError'] % (errormsg))
693 
694  if self.config['debug_enabled']:
695  print "resultTree:"
696  sys.stdout.write(etree.tostring(resultTree, encoding='UTF-8', pretty_print=True))
697  print
698 
699  # Process each directory of the user preferences that have an enabled rss feed
700  for result in resultTree.findall('results'):
701  names = result.find('name').text.split(u';')
702  names[0] = self.common.massageText(names[0])
703  if names[0] == 'RSS':
704  names[0] = self.common.massageText(self.rssName(result.find('result'))[0].text.strip())
705  urlName = names[0]
706  else:
707  urlName = result.find('url').text.replace(rssFeedsUrl, u'').strip()
708 
709  urlMax = None
710  url = self.feedFilter(self.userPrefs, url=names[1])
711  if len(url):
712  if url[0].attrib.get('max'):
713  try:
714  urlMax = int(url[0].attrib.get('max'))
715  except:
716  pass
717  elif url[0].getparent().attrib.get('globalmax'):
718  try:
719  urlMax = int(url[0].getparent().attrib.get('globalmax'))
720  except:
721  pass
722  if urlMax == 0:
723  urlMax = None
724  if self.config['debug_enabled']:
725  print "Results: #Items(%s) for (%s)" % (len(self.itemFilter(result)), names)
726  print
727  self.createItems(showItems, result, urlName, names[0], urlMax=urlMax)
728  continue
729 
730  # Add all the shows and rss items to the channel
731  for key in sorted(showItems.keys()):
732  if not len(showItems[key][0]):
733  continue
734  # Create a new directory and/or subdirectory if required
735  directoryElement = etree.SubElement(channelTree, u'directory')
736  directoryElement.attrib['name'] = showItems[key][2]
737  directoryElement.attrib['thumbnail'] = self.channel_icon
738 
739  if self.config['debug_enabled']:
740  print "Results: #Items(%s) for (%s)" % (len(showItems[key][0]), showItems[key][2])
741  print
742 
743  # Copy all the items into the MNV RSS directory
744  for itemKey in sorted(showItems[key][0].keys()):
745  directoryElement.append(showItems[key][0][itemKey])
746 
747  if self.config['debug_enabled']:
748  print "Final results: #Items(%s)" % len(rssTree.xpath('//item'))
749  print
750 
751  # Check that there was at least some items
752  if len(rssTree.xpath('//item')):
753  # Output the MNV search results
754  sys.stdout.write(u'<?xml version="1.0" encoding="UTF-8"?>\n')
755  sys.stdout.write(etree.tostring(rssTree, encoding='UTF-8', pretty_print=True))
756 
757  sys.exit(0)
758  # end displayTreeView()
759 
def createItems(self, showItems, result, urlName, showName, urlMax=None):
    '''Create a dictionary of MNV compliant RSS items from the results of a RSS feed show search.
    Also merge with any items that were found by using the Web search. Identical items use the RSS
    feed item data over the search item as RSS provides better results.

    showItems -- dictionary of shows, updated in place. A missing urlName entry is created
                 as [{}, None, showName]; element [0] maps lower-cased item titles to the
                 MNV item elements.
    result    -- RSS feed search result that is passed to self.itemFilter() to get its items
    urlName   -- key of the show within showItems
    showName  -- display name stored when the show entry is first created
    urlMax    -- optional maximum number of feed items to convert, a false value (None or 0)
                 means there is no limit

    return nothing as the show item dictionary will have all the results
    '''
    # Namespace prefix shared by every MNV specific child element added below
    mnvNS = "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}"

    # Initialize the show if it has not already had a search result
    if urlName not in showItems:
        showItems[urlName] = [{}, None, showName]

    # Convert each RSS item into a MNV item
    count = 0
    for thewbItem in self.itemFilter(result):
        newItem = etree.XML(self.common.mnvItem)
        # These videos are only viewable in the US so add a country indicator
        etree.SubElement(newItem, mnvNS + "country").text = u'us'
        # Extract and massage data
        tmpLink = self.linkFilter(thewbItem)[0].text.strip()
        # Only the last path component of the feed link is used as the video code
        link = self.common.ampReplace(u'file://%s/nv_python_libs/configs/HTML/thewb.html?videocode=%s' % (baseProcessingDir, tmpLink[tmpLink.rfind(u'/')+1:]))
        etree.SubElement(newItem, mnvNS + "customhtml").text = 'true'

        # Convert the pubDate '2010-05-02T11:23:25-07:00' to a MNV pubdate format
        pubdate = self.pubdateFilter(thewbItem)
        if pubdate:
            # Drop the trailing six character UTC offset before parsing
            pubdate = time.strptime(pubdate[0].text[:-6], '%Y-%m-%dT%H:%M:%S')
            pubdate = time.strftime(self.common.pubDateFormat, pubdate)
        else:
            # No publication date in the feed item so fall back to the current time
            pubdate = datetime.datetime.now().strftime(self.common.pubDateFormat)

        title = self.common.massageText(self.titleFilter(thewbItem)[0].text.strip())
        tmptitle = None
        descList = self.descFilter2(etree.parse(StringIO(self.descFilter1(thewbItem)[0].text), self.HTMLparser))
        description = None
        seasonNum = None
        episodeNum = None
        for eText in descList:
            if eText == '\n\t':
                continue
            eText = eText.strip().encode('UTF-8')
            # The first non-empty text fragment is taken as the description
            if not description:
                description = eText
                continue
            try:
                if eText.startswith(u'Season: ') or eText.startswith(u'EP: '):
                    s_e = eText.replace(u'Season:', u'').replace(u', Episode:', u'').replace(u'EP:', u'').strip().split(u' ')
                    if len(s_e) == 1 and can_int(s_e[0].strip()):
                        # Episode number only
                        eText = u'Ep(%02d)' % int(s_e[0].strip())
                        episodeNum = s_e[0].strip()
                    elif len(s_e) == 2 and can_int(s_e[0].strip()) and can_int(s_e[1].strip()):
                        # Season and episode numbers
                        eText = u'S%02dE%02d' % (int(s_e[0].strip()), int(s_e[1].strip()))
                        seasonNum = s_e[0].strip()
                        episodeNum = s_e[1].strip()
                    # Insert the season/episode text in front of the first dash of the
                    # title or append it when the title has no dash
                    title = title.replace(u'-', u'–')
                    index = title.find(u'–')
                    if index != -1:
                        tmptitle = u'%s: %s %s' % (title[:index].strip(), eText.strip(), title[index:].strip())
                    else:
                        tmptitle = u'%s %s' % (title, eText.strip())
                    continue
                elif eText.startswith(u'Running Time: '):
                    videoDuration = eText.replace(u'Running Time: ', u'').strip().split(u':')
                    try:
                        videoSeconds = 0
                        # Accept "SS", "MM:SS" or "HH:MM:SS", anything longer is ignored
                        if len(videoDuration) <= 3:
                            for part in videoDuration:
                                videoSeconds = videoSeconds * 60 + int(part)
                        if videoSeconds:
                            self.itemDwnLink(newItem)[0].attrib['duration'] = unicode(videoSeconds)
                    except (ValueError, IndexError):
                        # The duration is optional: skip a non-numeric running time or an
                        # item template without a download link element
                        pass
            except UnicodeDecodeError:
                # eText was UTF-8 encoded above, so comparing it with the unicode
                # literals can fail on non-ASCII text, such fragments are skipped
                continue

        if tmptitle:
            title = tmptitle
        title = self.common.massageText(title.strip())
        # description is still None when the feed item had no usable description text
        description = self.common.massageText((description or '').strip())

        # Insert data into a new item element
        newItem.find('title').text = title
        newItem.find('author').text = self.itemAuthor
        newItem.find('pubDate').text = pubdate
        newItem.find('description').text = description
        newItem.find('link').text = link
        self.itemDwnLink(newItem)[0].attrib['url'] = link
        try:
            self.itemThumbNail(newItem)[0].attrib['url'] = self.common.ampReplace(self.itemThumbNail(thewbItem)[0].attrib['url'])
        except IndexError:
            # The thumbnail is optional, the feed item or the template has none
            pass
        self.itemDwnLink(newItem)[0].attrib['lang'] = self.channelLanguage
        if seasonNum:
            etree.SubElement(newItem, mnvNS + "season").text = seasonNum
        if episodeNum:
            etree.SubElement(newItem, mnvNS + "episode").text = episodeNum

        # Merge RSS results with search results and override any duplicates with the RSS item
        showItems[urlName][0][title.lower()] = newItem

        # Stop as soon as the maximum number of items to process has been met
        if urlMax:
            count += 1
            if count >= urlMax:
                break
864  # end createItems()
865 # end Videos() class
def searchForVideos(self, title, pagenumber)
Definition: thewb_api.py:502
static pid_list_t::iterator find(const PIDInfoMap &map, pid_list_t &list, pid_list_t::iterator begin, pid_list_t::iterator end, bool find_open)
def createItems(self, showItems, result, urlName, showName, urlMax=None)
Definition: thewb_api.py:760
def searchTitle(self, title, pagenumber, pagelen, ignoreError=False)
End of Utility functions.
Definition: thewb_api.py:375
def updateTheWB(self, create=False)
Definition: thewb_api.py:282
def __init__(self, apikey, mythtv=True, interactive=False, select_first=False, debug=False, custom_ui=None, language=None, search_all_languages=False)
Definition: thewb_api.py:114
def __init__(self, outstream, encoding=None)
Definition: thewb_api.py:38
def getSeasonEpisode(self, title)
Start - Utility functions.
Definition: thewb_api.py:210