MythTV  master
thewb_api.py
Go to the documentation of this file.
1 # -*- coding: UTF-8 -*-
2 
3 # ----------------------
4 # Name: thewb_api - Simple-to-use Python interface to the The WB RSS feeds (http://www.thewb.com/)
5 # Python Script
6 # Author: R.D. Vaughan
7 # Purpose: This python script is intended to perform a variety of utility functions to search and
8 # access text metadata, video and image URLs from The WB.
9 #
10 # License:Creative Commons GNU GPL v2
11 # (http://creativecommons.org/licenses/GPL/2.0/)
12 #-------------------------------------
13 __title__ ="thewb_api - Simple-to-use Python interface to the The WB RSS feeds (http://www.thewb.com/)"
14 __author__="R.D. Vaughan"
15 __purpose__='''
16 This python script is intended to perform a variety of utility functions to search and access text
17 meta data, video and image URLs from thewb. These routines process RSS feeds provided by The WB
18 (http://www.thewb.com/). The specific "The WB" RSS feeds that are processed are controled through
19 a user XML preference file usually found at "~/.mythtv/MythNetvision/userGrabberPrefs/thewb.xml"
20 '''
21 
22 __version__="v0.1.3"
23 # 0.1.0 Initial development
24 # 0.1.1 Changed the logger to only output to stderr rather than a file
25 # 0.1.2 Removed the need for python MythTV bindings and added "%SHAREDIR%" to icon directory path
26 # 0.1.3 Fixes to accommodate changes to TheWB web site.
27 
28 import os, struct, sys, re, time, datetime, urllib.request, urllib.parse, urllib.error
29 import logging
30 from socket import gethostname, gethostbyname
31 from threading import Thread
32 from copy import deepcopy
33 
34 from .thewb_exceptions import (TheWBUrlError, TheWBHttpError, TheWBRssError, TheWBVideoNotFound, TheWBConfigFileError, TheWBUrlDownloadError)
35 import io
36 
class OutStreamEncoder(object):
    """Wrap an output stream so that text written to it uses one encoding.

    ``str`` objects are encoded with ``encoding`` and written to the wrapped
    stream's binary ``buffer``; ``bytes`` are passed to the buffer unchanged.
    When the wrapped stream has no ``buffer`` attribute (for example an
    ``io.StringIO``), the data is written to the stream itself as text
    instead of failing with ``AttributeError``.
    """

    def __init__(self, outstream, encoding=None):
        """Remember the stream to wrap and the encoding to apply.

        outstream: the stream object that receives the output.
        encoding:  codec name; when empty/None the file system encoding
                   reported by ``sys.getfilesystemencoding()`` is used.
        """
        self.out = outstream
        self.encoding = encoding or sys.getfilesystemencoding()

    def write(self, obj):
        """Write ``obj`` (``str`` or ``bytes``) to the wrapped stream."""
        binary = getattr(self.out, 'buffer', None)
        if binary is not None:
            if isinstance(obj, str):
                obj = obj.encode(self.encoding)
            binary.write(obj)
            return
        # No binary layer available: hand text to the stream directly.
        if isinstance(obj, (bytes, bytearray)):
            obj = bytes(obj).decode(self.encoding)
        self.out.write(obj)

    def __getattr__(self, attr):
        """Delegate everything but write to the stream"""
        # __getattr__ only runs when normal lookup failed. If "out" itself is
        # missing (instance built without __init__), delegating would recurse
        # forever, so report the attribute as absent instead.
        if attr == 'out':
            raise AttributeError(attr)
        return getattr(self.out, attr)
55 
# Route everything written to stdout/stderr through the UTF-8 encoding wrapper.
sys.stdout = OutStreamEncoder(sys.stdout, 'utf8')
sys.stderr = OutStreamEncoder(sys.stderr, 'utf8')


try:
    from io import StringIO
    from lxml import etree
except Exception as e:
    sys.stderr.write('\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
    sys.exit(1)

# Check that the lxml library is current enough
# From the lxml documents it states: (http://codespeak.net/lxml/installation.html)
# "If you want to use XPath, do not use libxml2 2.6.27. We recommend libxml2 2.7.2 or later"
# Testing was performed with the Ubuntu 9.10 "python-lxml" version "2.1.5-1ubuntu2" repository package
#
# The dotted string is only used for the error message. The comparison itself
# is done on the numeric tuple: comparing the strings would order "2.10.3"
# before "2.7.2" and wrongly reject current libxml2 releases.
version = '.'.join(str(digit) for digit in etree.LIBXML_VERSION)
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write('''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
          At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
81 
82 
def can_int(x):
    """Return True when ``int(x)`` succeeds, otherwise False.

    ``None`` and values that ``int()`` rejects by type (for example a list)
    or by value (for example non-numeric text or an infinite float) all
    yield False instead of raising.

    >>> can_int("2")
    True
    >>> can_int("A test")
    False
    >>> can_int(None)
    False
    """
    if x is None:
        return False
    try:
        int(x)
    except (TypeError, ValueError, OverflowError):
        return False
    return True
# end can_int
99 
100 
101 class Videos(object):
102  """Main interface to http://www.thewb.com/
103  This is done to support a common naming framework for all python Netvision plugins no matter their site
104  target.
105 
106  Supports search methods
107  The apikey is a not required to access http://www.thewb.com/
108  """
def __init__(self,
             apikey,
             mythtv = True,
             interactive = False,
             select_first = False,
             debug = False,
             custom_ui = None,
             language = None,
             search_all_languages = False,
             ):
    """Set up the grabber configuration, logger, messages and title patterns.

    apikey (str):
        Target site API key. Stored in the configuration only when it is
        not None; The WB does not require one.

    mythtv (True/False):
        Accepted for interface compatibility with the other Netvision
        grabbers; not used by this initializer.

    interactive (True/False):
        Stored in the configuration as 'interactive'.

    select_first (True/False):
        Stored in the configuration as 'select_first'.

    debug (True/False):
        Enables verbose debugging output; also copied onto the shared
        "common" helper object.

    custom_ui:
        Stored in the configuration as 'custom_ui'.

    language:
        Accepted for interface compatibility; not used by this initializer.

    search_all_languages (True/False):
        Stored in the configuration as 'search_all_languages'.
    """
    settings = {}
    if apikey is not None:      # TheWB does not require an apikey
        settings['apikey'] = apikey
    settings['debug_enabled'] = debug    # show debugging messages
    settings['custom_ui'] = custom_ui
    settings['interactive'] = interactive
    settings['select_first'] = select_first
    settings['search_all_languages'] = search_all_languages
    self.config = settings

    # Shared helper object; "common" is a module level name supplied from
    # outside this block.
    self.common = common
    self.common.debug = debug   # Set the common function debug level

    # Set up the logger; it writes to stderr
    self.log_name = 'TheWB_Grabber'
    self.common.logger = self.common.initLogger(path=sys.stderr, log_name=self.log_name)
    self.logger = self.common.logger

    # Message templates keyed by exception class name
    self.error_messages = {
        'TheWBUrlError': "! Error: The URL (%s) cause the exception error (%s)\n",
        'TheWBHttpError': "! Error: An HTTP communications error with The WB was raised (%s)\n",
        'TheWBRssError': "! Error: Invalid RSS meta data\nwas received from The WB error (%s). Skipping item.\n",
        'TheWBVideoNotFound': "! Error: Video search with The WB did not return any results (%s)\n",
        'TheWBConfigFileError': "! Error: thewb_config.xml file missing\nit should be located in and named as (%s).\n",
        'TheWBUrlDownloadError': "! Error: Downloading a RSS feed or Web page (%s).\n",
    }

    # Channel details and search results
    self.channel = {
        'channel_title': 'The WB',
        'channel_link': 'http://www.thewb.com/',
        'channel_description': "Watch full episodes of your favorite shows on The WB.com, like Friends, The O.C., Veronica Mars, Pushing Daisies, Smallville, Buffy The Vampire Slayer, One Tree Hill and Gilmore Girls.",
        'channel_numresults': 0,
        'channel_returned': 1,
        'channel_startindex': 0,
    }

    # Season and Episode detection regex patterns, most specific first
    patternSources = (
        # Season 3: Ep. 13 (01:04:30)
        '''Season\\ (?P<seasno>[0-9]+)\\:\\ Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<hours>[0-9]+)\\:(?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''',
        # Season 3: Ep. 13 (04:30)
        '''Season\\ (?P<seasno>[0-9]+)\\:\\ Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''',
        # Season 3: Ep. 13
        '''Season\\ (?P<seasno>[0-9]+)\\:\\ Ep.\\ (?P<epno>[0-9]+).*$''',
        # Ep. 13 (01:04:30)
        '''Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<hours>[0-9]+)\\:(?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''',
        # Ep. 13 (04:30)
        '''Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''',
        # Ep. 13
        '''Ep.\\ (?P<epno>[0-9]+).*$''',
    )
    self.s_e_Patterns = [re.compile(source, re.UNICODE) for source in patternSources]

    self.channel_icon = '%SHAREDIR%/mythnetvision/icons/thewb.png'
# end __init__()
198 
199 
204 
def getSeasonEpisode(self, title):
    '''Look for season/episode (and optional duration) details at the start of a title.

    Each compiled pattern in self.s_e_Patterns is tried in list order.
    return the captured groups (a tuple of strings) of the first pattern that matches
    return an empty list when no pattern matches
    '''
    for pattern in self.s_e_Patterns:
        found = pattern.match(title)
        if found:
            return found.groups()
    return []
# end getSeasonEpisode()
218 
def getTheWBConfig(self):
    ''' Read the MNV The WB grabber "thewb_config.xml" configuration file into self.thewb_config
    raise TheWBConfigFileError when the file does not exist
    raise TheWBUrlError when the file cannot be parsed
    return nothing
    '''
    # Read the grabber thewb_config.xml configuration file
    configPath = '%s/nv_python_libs/configs/XML/thewb_config.xml' % (baseProcessingDir, )
    url = 'file://%s' % (configPath, )
    if not os.path.isfile(configPath):
        raise TheWBConfigFileError(self.error_messages['TheWBConfigFileError'] % (configPath, ))

    if self.config['debug_enabled']:
        print(url)
        print()
    try:
        self.thewb_config = etree.parse(url)
    except Exception as errormsg:
        # The handler previously bound the exception to a different name than
        # the one it formatted, which raised NameError instead of TheWBUrlError.
        raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (url, errormsg)) from errormsg
    return
# end getTheWBConfig()
237 
238 
def getUserPreferences(self):
    '''Read the thewb_config.xml and user preference thewb.xml file.
    If the thewb.xml file does not exist then create it.
    If the thewb.xml file is too old then update it.
    raise TheWBUrlError when the existing thewb.xml file cannot be parsed
    return nothing
    '''
    # Get thewb_config.xml
    self.getTheWBConfig()

    # Expand a leading "~" in the configured preference file path
    prefElement = self.thewb_config.find('userPreferenceFile')
    userPreferenceFile = prefElement.text
    if userPreferenceFile.startswith('~'):
        prefElement.text = "%s%s" % (os.path.expanduser("~"), userPreferenceFile[1:])
    prefPath = prefElement.text

    # Check if the thewb.xml file exists
    if os.path.isfile(prefPath):
        # Read the user preference thewb.xml file
        url = 'file://%s' % (prefPath, )
        if self.config['debug_enabled']:
            print(url)
            print()
        try:
            self.userPrefs = etree.parse(url)
        except Exception as errormsg:
            # The handler previously formatted a name it never bound, which
            # raised NameError instead of TheWBUrlError.
            raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (url, errormsg)) from errormsg
        # Check if the thewb.xml file is too old. Compare epoch seconds so the
        # result does not depend on how local time fields happen to order.
        nextUpdateSecs = int(self.userPrefs.find('updateDuration').text)*86400 # seconds in a day
        nextUpdate = os.path.getmtime(prefPath)+nextUpdateSecs
        if nextUpdate > time.time():
            return
        create = False
    else:
        create = True

    # If required create/update the thewb.xml file
    self.updateTheWB(create)
    return
# end getUserPreferences()
276 
def updateTheWB(self, create=False):
    ''' Create or update the thewb.xml user preferences file
    create: True when no user preference file exists yet; False when the settings
            already held in self.userPrefs are to be merged into the refreshed file
    raise TheWBConfigFileError when the default preference file is missing
    raise TheWBUrlError when the default or the refreshed preference file cannot be parsed
    return the current self.userPrefs unchanged when updating and the web site returned
           10 or fewer show links, otherwise return nothing
    '''
    # Read the default user preferences file
    defaultPrefs = '%s/nv_python_libs/configs/XML/defaultUserPrefs/thewb.xml' % (baseProcessingDir, )
    url = 'file://%s' % (defaultPrefs, )
    if not os.path.isfile(defaultPrefs):
        raise TheWBConfigFileError(self.error_messages['TheWBConfigFileError'] % (defaultPrefs, ))

    if self.config['debug_enabled']:
        print('updateTheWB url(%s)' % url)
        print()
    try:
        userTheWB = etree.parse(url)
    except Exception as errormsg:
        raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (url, errormsg)) from errormsg

    # Get the current show links from the TheWB web site
    linksTree = self.common.getUrlData(self.thewb_config.find('treeviewUrls'))

    if self.config['debug_enabled']:
        print("create(%s)" % create)
        print("linksTree:")
        sys.stdout.write(etree.tostring(linksTree, encoding='UTF-8', pretty_print=True))
        print()

    # Check that at least several show directories were returned
    if not create:
        if not len(linksTree.xpath('//results//a')) > 10:
            return self.userPrefs

    # Assemble the feeds and formats
    root = etree.XML('<xml></xml>')
    for directory in linksTree.xpath('//results'):
        tmpDirectory = etree.SubElement(root, 'showDirectories')
        tmpDirectory.attrib['name'] = directory.find('name').text
        for show in directory.xpath('.//a'):
            showName = show.text
            # Skip any DVD references as they are not on-line videos
            if showName.lower().find('dvd') != -1 or show.attrib['href'].lower().find('dvd') != -1:
                continue
            tmpShow = etree.XML('<url></url>')
            tmpShow.attrib['enabled'] = 'true'
            tmpShow.attrib['name'] = self.common.massageText(showName.strip())
            tmpShow.text = self.common.ampReplace(show.attrib['href'].replace('/shows/', '').replace('/', '').strip())
            tmpDirectory.append(tmpShow)

    if self.config['debug_enabled']:
        print("Before any merging userTheWB:")
        sys.stdout.write(etree.tostring(userTheWB, encoding='UTF-8', pretty_print=True))
        print()

    # If there was an existing thewb.xml file then add any relevant user settings to
    # this new thewb.xml
    if not create:
        userTheWB.find('updateDuration').text = self.userPrefs.find('updateDuration').text
        if self.userPrefs.find('showDirectories').get('globalmax'):
            root.find('showDirectories').attrib['globalmax'] = self.userPrefs.find('showDirectories').attrib['globalmax']
        for rss in self.userPrefs.xpath("//url[@enabled='false']"):
            elements = root.xpath("//url[text()=$URL]", URL=rss.text.strip())
            if len(elements):
                elements[0].attrib['enabled'] = 'false'
                if rss.get('max'):
                    elements[0].attrib['max'] = rss.attrib['max']

    if self.config['debug_enabled']:
        print("After any merging userTheWB:")
        sys.stdout.write(etree.tostring(userTheWB, encoding='UTF-8', pretty_print=True))
        print()

    # Save the thewb.xml file
    prefFile = self.thewb_config.find('userPreferenceFile').text
    prefDir = os.path.dirname(prefFile)
    if prefDir and not os.path.isdir(prefDir):
        os.makedirs(prefDir)
    # Serialize to text (encoding='unicode') so the pieces can be joined with
    # the closing tag; byte strings cannot be concatenated with str nor be
    # written to a text mode file.
    header = etree.tostring(userTheWB, encoding='unicode', pretty_print=True)
    # Drop the closing root tag and the newline that follows it so the show
    # directories can be appended inside the root element
    header = header[:-len('</userTheWB>')-1]
    directories = ''.join(etree.tostring(element, encoding='unicode', pretty_print=True) for element in root.xpath('/xml/*'))
    with open(prefFile, 'w', encoding='utf-8') as fd:
        fd.write(header+directories+'</userTheWB>')

    # Input the refreshed user preference data
    try:
        self.userPrefs = etree.parse(prefFile)
    except Exception as errormsg:
        # Report the file that actually failed to parse
        raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (prefFile, errormsg)) from errormsg
    return
# end updateTheWB()
362 
363 
368 
369 
def searchTitle(self, title, pagenumber, pagelen, ignoreError=False):
    '''Key word video search of the TheWB web site
    title: the search text; it is URL quoted and appended to the configured search URL
    pagenumber, pagelen: accepted for interface compatibility; not used by this search
    ignoreError: when True "nothing found" is reported as [None, None] instead of an exception
    raise TheWBUrlDownloadError when the search page cannot be downloaded
    raise TheWBVideoNotFound when nothing was found and ignoreError is False
    return [dictionary of MNV item elements keyed by lower case item title,
            text of the first "pageInfo" element or None when there is none]
    '''
    mnvNS = "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}"
    notFound = "No TheWB.com Video matches found for search value (%s)" % title

    hrefElement = self.thewb_config.find('searchURLS').xpath(".//href")[0]
    orgURL = hrefElement.text
    url = '%s?q=%s' % (orgURL, urllib.parse.quote(title))

    if self.config['debug_enabled']:
        print("Search url(%s)" % url)
        print()

    # Perform a search. The configuration element temporarily carries the
    # full search URL and is always restored afterwards.
    hrefElement.text = url
    try:
        resultTree = self.common.getUrlData(self.thewb_config.find('searchURLS'), pageFilter=None)
    except Exception as errormsg:
        raise TheWBUrlDownloadError(self.error_messages['TheWBUrlDownloadError'] % (errormsg, )) from errormsg
    finally:
        hrefElement.text = orgURL

    if resultTree is None:
        if ignoreError:
            return [None, None]
        raise TheWBVideoNotFound(notFound)

    if self.config['debug_enabled']:
        print("resultTree count(%s)" % len(resultTree))
        print(etree.tostring(resultTree, encoding='unicode', pretty_print=True))
        print()

    searchResults = resultTree.xpath('//result/div')
    if not len(searchResults):
        if ignoreError:
            return [None, None]
        raise TheWBVideoNotFound(notFound)

    # Set the number of search results returned
    self.channel['channel_numresults'] = len(searchResults)

    # TheWB search results do not have a pubDate so use the current date time
    # e.g. "Sun, 06 Jan 2008 21:44:36 GMT"
    pubDate = datetime.datetime.now().strftime(self.common.pubDateFormat)

    # Translate the search results into MNV RSS item format
    thumbNailFilter = etree.XPath('.//div[@class="overlay_thumb_area"]//img')
    textFilter = etree.XPath('.//div[@class="overlay-bg-middle"]/p')
    titleFilter = etree.XPath('.//div[@class="overlay_thumb_area"]//a[@title!=""]/@title')
    descFilter = etree.XPath('.//div[@class="overlay-bg-middle"]/p[@class="overlay_extra overlay_spacer_top"]/text()')
    linkFilter = etree.XPath('.//div[@class="overlay_thumb_area"]//a[@title!=""]/@href')
    itemThumbNail = etree.XPath('.//media:thumbnail', namespaces=self.common.namespaces)
    itemDwnLink = etree.XPath('.//media:content', namespaces=self.common.namespaces)
    itemDict = {}
    for result in searchResults:
        # Make sure that this result actually has a video. An XPath
        # evaluation returns a (possibly empty) list, never None, so test for
        # content and skip entries that lack any of the required parts.
        thumbNails = thumbNailFilter(result)
        itemTitles = titleFilter(result)
        textElements = textFilter(result)
        videoId = result.attrib.get('id')
        if not (linkFilter(result) and thumbNails and itemTitles and textElements and videoId):
            continue

        thewbItem = etree.XML(self.common.mnvItem)
        # These videos are only viewable in the US so add a country indicator
        etree.SubElement(thewbItem, mnvNS+"country").text = 'us'
        # Extract and massage data
        thumbNail = self.common.ampReplace(thumbNails[0].attrib['src'])
        itemTitle = itemTitles[0].strip()
        link = 'file://%s/nv_python_libs/configs/HTML/thewb.html?videocode=%s' % (baseProcessingDir, videoId.replace('video_', ''))
        etree.SubElement(thewbItem, mnvNS+"customhtml").text = 'true'
        tmptitle = None
        seasonNum = None
        episodeNum = None
        for e in textElements[0].xpath('./*'):
            # The text following each child element may hold the season and
            # episode details; it is already a str (or None when absent).
            if e.tail is None:
                continue
            eText = e.tail.strip()
            if not eText.startswith(('Season ', 'EP')):
                continue
            sed = self.getSeasonEpisode(eText)
            if not len(sed):
                continue
            infoList = 'S%02dE%02d' % (int(sed[0]), int(sed[1]))
            seasonNum = '%d' % int(sed[0])
            episodeNum = '%d' % int(sed[1])
            if len(sed) == 5:       # hours, minutes and seconds were captured
                videoSeconds = int(sed[2])*3600+int(sed[3])*60+int(sed[4])
                itemDwnLink(thewbItem)[0].attrib['duration'] = str(videoSeconds)
            elif len(sed) == 4:     # minutes and seconds were captured
                videoSeconds = int(sed[2])*60+int(sed[3])
                itemDwnLink(thewbItem)[0].attrib['duration'] = str(videoSeconds)

            index = itemTitle.find(':')
            if index != -1:
                tmptitle = '%s: %s %s' % (itemTitle[:index].strip(), infoList, itemTitle[index+1:].strip())
            else:
                tmptitle = '%s: %s' % (itemTitle.strip(), infoList)
        if tmptitle:
            itemTitle = tmptitle
        itemTitle = self.common.massageText(itemTitle.strip())
        descriptions = descFilter(result)
        description = self.common.massageText(descriptions[0].strip()) if descriptions else ''

        # Insert data into a new item element
        thewbItem.find('title').text = itemTitle
        thewbItem.find('author').text = "The WB.com"
        thewbItem.find('pubDate').text = pubDate
        thewbItem.find('description').text = description
        thewbItem.find('link').text = link
        itemThumbNail(thewbItem)[0].attrib['url'] = thumbNail
        itemDwnLink(thewbItem)[0].attrib['url'] = link
        if seasonNum:
            etree.SubElement(thewbItem, mnvNS+"season").text = seasonNum
        if episodeNum:
            etree.SubElement(thewbItem, mnvNS+"episode").text = episodeNum
        itemDict[itemTitle.lower()] = thewbItem

    if not itemDict:
        if ignoreError:
            return [None, None]
        # "title" still holds the caller's search value here because the
        # per-item titles use their own variable
        raise TheWBVideoNotFound("No TheWB Video matches found for search value (%s)" % title)

    pageInfo = resultTree.xpath('//pageInfo')
    return [itemDict, pageInfo[0].text if pageInfo else None]
# end searchTitle()
495 
496 
def searchForVideos(self, title, pagenumber):
    """Common name for a video search. Used to interface with MythTV plugin NetVision

    Loads the grabber configuration, runs searchTitle() for "title", writes the
    results to stdout as an MNV RSS document and then exits the process:
    status 0 after output or when no video was found, status 1 on any other error.
    """
    # Get thewb_config.xml
    self.getTheWBConfig()

    debugging = self.config['debug_enabled']
    if debugging:
        print("self.thewb_config:")
        sys.stdout.write(etree.tostring(self.thewb_config, encoding='UTF-8', pretty_print=True))
        print()

    try:
        data = self.searchTitle(title, pagenumber, self.page_limit)
    except TheWBVideoNotFound as msg:
        # An empty search result is reported but is not a failure
        sys.stderr.write("%s\n" % msg)
        sys.exit(0)
    except TheWBUrlError as msg:
        sys.stderr.write('%s\n' % msg)
        sys.exit(1)
    except TheWBHttpError as msg:
        sys.stderr.write(self.error_messages['TheWBHttpError'] % msg)
        sys.exit(1)
    except TheWBRssError as msg:
        sys.stderr.write(self.error_messages['TheWBRssError'] % msg)
        sys.exit(1)
    except Exception as e:
        sys.stderr.write("! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (title, e))
        sys.exit(1)

    items, morePages = data[0], data[1]

    # Create RSS element tree
    rssTree = etree.XML(self.common.mnvRSS+'</rss>')

    # Set the paging values
    itemCount = len(items)
    self.channel['channel_returned'] = itemCount
    self.channel['channel_startindex'] = itemCount
    if morePages == 'true':
        self.channel['channel_numresults'] = itemCount+(self.page_limit*(int(pagenumber)-1)+1)
    else:
        self.channel['channel_numresults'] = itemCount

    # Add the Channel element tree
    channelTree = self.common.mnvChannelElement(self.channel)
    rssTree.append(channelTree)

    # Add the items in title order; dictionary keys are unique so every item is added once
    for key in sorted(items):
        channelTree.append(items[key])

    # Output the MNV search results
    sys.stdout.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    sys.stdout.write(etree.tostring(rssTree, encoding='UTF-8', pretty_print=True))
    sys.exit(0)
# end searchForVideos()
561 
def displayTreeView(self):
    '''Gather the The WB feeds then get a max page of videos meta data in each of them
    Display the results and exit
    Exits the process with status 1 when the user preferences cannot be loaded or no
    feed is enabled, otherwise with status 0 after writing any items to stdout.
    raise TheWBUrlDownloadError when the RSS feeds cannot be downloaded
    '''
    # Get the user preferences that specify which shows and formats they want to be in the treeview
    try:
        self.getUserPreferences()
    except Exception as e:
        sys.stderr.write('%s\n' % e)
        sys.exit(1)

    # Verify that there is at least one RSS feed that user wants to download
    showFeeds = self.userPrefs.xpath("//showDirectories//url[@enabled='true']")
    totalFeeds = self.userPrefs.xpath("//url[@enabled='true']")

    if self.config['debug_enabled']:
        print("self.userPrefs show count(%s) total feed count(%s):" % (len(showFeeds), len(totalFeeds)))
        sys.stdout.write(etree.tostring(self.userPrefs, encoding='UTF-8', pretty_print=True))
        print()

    if not len(totalFeeds):
        sys.stderr.write('There are no show or treeviewURLS elements "enabled" in your "thewb.xml" user preferences\nfile (%s)\n' % self.thewb_config.find('userPreferenceFile').text)
        sys.exit(1)

    # Massage channel icon
    self.channel_icon = self.common.ampReplace(self.channel_icon)

    # Create RSS element tree
    rssTree = etree.XML(self.common.mnvRSS+'</rss>')

    # Add the Channel element tree
    channelTree = self.common.mnvChannelElement(self.channel)
    rssTree.append(channelTree)

    # Process any user specified searches; a failed search only skips that show
    showItems = {}
    for searchDetails in showFeeds:
        try:
            data = self.searchTitle(searchDetails.text.strip(), 1, self.page_limit, ignoreError=True)
            if data[0] is None:
                continue
        except TheWBVideoNotFound as msg:
            sys.stderr.write("%s\n" % msg)
            continue
        except TheWBUrlError as msg:
            sys.stderr.write('%s\n' % msg)
            continue
        except TheWBHttpError as msg:
            sys.stderr.write(self.error_messages['TheWBHttpError'] % msg)
            continue
        except TheWBRssError as msg:
            sys.stderr.write(self.error_messages['TheWBRssError'] % msg)
            continue
        except Exception as e:
            sys.stderr.write("! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (searchDetails.text.strip(), e))
            continue
        # Each entry becomes [items dictionary, paging text, display name]
        data.append(searchDetails.attrib['name'])
        showItems[self.common.massageText(searchDetails.text.strip())] = data

    if self.config['debug_enabled']:
        print("After searches count(%s):" % len(showItems))
        for key in list(showItems.keys()):
            print("Show(%s) name(%s) item count(%s)" % (key, showItems[key][2], len(showItems[key][0])))
        print()

    # Filter out any items that are not specifically for the show
    for showNameKey in list(showItems.keys()):
        tmpList = {}
        for key in list(showItems[showNameKey][0].keys()):
            tmpLink = showItems[showNameKey][0][key].find('link').text.replace(self.thewb_config.find('searchURLS').xpath(".//href")[0].text, '')
            if tmpLink.startswith(showNameKey):
                tmpList[key] = showItems[showNameKey][0][key]
        showItems[showNameKey][0] = tmpList

    if self.config['debug_enabled']:
        print("After search filter of non-show items count(%s):" % len(showItems))
        for key in list(showItems.keys()):
            print("Show(%s) name(%s) item count(%s)" % (key, showItems[key][2], len(showItems[key][0])))
        print()

    # Create a structure of feeds that concurrently have videos.
    # The XPath below already covers every showDirectories element, so it is
    # evaluated once; repeating it per directory queued each feed several times.
    rssData = etree.XML('<xml></xml>')
    rssFeedsUrl = 'http://www.thewb.com/shows/feed/'
    for rssFeed in self.userPrefs.xpath("//showDirectories/url[@enabled='true']"):
        link = rssFeedsUrl+rssFeed.text
        urlName = rssFeed.attrib.get('name')
        if urlName:
            uniqueName = '%s;%s' % (urlName, link)
        else:
            uniqueName = 'RSS;%s' % (link)
        url = etree.XML('<url></url>')
        etree.SubElement(url, "name").text = uniqueName
        etree.SubElement(url, "href").text = link
        etree.SubElement(url, "filter").text = "//channel/title"
        etree.SubElement(url, "filter").text = "//item"
        etree.SubElement(url, "parserType").text = 'xml'
        rssData.append(url)

    if self.config['debug_enabled']:
        print("rssData:")
        sys.stdout.write(etree.tostring(rssData, encoding='UTF-8', pretty_print=True))
        print()

    # Get the RSS Feed data
    self.channelLanguage = 'en'
    self.itemAuthor = 'The WB.com'
    self.itemFilter = etree.XPath('.//item', namespaces=self.common.namespaces)
    self.titleFilter = etree.XPath('.//title', namespaces=self.common.namespaces)
    self.linkFilter = etree.XPath('.//link', namespaces=self.common.namespaces)
    self.descFilter1 = etree.XPath('.//description', namespaces=self.common.namespaces)
    self.descFilter2 = etree.XPath("//text()")
    self.pubdateFilter = etree.XPath('.//pubDate', namespaces=self.common.namespaces)
    self.thumbNailFilter = etree.XPath('.//media:thumbnail', namespaces=self.common.namespaces)
    self.itemThumbNail = etree.XPath('.//media:thumbnail', namespaces=self.common.namespaces)
    self.itemDwnLink = etree.XPath('.//media:content', namespaces=self.common.namespaces)
    self.rssName = etree.XPath('title', namespaces=self.common.namespaces)
    self.feedFilter = etree.XPath('//url[text()=$url]')
    self.HTMLparser = etree.HTMLParser()
    if rssData.find('url') is not None:
        try:
            resultTree = self.common.getUrlData(rssData)
        except Exception as errormsg:
            raise TheWBUrlDownloadError(self.error_messages['TheWBUrlDownloadError'] % (errormsg, )) from errormsg

        if self.config['debug_enabled']:
            print("resultTree:")
            sys.stdout.write(etree.tostring(resultTree, encoding='UTF-8', pretty_print=True))
            print()

        # Process each directory of the user preferences that have an enabled rss feed
        for result in resultTree.findall('results'):
            # The name was built above as "<display name or RSS>;<feed link>"
            names = result.find('name').text.split(';')
            names[0] = self.common.massageText(names[0])
            if names[0] == 'RSS':
                names[0] = self.common.massageText(self.rssName(result.find('result'))[0].text.strip())
                urlName = names[0]
            else:
                urlName = result.find('url').text.replace(rssFeedsUrl, '').strip()

            # Find the user's item limit for this feed. names[1] is the full
            # feed link built above from rssFeedsUrl plus the preference entry
            # text, so also look the entry up without that prefix.
            urlMax = None
            url = self.feedFilter(self.userPrefs, url=names[1])
            if not len(url):
                url = self.feedFilter(self.userPrefs, url=names[1].replace(rssFeedsUrl, '', 1))
            if len(url):
                if url[0].attrib.get('max'):
                    try:
                        urlMax = int(url[0].attrib.get('max'))
                    except (TypeError, ValueError):
                        pass    # a non-numeric "max" means no limit
                elif url[0].getparent().attrib.get('globalmax'):
                    try:
                        urlMax = int(url[0].getparent().attrib.get('globalmax'))
                    except (TypeError, ValueError):
                        pass    # a non-numeric "globalmax" means no limit
                if urlMax == 0:
                    urlMax = None
            if self.config['debug_enabled']:
                print("Results: #Items(%s) for (%s)" % (len(self.itemFilter(result)), names))
                print()
            self.createItems(showItems, result, urlName, names[0], urlMax=urlMax)

    # Add all the shows and rss items to the channel
    for key in sorted(showItems.keys()):
        if not len(showItems[key][0]):
            continue
        # Create a new directory and/or subdirectory if required
        directoryElement = etree.SubElement(channelTree, 'directory')
        directoryElement.attrib['name'] = showItems[key][2]
        directoryElement.attrib['thumbnail'] = self.channel_icon

        if self.config['debug_enabled']:
            print("Results: #Items(%s) for (%s)" % (len(showItems[key][0]), showItems[key][2]))
            print()

        # Copy all the items into the MNV RSS directory
        for itemKey in sorted(showItems[key][0].keys()):
            directoryElement.append(showItems[key][0][itemKey])

    if self.config['debug_enabled']:
        print("Final results: #Items(%s)" % len(rssTree.xpath('//item')))
        print()

    # Check that there was at least some items
    if len(rssTree.xpath('//item')):
        # Output the MNV search results
        sys.stdout.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        sys.stdout.write(etree.tostring(rssTree, encoding='UTF-8', pretty_print=True))

    sys.exit(0)
# end displayTreeView()
754 
def createItems(self, showItems, result, urlName, showName, urlMax=None):
    '''Create a dictionary of MNV compliant RSS items from the results of a RSS feed show search.
    Also merge with any items that were found by using the Web search. Identical items use the RSS
    feed item data over the search item as RSS provides better results.

    showItems - dictionary of shows which is updated in place. A missing "urlName" entry is
                created as [{}, None, showName] and every new item is stored in the first
                element of that list, keyed by the item's lower-cased title
    result    - feed data that self.itemFilter() extracts the individual RSS items from
    urlName   - key identifying the show within showItems
    showName  - show name stored when the show entry is first created
    urlMax    - optional maximum number of feed items to process, a false value means no limit
    return nothing as the show item dictionary will have all the results
    '''
    mnvNS = "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}"

    # Initialize the show if it has not already had a search result
    if urlName not in showItems:
        showItems[urlName] = [{}, None, showName]

    # Convert each RSS item into a MNV item
    count = 0
    for thewbItem in self.itemFilter(result):
        newItem = etree.XML(self.common.mnvItem)
        # These videos are only viewable in the US so add a country indicator
        etree.SubElement(newItem, mnvNS + "country").text = 'us'
        # Extract and massage data
        tmpLink = self.linkFilter(thewbItem)[0].text.strip()
        # The video code is whatever follows the last '/' of the feed item's link
        videoCode = tmpLink[tmpLink.rfind('/') + 1:]
        link = self.common.ampReplace(
            'file://%s/nv_python_libs/configs/HTML/thewb.html?videocode=%s' % (baseProcessingDir, videoCode))
        etree.SubElement(newItem, mnvNS + "customhtml").text = 'true'
        # Convert the pubDate '2010-05-02T11:23:25-07:00' to a MNV pubdate format
        pubdate = self.pubdateFilter(thewbItem)
        if len(pubdate):
            # Drop the trailing UTC offset ('-07:00') before parsing
            pubdate = time.strptime(pubdate[0].text[:-6], '%Y-%m-%dT%H:%M:%S')
            pubdate = time.strftime(self.common.pubDateFormat, pubdate)
        else:
            pubdate = datetime.datetime.now().strftime(self.common.pubDateFormat)
        title = self.common.massageText(self.titleFilter(thewbItem)[0].text.strip())
        tmptitle = None
        descList = self.descFilter2(
            etree.parse(io.StringIO(self.descFilter1(thewbItem)[0].text), self.HTMLparser))
        description = None
        seasonNum = None
        episodeNum = None
        for eText in descList:
            # Keep the text as str. Encoding it to UTF-8 bytes made the startswith()
            # and replace() calls below raise TypeError under Python 3.
            eText = eText.strip()
            if not eText:
                continue
            # The first non-empty text is the description, the rest carry the details
            if not description:
                description = eText
                continue
            if eText.startswith(('Season: ', 'EP: ')):
                s_e = eText.replace('Season:', '').replace(', Episode:', '').replace('EP:', '').strip().split(' ')
                s_e = [number.strip() for number in s_e]
                if len(s_e) == 1 and can_int(s_e[0]):
                    episodeNum = s_e[0]
                    eText = 'Ep(%02d)' % int(episodeNum)
                elif len(s_e) == 2 and can_int(s_e[0]) and can_int(s_e[1]):
                    seasonNum, episodeNum = s_e
                    eText = 'S%02dE%02d' % (int(seasonNum), int(episodeNum))
                # Insert the season/episode text in front of the dash separated part of the title
                title = title.replace('-', '–')
                index = title.find('–')
                if index != -1:
                    tmptitle = '%s: %s %s' % (title[:index].strip(), eText, title[index:].strip())
                else:
                    tmptitle = '%s %s' % (title, eText)
            elif eText.startswith('Running Time: '):
                # Running time is "SS", "MM:SS" or "HH:MM:SS", anything else is ignored
                videoDuration = eText.replace('Running Time: ', '').strip().split(':')
                if len(videoDuration) > 3:
                    continue
                try:
                    videoSeconds = 0
                    for part in videoDuration:
                        videoSeconds = videoSeconds * 60 + int(part)
                    if videoSeconds:
                        self.itemDwnLink(newItem)[0].attrib['duration'] = str(videoSeconds)
                except (ValueError, IndexError):
                    # The duration is optional: skip a non-numeric running time or an
                    # item template without a download link element
                    pass

        if tmptitle:
            title = tmptitle
        title = self.common.massageText(title.strip())
        # A feed item without any description text gets an empty description
        description = self.common.massageText((description or '').strip())
        # Insert data into a new item element
        newItem.find('title').text = title
        newItem.find('author').text = self.itemAuthor
        newItem.find('pubDate').text = pubdate
        newItem.find('description').text = description
        newItem.find('link').text = link
        self.itemDwnLink(newItem)[0].attrib['url'] = link
        try:
            self.itemThumbNail(newItem)[0].attrib['url'] = self.common.ampReplace(
                self.itemThumbNail(thewbItem)[0].attrib['url'])
        except IndexError:
            # No thumbnail in the feed item (or in the item template)
            pass
        self.itemDwnLink(newItem)[0].attrib['lang'] = self.channelLanguage
        if seasonNum:
            etree.SubElement(newItem, mnvNS + "season").text = seasonNum
        if episodeNum:
            etree.SubElement(newItem, mnvNS + "episode").text = episodeNum
        # Merge RSS results with search results and override any duplicates with the RSS item
        showItems[urlName][0][title.lower()] = newItem
        if urlMax:  # Check if the maximum number of items to process has been met
            count += 1
            # ">=" stops at exactly urlMax items, ">" let one extra item through
            if count >= urlMax:
                break
# end createItems()
860 # end Videos() class
nv_python_libs.thewb.thewb_api.Videos.itemThumbNail
itemThumbNail
Definition: thewb_api.py:678
nv_python_libs.thewb.thewb_api.Videos.itemFilter
itemFilter
Definition: thewb_api.py:671
nv_python_libs.thewb.thewb_api.can_int
def can_int(x)
Definition: thewb_api.py:83
nv_python_libs.thewb.thewb_api.Videos.channel
channel
Definition: thewb_api.py:168
nv_python_libs.thewb.thewb_api.Videos.rssName
rssName
Definition: thewb_api.py:680
nv_python_libs.thewb.thewb_api.OutStreamEncoder
Definition: thewb_api.py:37
nv_python_libs.thewb.thewb_api.Videos.createItems
def createItems(self, showItems, result, urlName, showName, urlMax=None)
Definition: thewb_api.py:755
nv_python_libs.thewb.thewb_api.Videos.feedFilter
feedFilter
Definition: thewb_api.py:681
nv_python_libs.thewb.thewb_api.Videos.searchTitle
def searchTitle(self, title, pagenumber, pagelen, ignoreError=False)
End of Utility functions.
Definition: thewb_api.py:370
nv_python_libs.thewb.thewb_api.Videos.descFilter2
descFilter2
Definition: thewb_api.py:675
nv_python_libs.thewb.thewb_api.Videos.getTheWBConfig
def getTheWBConfig(self)
Definition: thewb_api.py:219
nv_python_libs.thewb.thewb_api.Videos.thumbNailFilter
thumbNailFilter
Definition: thewb_api.py:677
nv_python_libs.thewb.thewb_api.OutStreamEncoder.__init__
def __init__(self, outstream, encoding=None)
Definition: thewb_api.py:39
nv_python_libs.thewb.thewb_api.Videos.logger
logger
Definition: thewb_api.py:155
nv_python_libs.thewb.thewb_api.Videos.error_messages
error_messages
Definition: thewb_api.py:165
nv_python_libs.thewb.thewb_api.Videos.HTMLparser
HTMLparser
Definition: thewb_api.py:682
nv_python_libs.thewb.thewb_api.Videos.searchForVideos
def searchForVideos(self, title, pagenumber)
Definition: thewb_api.py:497
nv_python_libs.thewb.thewb_api.Videos.thewb_config
thewb_config
Definition: thewb_api.py:232
nv_python_libs.thewb.thewb_api.Videos.channel_icon
channel_icon
Definition: thewb_api.py:187
nv_python_libs.thewb.thewb_exceptions.TheWBConfigFileError
Definition: thewb_exceptions.py:42
nv_python_libs.thewb.thewb_exceptions.TheWBVideoNotFound
Definition: thewb_exceptions.py:37
nv_python_libs.thewb.thewb_api.Videos.log_name
log_name
Definition: thewb_api.py:153
nv_python_libs.thewb.thewb_api.Videos.pubdateFilter
pubdateFilter
Definition: thewb_api.py:676
nv_python_libs.thewb.thewb_api.Videos.descFilter1
descFilter1
Definition: thewb_api.py:674
nv_python_libs.thewb.thewb_api.Videos.userPrefs
userPrefs
Definition: thewb_api.py:259
nv_python_libs.thewb.thewb_api.Videos.channelLanguage
channelLanguage
Definition: thewb_api.py:669
nv_python_libs.thewb.thewb_api.Videos.updateTheWB
def updateTheWB(self, create=False)
Definition: thewb_api.py:277
print
static void print(const QList< uint > &raw_minimas, const QList< uint > &raw_maximas, const QList< float > &minimas, const QList< float > &maximas)
Definition: vbi608extractor.cpp:29
nv_python_libs.thewb.thewb_api.Videos.getSeasonEpisode
def getSeasonEpisode(self, title)
Start - Utility functions.
Definition: thewb_api.py:205
nv_python_libs.thewb.thewb_api.Videos.itemDwnLink
itemDwnLink
Definition: thewb_api.py:679
nv_python_libs.thewb.thewb_api.OutStreamEncoder.encoding
encoding
Definition: thewb_api.py:42
nv_python_libs.thewb.thewb_api.Videos.itemAuthor
itemAuthor
Definition: thewb_api.py:670
nv_python_libs.thewb.thewb_api.Videos.__init__
def __init__(self, apikey, mythtv=True, interactive=False, select_first=False, debug=False, custom_ui=None, language=None, search_all_languages=False)
Definition: thewb_api.py:109
nv_python_libs.thewb.thewb_exceptions.TheWBUrlError
Definition: thewb_exceptions.py:22
nv_python_libs.thewb.thewb_api.Videos.titleFilter
titleFilter
Definition: thewb_api.py:672
hardwareprofile.distros.all.get
def get()
Definition: all.py:22
nv_python_libs.thewb.thewb_api.OutStreamEncoder.write
def write(self, obj)
Definition: thewb_api.py:46
nv_python_libs.thewb.thewb_api.Videos.config
config
Definition: thewb_api.py:142
nv_python_libs.thewb.thewb_api.Videos.displayTreeView
def displayTreeView(self)
Definition: thewb_api.py:562
nv_python_libs.thewb.thewb_api.Videos.common
common
Definition: thewb_api.py:150
nv_python_libs.thewb.thewb_api.OutStreamEncoder.__getattr__
def __getattr__(self, attr)
Definition: thewb_api.py:52
nv_python_libs.thewb.thewb_api.Videos.getUserPreferences
def getUserPreferences(self)
Definition: thewb_api.py:239
nv_python_libs.thewb.thewb_api.Videos.s_e_Patterns
s_e_Patterns
Definition: thewb_api.py:172
nv_python_libs.thewb.thewb_exceptions.TheWBUrlDownloadError
Definition: thewb_exceptions.py:47
nv_python_libs.thewb.thewb_api.Videos
Definition: thewb_api.py:101
nv_python_libs.thewb.thewb_api.OutStreamEncoder.out
out
Definition: thewb_api.py:40
nv_python_libs.thewb.thewb_api.Videos.linkFilter
linkFilter
Definition: thewb_api.py:673
find
static pid_list_t::iterator find(const PIDInfoMap &map, pid_list_t &list, pid_list_t::iterator begin, pid_list_t::iterator end, bool find_open)
Definition: dvbstreamhandler.cpp:363