MythTV  master
tedtalks_api.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # -*- coding: UTF-8 -*-
3 # ----------------------
4 # Name: tedtalks_api - Simple-to-use Python interface to the TedTalks RSS feeds
5 # (http://www.ted.com)
6 # Python Script
7 # Author: R.D. Vaughan
8 # Purpose: This python script is intended to perform a variety of utility functions to
9 # search and access text metadata, video and image URLs from TedTalks Web site.
10 #
11 # License:Creative Commons GNU GPL v2
12 # (http://creativecommons.org/licenses/GPL/2.0/)
13 #-------------------------------------
14 __title__ ="tedtalks_api - Simple-to-use Python interface to the TedTalks videos (http://www.ted.com)"
15 __author__="R.D. Vaughan"
16 __purpose__='''
17 This python script is intended to perform a variety of utility functions to search and access text
18 meta data, video and image URLs from the TedTalks Web site. These routines process videos
19 provided by TedTalks (http://www.ted.com). The specific TedTalks RSS feeds that are processed are controled through a user XML preference file usually found at
20 "~/.mythtv/MythNetvision/userGrabberPrefs/tedtalks.xml"
21 '''
22 
23 __version__="v0.1.0"
24 # 0.1.0 Initial development
25 
26 import os, struct, sys, re, time, datetime, shutil, urllib
27 from string import capitalize
28 import logging
29 from threading import Thread
30 from copy import deepcopy
31 from operator import itemgetter, attrgetter
32 
33 from tedtalks_exceptions import (TedTalksUrlError, TedTalksHttpError, TedTalksRssError, TedTalksVideoNotFound, TedTalksConfigFileError, TedTalksUrlDownloadError)
34 
class OutStreamEncoder(object):
    """File-like wrapper that encodes unicode text before passing it to a stream.

    Byte strings are forwarded unchanged. Every attribute other than ``write``
    is looked up on the wrapped stream.
    """

    def __init__(self, outstream, encoding=None):
        """Remember the wrapped stream and the encoding used for unicode text.

        outstream: the stream object that receives the output
        encoding:  codec name; when empty or None the file system encoding
                   reported by ``sys.getfilesystemencoding()`` is used
        """
        self.out = outstream
        self.encoding = encoding if encoding else sys.getfilesystemencoding()

    def write(self, obj):
        """Write ``obj`` to the wrapped stream, encoding it first when it is unicode.

        An IOError raised by the wrapped stream is ignored so that output
        remains best-effort.
        """
        if isinstance(obj, unicode):
            payload = obj.encode(self.encoding)
        else:
            payload = obj
        try:
            self.out.write(payload)
        except IOError:
            pass

    def __getattr__(self, attr):
        """Delegate everything but write to the wrapped stream"""
        return getattr(self.out, attr)
# Route everything written to the standard streams through a UTF-8 encoding wrapper
sys.stdout, sys.stderr = OutStreamEncoder(sys.stdout, 'utf8'), OutStreamEncoder(sys.stderr, 'utf8')
62 
63 
# Both libraries are hard requirements: report the failure and stop when either is missing
try:
    from StringIO import StringIO
    from lxml import etree
except Exception as e:
    importFailure = u'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e
    sys.stderr.write(importFailure)
    sys.exit(1)
70 
# Check that the lxml library is current enough
# From the lxml documents it states: (http://codespeak.net/lxml/installation.html)
# "If you want to use XPath, do not use libxml2 2.6.27. We recommend libxml2 2.7.2 or later"
# Testing was performed with the Ubuntu 9.10 "python-lxml" version "2.1.5-1ubuntu2" repository package
#
# "version" is the dotted display form of the libxml version. The check itself compares the
# numeric components: comparing the dotted strings would order "2.10.0" before "2.7.2" and
# wrongly reject a newer library.
version = '.'.join(str(digit) for digit in etree.LIBXML_VERSION)
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write(u'''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
          At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
85 
# Used for debugging
#import nv_python_libs.mashups.mashups_api as target
try:
    # Import the python mashups support classes
    import nv_python_libs.mashups.mashups_api as mashups_api
except Exception as e:
    mashupsImportFailure = '''
The subdirectory "nv_python_libs/mashups" containing the modules mashups_api and
mashups_exceptions.py (v0.1.0 or greater),
They should have been included with the distribution of tedtalks.py.
Error(%s)
''' % e
    sys.stderr.write(mashupsImportFailure)
    sys.exit(1)

# Refuse to run with a mashups_api that reports a version below the minimum
if mashups_api.__version__ < '0.1.0':
    sys.stderr.write("\n! Error: Your current installed mashups_api.py version is (%s)\nYou must at least have version (0.1.0) or higher.\n" % mashups_api.__version__)
    sys.exit(1)
103 
104 
class Videos(object):
    """Main interface to http://www.ted.com

    This is done to support a common naming framework for all python Netvision plugins no matter
    their site target.

    Supports search methods.
    An apikey is not required to access http://www.ted.com

    NOTE(review): the methods read the module-level names "common" and "baseProcessingDir" and
    the instance attributes "page_limit", "grabber_title" and "mashup_title", none of which are
    assigned in this class - presumably the calling grabber script sets them; confirm.
    """
    def __init__(self,
                 apikey,
                 mythtv = True,
                 interactive = False,
                 select_first = False,
                 debug = False,
                 custom_ui = None,
                 language = None,
                 search_all_languages = False,
                 ):
        """apikey (str/unicode):
            Specify the target site API key. Applications need their own key in some cases.
            TedTalks does not require one; it is only stored when it is not None.

        mythtv (True/False):
            When True, the returned meta data has its keys and values massaged to match MythTV
            When False, the returned meta data matches what the target site returned
            (Accepted for interface compatibility; this class does not read it.)

        interactive (True/False): (This option is not supported by all target site apis)
            When True, the built-in console UI is used to select the correct show.
            When False, the first search result is used.

        select_first (True/False): (This option is not currently implemented in any grabbers)
            Automatically selects the first series search result (rather
            than showing the user a list of more than one series).
            Is overridden by interactive = False, or specifying a custom_ui

        debug (True/False):
             shows verbose debugging information

        custom_ui (xx_ui.BaseUI subclass): (This option is not currently implemented in any grabbers)
            A callable subclass of interactive class (overrides interactive option)

        language (2 character language abbreviation): (This option is not supported by all target site apis)
            The language of the returned data. Is also the language search uses.
            When given it is stored in the configuration and handed to the mashups api.

        search_all_languages (True/False): (This option is not supported by all target site apis)
            By default, a Netvision grabber will only search in the language specified using
            the language option. When this is True, it will search for the
            show in any language
        """
        self.config = {}

        # TedTalks does not require an apikey, so it is only recorded when one is supplied
        if apikey is not None:
            self.config['apikey'] = apikey

        self.config['debug_enabled'] = debug    # show debugging messages
        self.common = common
        self.common.debug = debug   # Set the common function debug level

        self.log_name = u'TedTalks_Grabber'
        self.common.logger = self.common.initLogger(path=sys.stderr, log_name=self.log_name)
        self.logger = self.common.logger # Setups the logger (self.log.debug() etc)

        self.config['custom_ui'] = custom_ui
        self.config['interactive'] = interactive
        self.config['select_first'] = select_first
        self.config['search_all_languages'] = search_all_languages

        # Message templates keyed by the name of the exception they describe
        self.error_messages = {
            'TedTalksUrlError': u"! Error: The URL (%s) cause the exception error (%s)\n",
            'TedTalksHttpError': u"! Error: An HTTP communications error with the TedTalks was raised (%s)\n",
            'TedTalksRssError': u"! Error: Invalid RSS meta data\nwas received from the TedTalks error (%s). Skipping item.\n",
            'TedTalksVideoNotFound': u"! Error: Video search with the TedTalks did not return any results (%s)\n",
            'TedTalksConfigFileError': u"! Error: tedtalks_config.xml file missing\nit should be located in and named as (%s).\n",
            'TedTalksUrlDownloadError': u"! Error: Downloading a RSS feed or Web page (%s).\n",
            }

        # Channel details and search results
        self.channel = {
            'channel_title': u'TedTalks',
            'channel_link': u'http://www.ted.com',
            'channel_description': u"TED is a small nonprofit devoted to Ideas Worth Spreading.",
            'channel_numresults': 0,
            'channel_returned': 1,
            u'channel_startindex': 0,
            }

        self.channel_icon = u'%SHAREDIR%/mythnetvision/icons/tedtalks.png'

        self.config[u'image_extentions'] = ["png", "jpg", "bmp"] # Acceptable image extentions

        # Initialize Mashups api variables
        mashups_api.common = self.common
        self.mashups_api = mashups_api.Videos(u'')
        self.mashups_api.channel = self.channel
        if language:
            # Record the language before forwarding it; reading self.config['language']
            # without ever storing it raised a KeyError whenever a language was supplied
            self.config['language'] = language
            self.mashups_api.config['language'] = language
        self.mashups_api.config['debug_enabled'] = self.config['debug_enabled']
        self.mashups_api.getUserPreferences = self.getUserPreferences
    # end __init__()

    # Start - Utility functions.

    def getTedTalksConfig(self):
        ''' Read the MNV TedTalks grabber "tedtalks_config.xml" configuration file and keep the
        parsed tree in self.tedtalks_config.
        raise TedTalksConfigFileError when the file does not exist
        raise TedTalksUrlError when the file cannot be parsed
        return nothing
        '''
        # Read the grabber tedtalks_config.xml configuration file
        url = u'file://%s/nv_python_libs/configs/XML/tedtalks_config.xml' % (baseProcessingDir, )
        configPath = url[7:]    # The same location without the leading "file://"
        if not os.path.isfile(configPath):
            raise TedTalksConfigFileError(self.error_messages['TedTalksConfigFileError'] % (configPath, ))

        if self.config['debug_enabled']:
            print(url)
            print('')
        try:
            self.tedtalks_config = etree.parse(url)
        except Exception as errormsg:
            raise TedTalksUrlError(self.error_messages['TedTalksUrlError'] % (url, errormsg))
        return
    # end getTedTalksConfig()

    def getUserPreferences(self):
        '''Read the tedtalks_config.xml and user preference tedtalks.xml file.
        If the tedtalks.xml file does not exist then create it.
        An existing tedtalks.xml is re-created from the default file with the user's settings
        merged in (see updateTedTalks()).
        raise TedTalksUrlError when the existing tedtalks.xml cannot be parsed
        return nothing
        '''
        # Get tedtalks_config.xml
        self.getTedTalksConfig()

        # Expand a leading "~" so the stored path can be used directly from here on
        prefElement = self.tedtalks_config.find('userPreferenceFile')
        userPreferenceFile = prefElement.text
        if userPreferenceFile[0] == '~':
            prefElement.text = u"%s%s" % (os.path.expanduser(u"~"), userPreferenceFile[1:])

        # Check if the tedtalks.xml file exists
        if os.path.isfile(prefElement.text):
            # Read the existing user preference file
            url = u'file://%s' % (prefElement.text, )
            if self.config['debug_enabled']:
                print(url)
                print('')
            try:
                self.userPrefs = etree.parse(url)
            except Exception as errormsg:
                raise TedTalksUrlError(self.error_messages['TedTalksUrlError'] % (url, errormsg))
            create = False
        else:
            create = True

        # If required create/update the tedtalks.xml file
        self.updateTedTalks(create)
        return
    # end getUserPreferences()

    def updateTedTalks(self, create=False):
        ''' Create or update the tedtalks.xml user preferences file.
        The default tedtalks.xml is loaded, the "enabled" and "parameter" attributes of matching
        "sourceURL" elements are copied over from the existing preferences (unless "create" is
        True), the result is written to the user preference file and then read back into
        self.userPrefs and self.mashups_api.userPrefs.
        raise Exception when the default tedtalks.xml file is missing
        raise TedTalksUrlError when the default or the refreshed file cannot be parsed
        return nothing
        '''
        userDefaultFile = u'%s/nv_python_libs/configs/XML/defaultUserPrefs/tedtalks.xml' % (baseProcessingDir, )
        if not os.path.isfile(userDefaultFile):
            # The message is formatted here; passing the path as a second argument to
            # Exception left the "%s" placeholder unfilled
            raise Exception(u'!Error: The default TedTalk file is missing (%s)' % userDefaultFile)

        # Read the default tedtalks.xml user preferences file
        url = u'file://%s' % (userDefaultFile, )
        if self.config['debug_enabled']:
            print(url)
            print('')
        try:
            userTedTalks = etree.parse(url)
        except Exception as e:
            raise TedTalksUrlError(self.error_messages['TedTalksUrlError'] % (url, e))

        # If there was an existing tedtalks.xml file then add any relevant user settings
        # to this new tedtalks.xml
        if not create:
            for showElement in self.userPrefs.xpath("//sourceURL"):
                # NOTE(review): the lookup matches the default sourceURL "name" against the
                # name of the user element's PARENT. The element's own "name" attribute was
                # previously read but never used - confirm which of the two is intended.
                showName = showElement.getparent().attrib['name']
                elements = userTedTalks.xpath("//sourceURL[@name=$showName]", showName=showName,)
                if len(elements):
                    elements[0].attrib['enabled'] = showElement.attrib['enabled']
                    elements[0].attrib['parameter'] = showElement.attrib['parameter']

        if self.config['debug_enabled']:
            print("After any merging userTedTalks:")
            sys.stdout.write(etree.tostring(userTedTalks, encoding='UTF-8', pretty_print=True))
            print('')

        # Save the tedtalks.xml file
        prefFile = self.tedtalks_config.find('userPreferenceFile').text
        prefDir = os.path.dirname(prefFile)
        if prefDir and not os.path.isdir(prefDir):
            os.makedirs(prefDir)
        # etree.tostring() with an explicit encoding returns encoded bytes; the "with" block
        # closes the file even when the write fails
        with open(prefFile, 'wb') as fd:
            fd.write(etree.tostring(userTedTalks, encoding='UTF-8', pretty_print=True))

        # Read the refreshed user config file
        try:
            self.userPrefs = etree.parse(prefFile)
            self.mashups_api.userPrefs = self.userPrefs
        except Exception as errormsg:
            # Report the file that actually failed to parse (not the default file URL)
            raise TedTalksUrlError(self.error_messages['TedTalksUrlError'] % (prefFile, errormsg))
        return
    # end updateTedTalks()

    # End of Utility functions.

    def searchTitle(self, title, pagenumber, pagelen):
        '''Key word video search of the TedTalks web site.
        title:      the search text
        pagenumber: substituted for "PAGENUM" in the configured search URL
        pagelen:    accepted for interface compatibility; not used by this method
        Side effect: the search URL and a "parameter" element are written into
        self.tedtalks_config.
        raise TedTalksUrlDownloadError when fetching the search results fails
        raise TedTalksVideoNotFound when the search has no results
        raise Exception when tedtalks.xml has no enabled search with a "parameter" attribute
        return an array of matching item elements
        '''
        searchElement = self.tedtalks_config.find('searchURLS')
        hrefElement = searchElement.xpath(".//href")[0]

        # URL-quote the UTF-8 form of the title. A byte string holding non-ASCII characters
        # cannot be encoded again (UnicodeDecodeError), so it is quoted as it is.
        try:
            quotedTitle = urllib.quote_plus(title.encode("utf-8"))
        except UnicodeDecodeError:
            quotedTitle = urllib.quote_plus(title)
        url = hrefElement.text.replace(u'SEARCHTERM', quotedTitle)
        url = url.replace(u'PAGENUM', unicode(pagenumber))

        if self.config['debug_enabled']:
            print(url)
            print('')

        hrefElement.text = url

        # Globally add all the xpath extentions to the "mythtv" namespace allowing access within the
        # XSLT stylesheets
        self.common.buildFunctionDict()
        mnvXpath = etree.FunctionNamespace('http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format')
        mnvXpath.prefix = 'mnvXpath'
        for key in self.common.functionDict.keys():
            mnvXpath[key] = self.common.functionDict[key]

        # Add the parameter element from the User preferences file
        paraMeter = self.userPrefs.find('search').xpath("//search//sourceURL[@enabled='true']/@parameter")
        if not len(paraMeter):
            raise Exception(u'TedTalks User preferences file "tedtalks.xml" does not have an enabled search with a "parameter" attribute.')
        etree.SubElement(searchElement.xpath(".//url")[0], "parameter").text = paraMeter[0]

        # Perform a search
        try:
            resultTree = self.common.getUrlData(searchElement)
        except Exception as errormsg:
            raise TedTalksUrlDownloadError(self.error_messages['TedTalksUrlDownloadError'] % (errormsg, ))

        if resultTree is None:
            raise TedTalksVideoNotFound(u"No TedTalks Video matches found for search value (%s)" % title)

        searchResults = resultTree.xpath('//result//item')
        if not len(searchResults):
            raise TedTalksVideoNotFound(u"No TedTalks Video matches found for search value (%s)" % title)

        return searchResults
    # end searchTitle()

    def searchForVideos(self, title, pagenumber):
        """Common name for a video search. Used to interface with MythTV plugin NetVision

        Writes the search results as an MNV RSS document to stdout and always ends the
        process: exit status 0 after output or when nothing was found, 1 on any other error.
        """
        # Get tedtalks_config.xml
        self.getUserPreferences()

        if self.config['debug_enabled']:
            print("self.tedtalks_config:")
            sys.stdout.write(etree.tostring(self.tedtalks_config, encoding='UTF-8', pretty_print=True))
            print('')

        try:
            data = self.searchTitle(title, pagenumber, self.page_limit)
        except TedTalksVideoNotFound as msg:
            sys.stderr.write(u"%s\n" % msg)
            sys.exit(0)
        except TedTalksUrlError as msg:
            sys.stderr.write(u'%s\n' % msg)
            sys.exit(1)
        except TedTalksHttpError as msg:
            sys.stderr.write(self.error_messages['TedTalksHttpError'] % msg)
            sys.exit(1)
        except TedTalksRssError as msg:
            sys.stderr.write(self.error_messages['TedTalksRssError'] % msg)
            sys.exit(1)
        except Exception as e:
            sys.stderr.write(u"! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (title, e))
            sys.exit(1)

        # Create RSS element tree
        rssTree = etree.XML(self.common.mnvRSS+u'</rss>')

        # Set the paging values
        itemCount = len(data)
        previousPages = self.page_limit*(int(pagenumber)-1)
        if itemCount == self.page_limit:
            # A full page was returned
            self.channel['channel_returned'] = itemCount
            self.channel['channel_startindex'] = itemCount+previousPages
            self.channel['channel_numresults'] = itemCount+(previousPages+1)
        else:
            # A partly filled page was returned
            self.channel['channel_returned'] = itemCount+previousPages
            self.channel['channel_startindex'] = itemCount
            self.channel['channel_numresults'] = itemCount

        # Add the Channel element tree
        channelTree = self.common.mnvChannelElement(self.channel)
        rssTree.append(channelTree)

        for item in data:
            channelTree.append(item)

        # Output the MNV search results
        sys.stdout.write(u'<?xml version="1.0" encoding="UTF-8"?>\n')
        sys.stdout.write(etree.tostring(rssTree, encoding='UTF-8', pretty_print=True))
        sys.exit(0)
    # end searchForVideos()

    def displayTreeView(self):
        '''Gather all videos for each TedTalks show
        Display the results and exit (status 0, or 1 when the tree view raised an error)
        '''
        self.mashups_api.page_limit = self.page_limit
        self.mashups_api.grabber_title = self.grabber_title
        self.mashups_api.mashup_title = self.mashup_title
        self.mashups_api.channel_icon = self.channel_icon
        self.mashups_api.mashup_title = u'tedtalks'

        try:
            self.mashups_api.Search = False
            # The mashups grabber generates the tree view
            self.mashups_api.displayTreeView()
        except Exception as e:
            sys.stderr.write(u"! Error: During a TedTalks Video treeview\nError(%s)\n" % (e))
            sys.exit(1)

        sys.exit(0)
    # end displayTreeView()
# end Videos() class
static pid_list_t::iterator find(const PIDInfoMap &map, pid_list_t &list, pid_list_t::iterator begin, pid_list_t::iterator end, bool find_open)
def getTedTalksConfig(self)
Start - Utility functions.
def __init__(self, apikey, mythtv=True, interactive=False, select_first=False, debug=False, custom_ui=None, language=None, search_all_languages=False)
def searchTitle(self, title, pagenumber, pagelen)
End of Utility functions.
def __init__(self, outstream, encoding=None)
Definition: tedtalks_api.py:37
def searchForVideos(self, title, pagenumber)