14 __title__ =
"hulu_api - Simple-to-use Python interface to the Hulu RSS feeds (http://www.hulu.com/)"
15 __author__=
"R.D. Vaughan"
17 This python script is intended to perform a variety of utility functions to search and access text
18 meta data, video and image URLs from the Hulu Web site. These routines process RSS feeds
19 provided by Hulu (http://www.hulu.com/). The specific Hulu RSS feeds that are processed are controlled through a user XML preference file usually found at
20 "~/.mythtv/MythNetvision/userGrabberPrefs/hulu.xml"
29 import os, struct, sys, re, time, datetime, shutil, urllib.request, urllib.parse, urllib.error
31 from socket
import gethostname, gethostbyname
32 from threading
import Thread
33 from copy
import deepcopy
34 from operator
import itemgetter, attrgetter
36 from .hulu_exceptions
import (HuluUrlError, HuluHttpError, HuluRssError, HuluVideoNotFound, HuluConfigFileError, HuluUrlDownloadError)
40 """Wraps a stream with an encoder"""
49 """Wraps the output stream, encoding Unicode strings with the specified encoding"""
50 if isinstance(obj, str):
52 self.
out.buffer.write(obj)
55 """Delegate everything but write to the stream"""
56 return getattr(self.
out, attr)
58 if isinstance(sys.stdout, io.TextIOWrapper):
64 from io
import StringIO
65 from lxml
import etree
66 except Exception
as e:
67 sys.stderr.write(
'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
72 """Main interface to http://www.hulu.com/
73 This is done to support a common naming framework for all python Netvision plugins no matter their
76 Supports search methods
77 The apikey is not required to access http://www.hulu.com/
87 search_all_languages = False,
89 """apikey (str/unicode):
90 Specify the target site API key. Applications need their own key in some cases
93 When True, the returned meta data has its keys and values massaged to match MythTV
94 When False, the returned meta data matches what the target site returned
96 interactive (True/False): (This option is not supported by all target site apis)
97 When True, the built-in console UI is used to select the correct show.
98 When False, the first search result is used.
100 select_first (True/False): (This option is not currently implemented in any grabbers)
101 Automatically selects the first series search result (rather
102 than showing the user a list of more than one series).
103 Is overridden by interactive = False, or specifying a custom_ui
106 shows verbose debugging information
108 custom_ui (xx_ui.BaseUI subclass): (This option is not currently implemented in any grabbers)
109 A callable subclass of interactive class (overrides interactive option)
111 language (2 character language abbreviation): (This option is not supported by all target site apis)
112 The language of the returned data. Is also the language search
113 uses. Default is "en" (English). For full list, run..
115 search_all_languages (True/False): (This option is not supported by all target site apis)
116 By default, a Netvision grabber will only search in the language specified using
117 the language option. When this is True, it will search for the
123 if apikey
is not None:
124 self.
config[
'apikey'] = apikey
128 self.
config[
'debug_enabled'] = debug
136 self.
config[
'custom_ui'] = custom_ui
140 self.
config[
'select_first'] = select_first
142 self.
config[
'search_all_languages'] = search_all_languages
144 self.
error_messages = {
'HuluUrlError':
"! Error: The URL (%s) cause the exception error (%s)\n",
'HuluHttpError':
"! Error: An HTTP communications error with the Hulu was raised (%s)\n",
'HuluRssError':
"! Error: Invalid RSS meta data\nwas received from the Hulu error (%s). Skipping item.\n",
'HuluVideoNotFound':
"! Error: Video search with the Hulu did not return any results (%s)\n",
'HuluConfigFileError':
"! Error: hulu_config.xml file missing\nit should be located in and named as (%s).\n",
'HuluUrlDownloadError':
"! Error: Downloading a RSS feed or Web page (%s).\n", }
147 self.
channel = {
'channel_title':
'Hulu',
'channel_link':
'http://www.hulu.com/',
'channel_description':
"Hulu.com is a free online video service that offers hit TV shows including Family Guy, 30 Rock, and the Daily Show with Jon Stewart, etc.",
'channel_numresults': 0,
'channel_returned': 1,
'channel_startindex': 0}
152 re.compile(
'''^.+?[Ss](?P<seasno>[0-9]+).*.+?[Ee](?P<epno>[0-9]+).*$''', re.UNICODE),
154 re.compile(
'''^.+?season\\ (?P<seasno>[0-9]+).*.+?episode\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
156 re.compile(
'''(?P<seriesname>[^_]+)\\_(?P<seasno>[0-9]+)\\_(?P<epno>[0-9]+).*$''', re.UNICODE),
159 self.
channel_icon =
'%SHAREDIR%/mythnetvision/icons/hulu.png'
161 self.
config[
'image_extentions'] = [
"png",
"jpg",
"bmp"]
171 ''' Read the MNV Hulu grabber "hulu_config.xml" configuration file
175 url =
'file://%s/nv_python_libs/configs/XML/hulu_config.xml' % (baseProcessingDir, )
176 if not os.path.isfile(url[7:]):
179 if self.
config[
'debug_enabled']:
184 except Exception
as e:
191 '''Read the hulu_config.xml and user preference hulu.xml file.
192 If the hulu.xml file does not exist then copy the default.
200 if userPreferenceFile[0] ==
'~':
201 self.
hulu_config.
find(
'userPreferenceFile').text =
"%s%s" % (os.path.expanduser(
"~"), userPreferenceFile[1:])
204 if not os.path.isfile(self.
hulu_config.
find(
'userPreferenceFile').text):
206 prefDir = self.
hulu_config.
find(
'userPreferenceFile').text.replace(
'/hulu.xml',
'')
207 if not os.path.isdir(prefDir):
209 defaultConfig =
'%s/nv_python_libs/configs/XML/defaultUserPrefs/hulu.xml' % (baseProcessingDir, )
210 shutil.copy2(defaultConfig, self.
hulu_config.
find(
'userPreferenceFile').text)
213 url =
'file://%s' % (self.
hulu_config.
find(
'userPreferenceFile').text, )
214 if self.
config[
'debug_enabled']:
219 except Exception
as e:
225 ''' Check if there is any season or episode number information in an item's title
226 return array of season and/or episode numbers, Series name (only if title empty)
227 return array with None values
229 s_e = [
None,
None,
None]
233 s_e[0], s_e[1] = match.groups()
234 if not s_e[0]
and desc:
237 s_e[0], s_e[1] = match.groups()
238 if thumbnail
and not title:
239 filepath, filename = os.path.split( thumbnail.replace(
'http:/',
'') )
242 s_e[2], s_e[0], s_e[1] = match.groups()
243 s_e[0] =
'%s' % int(s_e[0])
244 s_e[1] =
'%s' % int(s_e[1])
245 s_e[2] =
"".join([w.capitalize()
for w
in re.split(re.compile(
r"[\W_]*"), s_e[2].replace(
'_',
' ').replace(
'-',
' '))])
257 '''Key word video search of the Hulu web site
258 return an array of matching item elements
264 url = self.
hulu_config.
find(
'searchURLS').xpath(
".//href")[0].text.replace(
'PAGENUM', str(pagenumber)).replace(
'SEARCHTERM', urllib.parse.quote_plus(title.encode(
"utf-8")))
266 if self.
config[
'debug_enabled']:
275 except Exception
as errormsg:
283 if resultTree
is None:
284 raise HuluVideoNotFound(
"No Hulu Video matches found for search value (%s)" % title)
286 searchResults = resultTree.xpath(
'//result//a[@href!="#"]')
287 if not len(searchResults):
288 raise HuluVideoNotFound(
"No Hulu Video matches found for search value (%s)" % title)
290 if self.
config[
'debug_enabled']:
291 print(
"resultTree: count(%s)" % len(searchResults))
296 pubDate = datetime.datetime.now().strftime(self.
common.pubDateFormat)
299 titleFilter = etree.XPath(
".//img")
300 thumbnailFilter = etree.XPath(
".//img")
301 itemLink = etree.XPath(
'.//media:content', namespaces=self.
common.namespaces)
302 itemThumbnail = etree.XPath(
'.//media:thumbnail', namespaces=self.
common.namespaces)
304 for result
in searchResults:
305 tmpLink = result.attrib[
'href']
308 huluItem = etree.XML(self.
common.mnvItem)
310 link = self.
common.ampReplace(tmpLink)
311 tmpTitleText = titleFilter(result)[0].attrib[
'alt'].strip()
312 tmpList = tmpTitleText.split(
':')
313 title = self.
common.massageText(tmpList[0].strip())
315 description = self.
common.massageText(tmpList[1].strip())
320 huluItem.find(
'title').text = title
321 huluItem.find(
'author').text =
'Hulu'
322 huluItem.find(
'pubDate').text = pubDate
323 huluItem.find(
'description').text = description
324 huluItem.find(
'link').text = link
325 itemThumbnail(huluItem)[0].attrib[
'url'] = self.
common.ampReplace(thumbnailFilter(result)[0].attrib[
'src'])
326 itemLink(huluItem)[0].attrib[
'url'] = link
327 etree.SubElement(huluItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}country").text =
'us'
328 s_e = self.
getSeasonEpisode(title, description, itemThumbnail(huluItem)[0].attrib[
'url'])
330 etree.SubElement(huluItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}season").text = s_e[0]
332 etree.SubElement(huluItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}episode").text = s_e[1]
333 if not title
and s_e[2]:
334 huluItem.find(
'title').text = s_e[2]
335 itemDict[link] = huluItem
337 if not len(list(itemDict.keys())):
338 raise HuluVideoNotFound(
"No Hulu Video matches found for search value (%s)" % title)
341 self.
channel[
'channel_numresults'] = len(itemDict)
344 lastPage = resultTree.xpath(
'//result//a[@alt="Go to the last page"]')
348 if pagenumber < lastPage[0].text:
353 return [itemDict, morePages]
358 """Common name for a video search. Used to interface with MythTV plugin NetVision
363 except Exception
as e:
364 sys.stderr.write(
'%s' % e)
367 if self.
config[
'debug_enabled']:
368 print(
"self.userPrefs:")
369 sys.stdout.write(etree.tostring(self.
userPrefs, encoding=
'UTF-8', pretty_print=
True))
379 data = self.
searchTitle(title, pagenumber, self.page_limit)
380 except HuluVideoNotFound
as msg:
381 sys.stderr.write(
"%s\n" % msg)
383 except HuluUrlError
as msg:
384 sys.stderr.write(
'%s\n' % msg)
386 except HuluHttpError
as msg:
389 except HuluRssError
as msg:
392 except Exception
as e:
393 sys.stderr.write(
"! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (title, e))
396 if self.
config[
'debug_enabled']:
397 print(
"Number of items returned by the search(%s)" % len(list(data[0].keys())))
398 sys.stdout.write(etree.tostring(self.
userPrefs, encoding=
'UTF-8', pretty_print=
True))
402 rssTree = etree.XML(self.
common.mnvRSS+
'</rss>')
405 itemCount = len(list(data[0].keys()))
407 self.
channel[
'channel_returned'] = itemCount
408 self.
channel[
'channel_startindex'] = self.page_limit*(int(pagenumber)-1)
409 self.
channel[
'channel_numresults'] = itemCount+(self.page_limit*(int(pagenumber)-1)+1)
411 self.
channel[
'channel_returned'] = itemCount
412 self.
channel[
'channel_startindex'] = itemCount+(self.page_limit*(int(pagenumber)-1))
413 self.
channel[
'channel_numresults'] = itemCount+(self.page_limit*(int(pagenumber)-1))
417 rssTree.append(channelTree)
420 for key
in sorted(data[0].keys()):
422 channelTree.append(data[0][key])
426 sys.stdout.write(
'<?xml version="1.0" encoding="UTF-8"?>\n')
427 sys.stdout.write(etree.tostring(rssTree, encoding=
'UTF-8', pretty_print=
True))
432 '''Gather the Hulu feeds then get a max page of videos meta data in each of them
433 Display the results and exit
438 except Exception
as e:
439 sys.stderr.write(
'%s' % e)
442 if self.
config[
'debug_enabled']:
443 print(
"self.userPrefs:")
444 sys.stdout.write(etree.tostring(self.
userPrefs, encoding=
'UTF-8', pretty_print=
True))
451 rssTree = etree.XML(self.
common.mnvRSS+
'</rss>')
455 rssTree.append(channelTree)
458 searchResultTree = []
459 searchFilter = etree.XPath(
"//item")
460 userSearchStrings =
'userSearchStrings'
462 userSearch = self.
userPrefs.
find(userSearchStrings).xpath(
'./userSearch')
464 for searchDetails
in userSearch:
466 data = self.
searchTitle(searchDetails.find(
'searchTerm').text, 1, self.page_limit)
467 except HuluVideoNotFound
as msg:
468 sys.stderr.write(
"%s\n" % msg)
470 except HuluUrlError
as msg:
471 sys.stderr.write(
'%s\n' % msg)
473 except HuluHttpError
as msg:
476 except HuluRssError
as msg:
479 except Exception
as e:
480 sys.stderr.write(
"! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (title, e))
482 dirElement = etree.XML(
'<directory></directory>')
483 dirElement.attrib[
'name'] = self.
common.massageText(searchDetails.find(
'dirName').text)
486 for key
in sorted(data[0].keys()):
488 dirElement.append(data[0][key])
490 channelTree.append(dirElement)
494 rssData = etree.XML(
'<xml></xml>')
495 for feedType
in [
'treeviewURLS', ]:
501 urlEnabled = rssFeed.attrib.get(
'enabled')
502 if urlEnabled ==
'false':
504 urlName = rssFeed.attrib.get(
'name')
506 uniqueName =
'%s;%s' % (urlName, rssFeed.text)
508 uniqueName =
'RSS;%s' % (rssFeed.text)
509 url = etree.XML(
'<url></url>')
510 etree.SubElement(url,
"name").text = uniqueName
511 etree.SubElement(url,
"href").text = rssFeed.text
512 etree.SubElement(url,
"filter").text =
"//channel/title"
513 etree.SubElement(url,
"filter").text =
"//item"
514 etree.SubElement(url,
"parserType").text =
'xml'
517 if self.
config[
'debug_enabled']:
519 sys.stdout.write(etree.tostring(rssData, encoding=
'UTF-8', pretty_print=
True))
523 if rssData.find(
'url')
is not None:
525 resultTree = self.
common.getUrlData(rssData)
526 except Exception
as errormsg:
528 if self.
config[
'debug_enabled']:
530 sys.stdout.write(etree.tostring(resultTree, encoding=
'UTF-8', pretty_print=
True))
534 itemFilter = etree.XPath(
'.//item', namespaces=self.
common.namespaces)
535 titleFilter = etree.XPath(
'.//title', namespaces=self.
common.namespaces)
536 linkFilter = etree.XPath(
'.//link', namespaces=self.
common.namespaces)
537 descriptionFilter = etree.XPath(
'.//description', namespaces=self.
common.namespaces)
538 authorFilter = etree.XPath(
'.//media:credit', namespaces=self.
common.namespaces)
539 pubDateFilter = etree.XPath(
'.//pubDate', namespaces=self.
common.namespaces)
540 feedFilter = etree.XPath(
'//url[text()=$url]')
541 descFilter2 = etree.XPath(
'.//p')
542 itemThumbNail = etree.XPath(
'.//media:thumbnail', namespaces=self.
common.namespaces)
543 itemDwnLink = etree.XPath(
'.//media:content', namespaces=self.
common.namespaces)
544 itemLanguage = etree.XPath(
'.//media:content', namespaces=self.
common.namespaces)
545 itemDuration = etree.XPath(
'.//media:content', namespaces=self.
common.namespaces)
546 rssName = etree.XPath(
'title', namespaces=self.
common.namespaces)
548 categoryElement =
None
549 for result
in resultTree.findall(
'results'):
550 names = result.find(
'name').text.split(
';')
551 names[0] = self.
common.massageText(names[0])
552 if names[0] ==
'RSS':
553 names[0] = self.
common.massageText(rssName(result.find(
'result'))[0].text.replace(
'Hulu - ',
''))
556 url = feedFilter(self.
userPrefs, url=names[1])
558 if url[0].attrib.get(
'max'):
560 urlMax = int(url[0].attrib.get(
'max'))
563 elif url[0].getparent().attrib.get(
'globalmax'):
565 urlMax = int(url[0].getparent().attrib.get(
'globalmax'))
571 channelLanguage =
'en'
573 if names[0] != categoryDir:
574 if categoryDir
is not None:
575 channelTree.append(categoryElement)
576 categoryElement = etree.XML(
'<directory></directory>')
577 categoryElement.attrib[
'name'] = names[0]
579 categoryDir = names[0]
581 if self.
config[
'debug_enabled']:
582 print(
"Results: #Items(%s) for (%s)" % (len(itemFilter(result)), names))
586 for itemData
in itemFilter(result.find(
'result')):
587 huluItem = etree.XML(self.
common.mnvItem)
588 link = self.
common.ampReplace(linkFilter(itemData)[0].text)
590 pubdate = pubDateFilter(itemData)[0].text[:-5]+
'GMT'
593 huluItem.find(
'title').text = self.
common.massageText(titleFilter(itemData)[0].text.strip())
594 if authorFilter(itemData)[0].text:
595 huluItem.find(
'author').text = self.
common.massageText(authorFilter(itemData)[0].text.strip())
597 huluItem.find(
'author').text =
'Hulu'
598 huluItem.find(
'pubDate').text = pubdate
599 description = etree.HTML(etree.tostring(descriptionFilter(itemData)[0], method=
"text", encoding=str).strip())
600 if descFilter2(description)[0].text
is not None:
601 huluItem.find(
'description').text = self.
common.massageText(descFilter2(description)[0].text.strip())
603 huluItem.find(
'description').text =
''
604 for e
in descFilter2(description)[1]:
605 eText = etree.tostring(e, method=
"text", encoding=str)
608 if eText.startswith(
'Duration: '):
609 eText = eText.replace(
'Duration: ',
'').strip()
611 videoDuration = eText.split(
':')
613 if len(videoDuration) == 1:
614 videoSeconds = int(videoDuration[0])
615 elif len(videoDuration) == 2:
616 videoSeconds = int(videoDuration[0])*60+int(videoDuration[1])
617 elif len(videoDuration) == 3:
618 videoSeconds = int(videoDuration[0])*3600+int(videoDuration[1])*60+int(videoDuration[2])
620 itemDwnLink(huluItem)[0].attrib[
'duration'] = str(videoSeconds)
623 elif eText.startswith(
'Rating: '):
624 eText = eText.replace(
'Rating: ',
'').strip()
625 videoRating = eText.split(
' ')
626 huluItem.find(
'rating').text = videoRating[0]
628 huluItem.find(
'link').text = link
629 itemDwnLink(huluItem)[0].attrib[
'url'] = link
631 itemThumbNail(huluItem)[0].attrib[
'url'] = self.
common.ampReplace(itemThumbNail(itemData)[0].attrib[
'url'])
634 itemLanguage(huluItem)[0].attrib[
'lang'] = channelLanguage
635 etree.SubElement(huluItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}country").text =
'us'
636 s_e = self.
getSeasonEpisode(huluItem.find(
'title').text, huluItem.find(
'description').text, itemThumbNail(huluItem)[0].attrib[
'url'])
638 etree.SubElement(huluItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}season").text = s_e[0]
640 etree.SubElement(huluItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}episode").text = s_e[1]
641 if not huluItem.find(
'title').text
and s_e[2]:
642 huluItem.find(
'title').text = s_e[2]
643 categoryElement.append(huluItem)
650 if categoryElement
is not None:
651 if categoryElement.xpath(
'.//item')
is not None:
652 channelTree.append(categoryElement)
655 if len(rssTree.xpath(
'//item')):
657 sys.stdout.write(
'<?xml version="1.0" encoding="UTF-8"?>\n')
658 sys.stdout.write(etree.tostring(rssTree, encoding=
'UTF-8', pretty_print=
True))