14 __title__ =
"hulu_api - Simple-to-use Python interface to the Hulu RSS feeds (http://www.hulu.com/)"
15 __author__=
"R.D. Vaughan"
17 This python script is intended to perform a variety of utility functions to search and access text
18 meta data, video and image URLs from the Hulu Web site. These routines process RSS feeds
19 provided by Hulu (http://www.hulu.com/). The specific Hulu RSS feeds that are processed are controled through a user XML preference file usually found at
20 "~/.mythtv/MythNetvision/userGrabberPrefs/hulu.xml"
29 import os, struct, sys, re, time, datetime, shutil, urllib.request, urllib.parse, urllib.error
31 from socket
import gethostname, gethostbyname
32 from threading
import Thread
33 from copy
import deepcopy
34 from operator
import itemgetter, attrgetter
36 from .hulu_exceptions
import (HuluUrlError, HuluHttpError, HuluRssError, HuluVideoNotFound, HuluConfigFileError, HuluUrlDownloadError)
40 """Wraps a stream with an encoder"""
49 """Wraps the output stream, encoding Unicode strings with the specified encoding"""
50 if isinstance(obj, str):
52 self.
out.buffer.write(obj)
55 """Delegate everything but write to the stream"""
56 return getattr(self.
out, attr)
58 if isinstance(sys.stdout, io.TextIOWrapper):
64 from io
import StringIO
65 from lxml
import etree
66 except Exception
as e:
67 sys.stderr.write(
'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
75 for digit
in etree.LIBXML_VERSION:
76 version+=str(digit)+
'.'
77 version = version[:-1]
80 ! Error - The installed version of the "lxml" python library "libxml" version is too old.
81 At least "libxml" version 2.7.2 must be installed. Your version is (%s).
87 """Main interface to http://www.hulu.com/
88 This is done to support a common naming framework for all python Netvision plugins no matter their
91 Supports search methods
92 The apikey is not required to access http://www.hulu.com/
102 search_all_languages = False,
104 """apikey (str/unicode):
105 Specify the target site API key. Applications need their own key in some cases
108 When True, the returned meta data has its keys and values massaged to match MythTV
109 When False, the returned meta data matches what the target site returned
111 interactive (True/False): (This option is not supported by all target site apis)
112 When True, the built-in console UI is used to select the correct show.
113 When False, the first search result is used.
115 select_first (True/False): (This option is not currently implemented in any grabbers)
116 Automatically selects the first series search result (rather
117 than showing the user a list of more than one series).
118 Is overridden by interactive = False, or specifying a custom_ui
121 shows verbose debugging information
123 custom_ui (xx_ui.BaseUI subclass): (This option is not currently implemented in any grabbers)
124 A callable subclass of interactive class (overrides interactive option)
126 language (2 character language abbreviation): (This option is not supported by all target site apis)
127 The language of the returned data. Is also the language search
128 uses. Default is "en" (English). For full list, run..
130 search_all_languages (True/False): (This option is not supported by all target site apis)
131 By default, a Netvision grabber will only search in the language specified using
132 the language option. When this is True, it will search for the
138 if apikey
is not None:
139 self.
config[
'apikey'] = apikey
143 self.
config[
'debug_enabled'] = debug
151 self.
config[
'custom_ui'] = custom_ui
155 self.
config[
'select_first'] = select_first
157 self.
config[
'search_all_languages'] = search_all_languages
159 self.
error_messages = {
'HuluUrlError':
"! Error: The URL (%s) cause the exception error (%s)\n",
'HuluHttpError':
"! Error: An HTTP communications error with the Hulu was raised (%s)\n",
'HuluRssError':
"! Error: Invalid RSS meta data\nwas received from the Hulu error (%s). Skipping item.\n",
'HuluVideoNotFound':
"! Error: Video search with the Hulu did not return any results (%s)\n",
'HuluConfigFileError':
"! Error: hulu_config.xml file missing\nit should be located in and named as (%s).\n",
'HuluUrlDownloadError':
"! Error: Downloading a RSS feed or Web page (%s).\n", }
162 self.
channel = {
'channel_title':
'Hulu',
'channel_link':
'http://www.hulu.com/',
'channel_description':
"Hulu.com is a free online video service that offers hit TV shows including Family Guy, 30 Rock, and the Daily Show with Jon Stewart, etc.",
'channel_numresults': 0,
'channel_returned': 1,
'channel_startindex': 0}
167 re.compile(
'''^.+?[Ss](?P<seasno>[0-9]+).*.+?[Ee](?P<epno>[0-9]+).*$''', re.UNICODE),
169 re.compile(
'''^.+?season\\ (?P<seasno>[0-9]+).*.+?episode\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
171 re.compile(
'''(?P<seriesname>[^_]+)\\_(?P<seasno>[0-9]+)\\_(?P<epno>[0-9]+).*$''', re.UNICODE),
174 self.
channel_icon =
'%SHAREDIR%/mythnetvision/icons/hulu.png'
176 self.
config[
'image_extentions'] = [
"png",
"jpg",
"bmp"]
186 ''' Read the MNV Hulu grabber "hulu_config.xml" configuration file
190 url =
'file://%s/nv_python_libs/configs/XML/hulu_config.xml' % (baseProcessingDir, )
191 if not os.path.isfile(url[7:]):
194 if self.
config[
'debug_enabled']:
199 except Exception
as e:
206 '''Read the hulu_config.xml and user preference hulu.xml file.
207 If the hulu.xml file does not exist then copy the default.
215 if userPreferenceFile[0] ==
'~':
216 self.
hulu_config.
find(
'userPreferenceFile').text =
"%s%s" % (os.path.expanduser(
"~"), userPreferenceFile[1:])
219 if not os.path.isfile(self.
hulu_config.
find(
'userPreferenceFile').text):
221 prefDir = self.
hulu_config.
find(
'userPreferenceFile').text.replace(
'/hulu.xml',
'')
222 if not os.path.isdir(prefDir):
224 defaultConfig =
'%s/nv_python_libs/configs/XML/defaultUserPrefs/hulu.xml' % (baseProcessingDir, )
225 shutil.copy2(defaultConfig, self.
hulu_config.
find(
'userPreferenceFile').text)
228 url =
'file://%s' % (self.
hulu_config.
find(
'userPreferenceFile').text, )
229 if self.
config[
'debug_enabled']:
234 except Exception
as e:
240 ''' Check if there is any season or episode number information in an item's title
241 return array of season and/or episode numbers, Series name (only if title empty)
242 return array with None values
244 s_e = [
None,
None,
None]
248 s_e[0], s_e[1] = match.groups()
249 if not s_e[0]
and desc:
252 s_e[0], s_e[1] = match.groups()
253 if thumbnail
and not title:
254 filepath, filename = os.path.split( thumbnail.replace(
'http:/',
'') )
257 s_e[2], s_e[0], s_e[1] = match.groups()
258 s_e[0] =
'%s' % int(s_e[0])
259 s_e[1] =
'%s' % int(s_e[1])
260 s_e[2] =
"".join([w.capitalize()
for w
in re.split(re.compile(
r"[\W_]*"), s_e[2].replace(
'_',
' ').replace(
'-',
' '))])
272 '''Key word video search of the Hulu web site
273 return an array of matching item elements
279 url = self.
hulu_config.
find(
'searchURLS').xpath(
".//href")[0].text.replace(
'PAGENUM', str(pagenumber)).replace(
'SEARCHTERM', urllib.parse.quote_plus(title.encode(
"utf-8")))
281 if self.
config[
'debug_enabled']:
290 except Exception
as errormsg:
298 if resultTree
is None:
299 raise HuluVideoNotFound(
"No Hulu Video matches found for search value (%s)" % title)
301 searchResults = resultTree.xpath(
'//result//a[@href!="#"]')
302 if not len(searchResults):
303 raise HuluVideoNotFound(
"No Hulu Video matches found for search value (%s)" % title)
305 if self.
config[
'debug_enabled']:
306 print(
"resultTree: count(%s)" % len(searchResults))
311 pubDate = datetime.datetime.now().strftime(self.
common.pubDateFormat)
314 titleFilter = etree.XPath(
".//img")
315 thumbnailFilter = etree.XPath(
".//img")
316 itemLink = etree.XPath(
'.//media:content', namespaces=self.
common.namespaces)
317 itemThumbnail = etree.XPath(
'.//media:thumbnail', namespaces=self.
common.namespaces)
319 for result
in searchResults:
320 tmpLink = result.attrib[
'href']
323 huluItem = etree.XML(self.
common.mnvItem)
325 link = self.
common.ampReplace(tmpLink)
326 tmpTitleText = titleFilter(result)[0].attrib[
'alt'].strip()
327 tmpList = tmpTitleText.split(
':')
328 title = self.
common.massageText(tmpList[0].strip())
330 description = self.
common.massageText(tmpList[1].strip())
335 huluItem.find(
'title').text = title
336 huluItem.find(
'author').text =
'Hulu'
337 huluItem.find(
'pubDate').text = pubDate
338 huluItem.find(
'description').text = description
339 huluItem.find(
'link').text = link
340 itemThumbnail(huluItem)[0].attrib[
'url'] = self.
common.ampReplace(thumbnailFilter(result)[0].attrib[
'src'])
341 itemLink(huluItem)[0].attrib[
'url'] = link
342 etree.SubElement(huluItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}country").text =
'us'
343 s_e = self.
getSeasonEpisode(title, description, itemThumbnail(huluItem)[0].attrib[
'url'])
345 etree.SubElement(huluItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}season").text = s_e[0]
347 etree.SubElement(huluItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}episode").text = s_e[1]
348 if not title
and s_e[2]:
349 huluItem.find(
'title').text = s_e[2]
350 itemDict[link] = huluItem
352 if not len(list(itemDict.keys())):
353 raise HuluVideoNotFound(
"No Hulu Video matches found for search value (%s)" % title)
356 self.
channel[
'channel_numresults'] = len(itemDict)
359 lastPage = resultTree.xpath(
'//result//a[@alt="Go to the last page"]')
363 if pagenumber < lastPage[0].text:
368 return [itemDict, morePages]
373 """Common name for a video search. Used to interface with MythTV plugin NetVision
378 except Exception
as e:
379 sys.stderr.write(
'%s' % e)
382 if self.
config[
'debug_enabled']:
383 print(
"self.userPrefs:")
384 sys.stdout.write(etree.tostring(self.
userPrefs, encoding=
'UTF-8', pretty_print=
True))
394 data = self.
searchTitle(title, pagenumber, self.page_limit)
395 except HuluVideoNotFound
as msg:
396 sys.stderr.write(
"%s\n" % msg)
398 except HuluUrlError
as msg:
399 sys.stderr.write(
'%s\n' % msg)
401 except HuluHttpError
as msg:
404 except HuluRssError
as msg:
407 except Exception
as e:
408 sys.stderr.write(
"! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (title, e))
411 if self.
config[
'debug_enabled']:
412 print(
"Number of items returned by the search(%s)" % len(list(data[0].keys())))
413 sys.stdout.write(etree.tostring(self.
userPrefs, encoding=
'UTF-8', pretty_print=
True))
417 rssTree = etree.XML(self.
common.mnvRSS+
'</rss>')
420 itemCount = len(list(data[0].keys()))
422 self.
channel[
'channel_returned'] = itemCount
423 self.
channel[
'channel_startindex'] = self.page_limit*(int(pagenumber)-1)
424 self.
channel[
'channel_numresults'] = itemCount+(self.page_limit*(int(pagenumber)-1)+1)
426 self.
channel[
'channel_returned'] = itemCount
427 self.
channel[
'channel_startindex'] = itemCount+(self.page_limit*(int(pagenumber)-1))
428 self.
channel[
'channel_numresults'] = itemCount+(self.page_limit*(int(pagenumber)-1))
432 rssTree.append(channelTree)
435 for key
in sorted(data[0].keys()):
437 channelTree.append(data[0][key])
441 sys.stdout.write(
'<?xml version="1.0" encoding="UTF-8"?>\n')
442 sys.stdout.write(etree.tostring(rssTree, encoding=
'UTF-8', pretty_print=
True))
447 '''Gather the Hulu feeds then get a max page of videos meta data in each of them
448 Display the results and exit
453 except Exception
as e:
454 sys.stderr.write(
'%s' % e)
457 if self.
config[
'debug_enabled']:
458 print(
"self.userPrefs:")
459 sys.stdout.write(etree.tostring(self.
userPrefs, encoding=
'UTF-8', pretty_print=
True))
466 rssTree = etree.XML(self.
common.mnvRSS+
'</rss>')
470 rssTree.append(channelTree)
473 searchResultTree = []
474 searchFilter = etree.XPath(
"//item")
475 userSearchStrings =
'userSearchStrings'
477 userSearch = self.
userPrefs.
find(userSearchStrings).xpath(
'./userSearch')
479 for searchDetails
in userSearch:
481 data = self.
searchTitle(searchDetails.find(
'searchTerm').text, 1, self.page_limit)
482 except HuluVideoNotFound
as msg:
483 sys.stderr.write(
"%s\n" % msg)
485 except HuluUrlError
as msg:
486 sys.stderr.write(
'%s\n' % msg)
488 except HuluHttpError
as msg:
491 except HuluRssError
as msg:
494 except Exception
as e:
495 sys.stderr.write(
"! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (title, e))
497 dirElement = etree.XML(
'<directory></directory>')
498 dirElement.attrib[
'name'] = self.
common.massageText(searchDetails.find(
'dirName').text)
501 for key
in sorted(data[0].keys()):
503 dirElement.append(data[0][key])
505 channelTree.append(dirElement)
509 rssData = etree.XML(
'<xml></xml>')
510 for feedType
in [
'treeviewURLS', ]:
516 urlEnabled = rssFeed.attrib.get(
'enabled')
517 if urlEnabled ==
'false':
519 urlName = rssFeed.attrib.get(
'name')
521 uniqueName =
'%s;%s' % (urlName, rssFeed.text)
523 uniqueName =
'RSS;%s' % (rssFeed.text)
524 url = etree.XML(
'<url></url>')
525 etree.SubElement(url,
"name").text = uniqueName
526 etree.SubElement(url,
"href").text = rssFeed.text
527 etree.SubElement(url,
"filter").text =
"//channel/title"
528 etree.SubElement(url,
"filter").text =
"//item"
529 etree.SubElement(url,
"parserType").text =
'xml'
532 if self.
config[
'debug_enabled']:
534 sys.stdout.write(etree.tostring(rssData, encoding=
'UTF-8', pretty_print=
True))
538 if rssData.find(
'url')
is not None:
540 resultTree = self.
common.getUrlData(rssData)
541 except Exception
as errormsg:
543 if self.
config[
'debug_enabled']:
545 sys.stdout.write(etree.tostring(resultTree, encoding=
'UTF-8', pretty_print=
True))
549 itemFilter = etree.XPath(
'.//item', namespaces=self.
common.namespaces)
550 titleFilter = etree.XPath(
'.//title', namespaces=self.
common.namespaces)
551 linkFilter = etree.XPath(
'.//link', namespaces=self.
common.namespaces)
552 descriptionFilter = etree.XPath(
'.//description', namespaces=self.
common.namespaces)
553 authorFilter = etree.XPath(
'.//media:credit', namespaces=self.
common.namespaces)
554 pubDateFilter = etree.XPath(
'.//pubDate', namespaces=self.
common.namespaces)
555 feedFilter = etree.XPath(
'//url[text()=$url]')
556 descFilter2 = etree.XPath(
'.//p')
557 itemThumbNail = etree.XPath(
'.//media:thumbnail', namespaces=self.
common.namespaces)
558 itemDwnLink = etree.XPath(
'.//media:content', namespaces=self.
common.namespaces)
559 itemLanguage = etree.XPath(
'.//media:content', namespaces=self.
common.namespaces)
560 itemDuration = etree.XPath(
'.//media:content', namespaces=self.
common.namespaces)
561 rssName = etree.XPath(
'title', namespaces=self.
common.namespaces)
563 categoryElement =
None
564 for result
in resultTree.findall(
'results'):
565 names = result.find(
'name').text.split(
';')
566 names[0] = self.
common.massageText(names[0])
567 if names[0] ==
'RSS':
568 names[0] = self.
common.massageText(rssName(result.find(
'result'))[0].text.replace(
'Hulu - ',
''))
571 url = feedFilter(self.
userPrefs, url=names[1])
573 if url[0].attrib.get(
'max'):
575 urlMax = int(url[0].attrib.get(
'max'))
578 elif url[0].getparent().attrib.get(
'globalmax'):
580 urlMax = int(url[0].getparent().attrib.get(
'globalmax'))
586 channelLanguage =
'en'
588 if names[0] != categoryDir:
589 if categoryDir
is not None:
590 channelTree.append(categoryElement)
591 categoryElement = etree.XML(
'<directory></directory>')
592 categoryElement.attrib[
'name'] = names[0]
594 categoryDir = names[0]
596 if self.
config[
'debug_enabled']:
597 print(
"Results: #Items(%s) for (%s)" % (len(itemFilter(result)), names))
601 for itemData
in itemFilter(result.find(
'result')):
602 huluItem = etree.XML(self.
common.mnvItem)
603 link = self.
common.ampReplace(linkFilter(itemData)[0].text)
605 pubdate = pubDateFilter(itemData)[0].text[:-5]+
'GMT'
608 huluItem.find(
'title').text = self.
common.massageText(titleFilter(itemData)[0].text.strip())
609 if authorFilter(itemData)[0].text:
610 huluItem.find(
'author').text = self.
common.massageText(authorFilter(itemData)[0].text.strip())
612 huluItem.find(
'author').text =
'Hulu'
613 huluItem.find(
'pubDate').text = pubdate
614 description = etree.HTML(etree.tostring(descriptionFilter(itemData)[0], method=
"text", encoding=str).strip())
615 if descFilter2(description)[0].text
is not None:
616 huluItem.find(
'description').text = self.
common.massageText(descFilter2(description)[0].text.strip())
618 huluItem.find(
'description').text =
''
619 for e
in descFilter2(description)[1]:
620 eText = etree.tostring(e, method=
"text", encoding=str)
623 if eText.startswith(
'Duration: '):
624 eText = eText.replace(
'Duration: ',
'').strip()
626 videoDuration = eText.split(
':')
628 if len(videoDuration) == 1:
629 videoSeconds = int(videoDuration[0])
630 elif len(videoDuration) == 2:
631 videoSeconds = int(videoDuration[0])*60+int(videoDuration[1])
632 elif len(videoDuration) == 3:
633 videoSeconds = int(videoDuration[0])*3600+int(videoDuration[1])*60+int(videoDuration[2])
635 itemDwnLink(huluItem)[0].attrib[
'duration'] = str(videoSeconds)
638 elif eText.startswith(
'Rating: '):
639 eText = eText.replace(
'Rating: ',
'').strip()
640 videoRating = eText.split(
' ')
641 huluItem.find(
'rating').text = videoRating[0]
643 huluItem.find(
'link').text = link
644 itemDwnLink(huluItem)[0].attrib[
'url'] = link
646 itemThumbNail(huluItem)[0].attrib[
'url'] = self.
common.ampReplace(itemThumbNail(itemData)[0].attrib[
'url'])
649 itemLanguage(huluItem)[0].attrib[
'lang'] = channelLanguage
650 etree.SubElement(huluItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}country").text =
'us'
651 s_e = self.
getSeasonEpisode(huluItem.find(
'title').text, huluItem.find(
'description').text, itemThumbNail(huluItem)[0].attrib[
'url'])
653 etree.SubElement(huluItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}season").text = s_e[0]
655 etree.SubElement(huluItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}episode").text = s_e[1]
656 if not huluItem.find(
'title').text
and s_e[2]:
657 huluItem.find(
'title').text = s_e[2]
658 categoryElement.append(huluItem)
665 if categoryElement
is not None:
666 if categoryElement.xpath(
'.//item')
is not None:
667 channelTree.append(categoryElement)
670 if len(rssTree.xpath(
'//item')):
672 sys.stdout.write(
'<?xml version="1.0" encoding="UTF-8"?>\n')
673 sys.stdout.write(etree.tostring(rssTree, encoding=
'UTF-8', pretty_print=
True))