14 __title__ =
"bbciplayer_api - Simple-to-use Python interface to the BBC iPlayer RSS feeds (http://www.bbc.co.uk)"
15 __author__=
"R.D. Vaughan"
17 This python script is intended to perform a variety of utility functions to search and access text
18 meta data, video and image URLs from the BBC iPlayer Web site. These routines process RSS feeds
19 provided by BBC (http://www.bbc.co.uk). The specific BBC iPlayer RSS feeds that are processed are controlled through a user XML preference file usually found at
20 "~/.mythtv/MythNetvision/userGrabberPrefs/bbciplayer.xml"
32 import os, struct, sys, re, time, datetime, shutil, urllib, re
34 from socket
import gethostname, gethostbyname
35 from threading
import Thread
36 from copy
import deepcopy
37 from operator
import itemgetter, attrgetter
38 from MythTV
import MythXML
39 from bbciplayer_exceptions
import (BBCUrlError, BBCHttpError, BBCRssError, BBCVideoNotFound, BBCConfigFileError, BBCUrlDownloadError)
42 """Wraps a stream with an encoder"""
51 """Wraps the output stream, encoding Unicode strings with the specified encoding"""
52 if isinstance(obj, unicode):
64 """Delegate everything but write to the stream"""
65 return getattr(self.
out, attr)
71 from StringIO
import StringIO
72 from lxml
import etree
74 sys.stderr.write(
u'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
82 for digit
in etree.LIBXML_VERSION:
83 version+=str(digit)+
'.'
84 version = version[:-1]
87 ! Error - The installed version of the "lxml" python library "libxml" version is too old.
88 At least "libxml" version 2.7.2 must be installed. Your version is (%s).
94 """Main interface to http://www.bbciplayer.com/
95 This is done to support a common naming framework for all python Netvision plugins no matter their site
98 Supports search methods
99 The apikey is not required to access http://www.bbciplayer.com/
105 select_first = False,
109 search_all_languages = False,
111 """apikey (str/unicode):
112 Specify the target site API key. Applications need their own key in some cases
115 When True, the returned meta data has its keys and values massaged to match MythTV
116 When False, the returned meta data matches what the target site returned
118 interactive (True/False): (This option is not supported by all target site apis)
119 When True, the built-in console UI is used to select the correct show.
120 When False, the first search result is used.
122 select_first (True/False): (This option is not currently implemented in any grabbers)
123 Automatically selects the first series search result (rather
124 than showing the user a list of more than one series).
125 Is overridden by interactive = False, or specifying a custom_ui
128 shows verbose debugging information
130 custom_ui (xx_ui.BaseUI subclass): (This option is not currently implemented in any grabbers)
131 A callable subclass of interactive class (overrides interactive option)
133 language (2 character language abbreviation): (This option is not supported by all target site apis)
134 The language of the returned data. Is also the language search
135 uses. Default is "en" (English). For full list, run..
137 search_all_languages (True/False): (This option is not supported by all target site apis)
138 By default, a Netvision grabber will only search in the language specified using
139 the language option. When this is True, it will search for the
146 if apikey
is not None:
147 self.
config[
'apikey'] = apikey
151 self.
config[
'debug_enabled'] = debug
155 self.
log_name =
u'BBCiPlayer_Grabber'
159 self.
config[
'custom_ui'] = custom_ui
163 self.
config[
'select_first'] = select_first
165 self.
config[
'search_all_languages'] = search_all_languages
167 self.
error_messages = {
'BBCUrlError':
u"! Error: The URL (%s) cause the exception error (%s)\n",
'BBCHttpError':
u"! Error: An HTTP communications error with the BBC was raised (%s)\n",
'BBCRssError':
u"! Error: Invalid RSS meta data\nwas received from the BBC error (%s). Skipping item.\n",
'BBCVideoNotFound':
u"! Error: Video search with the BBC did not return any results (%s)\n",
'BBCConfigFileError':
u"! Error: bbc_config.xml file missing\nit should be located in and named as (%s).\n",
'BBCUrlDownloadError':
u"! Error: Downloading a RSS feed or Web page (%s).\n", }
170 self.
channel = {
'channel_title':
u'BBC iPlayer',
'channel_link':
u'http://www.bbc.co.uk',
'channel_description':
u"BBC iPlayer is our service that lets you catch up with radio and television programmes from the past week.",
'channel_numresults': 0,
'channel_returned': 1,
u'channel_startindex': 0}
174 etree.XPath(
'.//a[@class="episode-title title-link cta-video"]', namespaces=self.
common.namespaces),
175 etree.XPath(
'.//div[@class="feature video"]', namespaces=self.
common.namespaces),
176 etree.XPath(
'.//atm:category[@term="TV"]', namespaces=self.
common.namespaces),
182 re.compile(
u'''^.+?Series\\ (?P<seasno>[0-9]+).*.+?Episode\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
184 re.compile(
u'''^.+?Series\\ (?P<seasno>[0-9]+)\\ \\-\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
186 re.compile(
u'''^.+?Series\\ (?P<seasno>[0-9]+).*.+?Part\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
188 re.compile(
u'''^.+?Series\\ (?P<seasno>[0-9]+)\\:\\ Programme\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
190 re.compile(
u'''^.+?Series\\ (?P<seasno>[0-9]+).*$''', re.UNICODE),
192 re.compile(
u'''^.+?Episode\\ (?P<seasno>[0-9]+).*$''', re.UNICODE),
195 self.
channel_icon =
u'%SHAREDIR%/mythnetvision/icons/bbciplayer.jpg'
197 self.
config[
u'image_extentions'] = [
"png",
"jpg",
"bmp"]
207 ''' Read the MNV BBC iPlayer grabber "bbc_config.xml" configuration file
211 url =
u'file://%s/nv_python_libs/configs/XML/bbc_config.xml' % (baseProcessingDir, )
212 if not os.path.isfile(url[7:]):
215 if self.
config[
'debug_enabled']:
227 '''Read the bbciplayer_config.xml and user preference bbciplayer.xml file.
228 If the bbciplayer.xml file does not exist then copy the default.
236 if userPreferenceFile[0] ==
'~':
237 self.
bbciplayer_config.
find(
'userPreferenceFile').text =
u"%s%s" % (os.path.expanduser(
u"~"), userPreferenceFile[1:])
243 if not os.path.isdir(prefDir):
245 defaultConfig =
u'%s/nv_python_libs/configs/XML/defaultUserPrefs/bbciplayer.xml' % (baseProcessingDir, )
250 if self.
config[
'debug_enabled']:
261 '''Parse the item information (HTML or RSS/XML) to identify if the content is a video or
262 audio file. Set the country code if a video is detected as it can only be played in the "UK"
263 return "uk" if a video type was detected.
264 return None if a video type was NOT detected.
268 if len(xpathP(item)):
276 ''' Check if there is any season or episode number information in an item's title
277 return array of season and/or episode numbers
278 return array with None values
286 s_e[0], s_e[1] = match.groups()
289 s_e[0] = match.groups()[0]
292 s_e[1] = match.groups()[0]
304 playerUrl = self.
mythxml.getInternetContentUrl(
"nv_python_libs/configs/HTML/bbciplayer.html", \
309 '''Key word video search of the BBC iPlayer web site
310 return an array of matching item elements
317 searchVar =
u'/?q=%s&page=%s' % (urllib.quote(title.encode(
"utf-8")), pagenumber)
318 except UnicodeDecodeError:
319 searchVar =
u'/?q=%s&page=%s' % (urllib.quote(title), pagenumber)
323 if self.
config[
'debug_enabled']:
332 except Exception, errormsg:
340 if resultTree
is None:
341 raise BBCVideoNotFound(
u"No BBC Video matches found for search value (%s)" % title)
343 searchResults = resultTree.xpath(
'//result//li')
344 if not len(searchResults):
345 raise BBCVideoNotFound(
u"No BBC Video matches found for search value (%s)" % title)
349 pubDate = datetime.datetime.now().strftime(self.
common.pubDateFormat)
355 urlType =
u'fullscreen'
358 audioFilter = etree.XPath(
'contains(./@class,"audio") or contains(./../../@class,"audio")')
359 linkFilter = etree.XPath(
u".//div[@class='episode-info ']//a")
360 titleFilter = etree.XPath(
u".//div[@class='episode-info ']//a")
361 descFilter = etree.XPath(
u".//div[@class='episode-info ']//p[@class='episode-synopsis']")
362 thumbnailFilter = etree.XPath(
u".//span[@class='episode-image cta-play']//img")
364 for result
in searchResults:
365 tmpLink = linkFilter(result)
368 bbciplayerItem = etree.XML(self.
common.mnvItem)
370 audioTF = audioFilter(result)
372 link = tmpLink[0].attrib[
'href']
373 if urlType ==
'bigscreen':
374 link =
u'http://www.bbc.co.uk/iplayer/bigscreen%s' % link.replace(
u'/iplayer',
u'')
375 elif urlType ==
'bbcweb':
376 link =
u'http://www.bbc.co.uk'+ link
379 link = link.replace(
u'/iplayer/episode/',
u'')
380 index = link.find(
u'/')
383 etree.SubElement(bbciplayerItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}customhtml").text =
'true'
385 link =
u'http://www.bbc.co.uk'+ link
386 link = self.
common.ampReplace(link)
388 title = self.
common.massageText(titleFilter(result)[0].attrib[
'title'].strip())
389 description = self.
common.massageText(etree.tostring(descFilter(result)[0], method=
"text", encoding=unicode).strip())
392 bbciplayerItem.find(
'title').text = title
393 bbciplayerItem.find(
'author').text =
u'BBC'
394 bbciplayerItem.find(
'pubDate').text = pubDate
395 bbciplayerItem.find(
'description').text = description
396 bbciplayerItem.find(
'link').text = link
397 bbciplayerItem.xpath(
'.//media:thumbnail', namespaces=self.
common.namespaces)[0].attrib[
'url'] = self.
common.ampReplace(thumbnailFilter(result)[0].attrib[
'src'])
398 bbciplayerItem.xpath(
'.//media:content', namespaces=self.
common.namespaces)[0].attrib[
'url'] = link
405 etree.SubElement(bbciplayerItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}country").text = countCode
408 etree.SubElement(bbciplayerItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}season").text = s_e[0]
410 etree.SubElement(bbciplayerItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}episode").text = s_e[1]
411 itemDict[title.lower()] = bbciplayerItem
413 if not len(itemDict.keys()):
414 raise BBCVideoNotFound(
u"No BBC Video matches found for search value (%s)" % title)
417 self.
channel[
'channel_numresults'] = len(itemDict)
419 return [itemDict, resultTree.xpath(
'//pageInfo')[0].text]
424 """Common name for a video search. Used to interface with MythTV plugin NetVision
430 sys.stderr.write(
u'%s' % e)
433 if self.
config[
'debug_enabled']:
434 print "self.userPrefs:"
435 sys.stdout.write(etree.tostring(self.
userPrefs, encoding=
'UTF-8', pretty_print=
True))
445 data = self.
searchTitle(title, pagenumber, self.page_limit)
446 except BBCVideoNotFound, msg:
447 sys.stderr.write(
u"%s\n" % msg)
449 except BBCUrlError, msg:
450 sys.stderr.write(
u'%s\n' % msg)
452 except BBCHttpError, msg:
455 except BBCRssError, msg:
459 sys.stderr.write(
u"! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (title, e))
463 rssTree = etree.XML(self.
common.mnvRSS+
u'</rss>')
466 itemCount = len(data[0].keys())
467 if data[1] ==
'true':
468 self.
channel[
'channel_returned'] = itemCount
469 self.
channel[
'channel_startindex'] = itemCount
470 self.
channel[
'channel_numresults'] = itemCount+(self.page_limit*(int(pagenumber)-1)+1)
472 self.
channel[
'channel_returned'] = itemCount+(self.page_limit*(int(pagenumber)-1))
473 self.
channel[
'channel_startindex'] = self.
channel[
'channel_returned']
474 self.
channel[
'channel_numresults'] = self.
channel[
'channel_returned']
478 rssTree.append(channelTree)
481 for key
in sorted(data[0].keys()):
483 channelTree.append(data[0][key])
487 sys.stdout.write(
u'<?xml version="1.0" encoding="UTF-8"?>\n')
488 sys.stdout.write(etree.tostring(rssTree, encoding=
'UTF-8', pretty_print=
True))
493 '''Gather the BBC iPlayer feeds then get a max page of videos meta data in each of them
494 Display the results and exit
500 sys.stderr.write(
u'%s' % e)
503 if self.
config[
'debug_enabled']:
504 print "self.userPrefs:"
505 sys.stdout.write(etree.tostring(self.
userPrefs, encoding=
'UTF-8', pretty_print=
True))
512 rssTree = etree.XML(self.
common.mnvRSS+
u'</rss>')
516 rssTree.append(channelTree)
519 searchResultTree = []
520 searchFilter = etree.XPath(
u"//item")
521 userSearchStrings =
u'userSearchStrings'
523 userSearch = self.
userPrefs.
find(userSearchStrings).xpath(
'./userSearch')
525 for searchDetails
in userSearch:
527 data = self.
searchTitle(searchDetails.find(
'searchTerm').text, 1, self.page_limit)
528 except BBCVideoNotFound, msg:
529 sys.stderr.write(
u"%s\n" % msg)
531 except BBCUrlError, msg:
532 sys.stderr.write(
u'%s\n' % msg)
534 except BBCHttpError, msg:
537 except BBCRssError, msg:
541 sys.stderr.write(
u"! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (title, e))
543 dirElement = etree.XML(
u'<directory></directory>')
544 dirElement.attrib[
'name'] = self.
common.massageText(searchDetails.find(
'dirName').text)
547 for key
in sorted(data[0].keys()):
549 dirElement.append(data[0][key])
551 channelTree.append(dirElement)
555 rssData = etree.XML(
u'<xml></xml>')
556 for feedType
in [
u'treeviewURLS',
u'userFeeds']:
562 urlEnabled = rssFeed.attrib.get(
'enabled')
563 if urlEnabled ==
'false':
565 urlName = rssFeed.attrib.get(
'name')
567 uniqueName =
u'%s;%s' % (urlName, rssFeed.text)
569 uniqueName =
u'RSS;%s' % (rssFeed.text)
570 url = etree.XML(
u'<url></url>')
571 etree.SubElement(url,
"name").text = uniqueName
572 etree.SubElement(url,
"href").text = rssFeed.text
573 etree.SubElement(url,
"filter").text =
u"atm:title"
574 etree.SubElement(url,
"filter").text =
u"//atm:entry"
575 etree.SubElement(url,
"parserType").text =
u'xml'
578 if self.
config[
'debug_enabled']:
580 sys.stdout.write(etree.tostring(rssData, encoding=
'UTF-8', pretty_print=
True))
584 if rssData.find(
'url')
is not None:
586 resultTree = self.
common.getUrlData(rssData)
587 except Exception, errormsg:
589 if self.
config[
'debug_enabled']:
591 sys.stdout.write(etree.tostring(resultTree, encoding=
'UTF-8', pretty_print=
True))
598 urlType =
u'fullscreen'
601 feedFilter = etree.XPath(
'//url[text()=$url]')
602 itemFilter = etree.XPath(
'.//atm:entry', namespaces=self.
common.namespaces)
603 titleFilter = etree.XPath(
'.//atm:title', namespaces=self.
common.namespaces)
604 mediaFilter = etree.XPath(
'.//atm:category[@term="TV"]', namespaces=self.
common.namespaces)
605 linkFilter = etree.XPath(
'.//atm:link', namespaces=self.
common.namespaces)
606 descFilter1 = etree.XPath(
'.//atm:content', namespaces=self.
common.namespaces)
607 descFilter2 = etree.XPath(
'.//p')
608 itemThumbNail = etree.XPath(
'.//media:thumbnail', namespaces=self.
common.namespaces)
609 creationDate = etree.XPath(
'.//atm:updated', namespaces=self.
common.namespaces)
610 itemDwnLink = etree.XPath(
'.//media:content', namespaces=self.
common.namespaces)
611 itemLanguage = etree.XPath(
'.//media:content', namespaces=self.
common.namespaces)
612 rssName = etree.XPath(
'atm:title', namespaces=self.
common.namespaces)
614 categoryElement =
None
616 for result
in resultTree.findall(
'results'):
617 names = result.find(
'name').text.split(
u';')
618 names[0] = self.
common.massageText(names[0])
619 if names[0] ==
'RSS':
620 names[0] = self.
common.massageText(rssName(result.find(
'result'))[0].text.replace(
u'BBC iPlayer - ',
u''))
623 url = feedFilter(self.
userPrefs, url=names[1])
625 if url[0].attrib.get(
'max'):
627 urlMax = int(url[0].attrib.get(
'max'))
630 elif url[0].getparent().attrib.get(
'globalmax'):
632 urlMax = int(url[0].getparent().attrib.get(
'globalmax'))
638 channelLanguage =
u'en'
640 if names[0] != categoryDir:
641 if categoryDir
is not None:
642 channelTree.append(categoryElement)
643 categoryElement = etree.XML(
u'<directory></directory>')
644 categoryElement.attrib[
'name'] = names[0]
646 categoryDir = names[0]
648 if self.
config[
'debug_enabled']:
649 print "Results: #Items(%s) for (%s)" % (len(itemFilter(result)), names)
654 itemDict = [(pd.text, pd.getparent())
for pd
in creationDate(result)]
655 itemList = sorted(itemDict, key=itemgetter(0), reverse=
True)
657 for tupleDate
in itemList:
658 itemData = tupleDate[1]
659 bbciplayerItem = etree.XML(self.
common.mnvItem)
660 tmpLink = linkFilter(itemData)
662 link = tmpLink[0].attrib[
'href']
663 if urlType ==
'bigscreen':
664 link = link.replace(
u'/iplayer/',
u'/iplayer/bigscreen/')
665 elif urlType ==
'bbcweb':
668 if len(mediaFilter(itemData)):
669 link = link.replace(
u'http://www.bbc.co.uk/iplayer/episode/',
u'')
670 index = link.find(
u'/')
673 etree.SubElement(bbciplayerItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}customhtml").text =
'true'
679 pubdate = creationDate(itemData)
681 pubdate = pubdate[0].text
682 pubdate = time.strptime(pubdate,
'%Y-%m-%dT%H:%M:%SZ')
683 pubdate = time.strftime(self.
common.pubDateFormat, pubdate)
685 pubdate = datetime.datetime.now().strftime(self.
common.pubDateFormat)
688 bbciplayerItem.find(
'title').text = self.
common.massageText(titleFilter(itemData)[0].text.strip())
689 bbciplayerItem.find(
'author').text = itemAuthor
690 bbciplayerItem.find(
'pubDate').text = pubdate
691 description = etree.HTML(etree.tostring(descFilter1(itemData)[0], method=
"text", encoding=unicode).strip())
692 description = etree.tostring(descFilter2(description)[1], method=
"text", encoding=unicode).strip()
693 bbciplayerItem.find(
'description').text = self.
common.massageText(description)
694 bbciplayerItem.find(
'link').text = link
695 itemDwnLink(bbciplayerItem)[0].attrib[
'url'] = link
697 itemThumbNail(bbciplayerItem)[0].attrib[
'url'] = self.
common.ampReplace(itemThumbNail(itemData)[0].attrib[
'url'])
700 itemLanguage(bbciplayerItem)[0].attrib[
'lang'] = channelLanguage
704 etree.SubElement(bbciplayerItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}country").text = countCode
707 etree.SubElement(bbciplayerItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}season").text = s_e[0]
709 etree.SubElement(bbciplayerItem,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}episode").text = s_e[1]
710 categoryElement.append(bbciplayerItem)
717 if categoryElement
is not None:
718 if categoryElement.xpath(
'.//item')
is not None:
719 channelTree.append(categoryElement)
722 if len(rssTree.xpath(
'//item')):
724 sys.stdout.write(
u'<?xml version="1.0" encoding="UTF-8"?>\n')
725 sys.stdout.write(etree.tostring(rssTree, encoding=
'UTF-8', pretty_print=
True))