15__title__ =
"tedtalksXSL_api - XPath and XSLT functions for the TedTalks RSS/HTML"
16__author__=
"R.D. Vaughan"
This Python script is intended to perform a variety of utility functions
for the conversion of data to the MNV standard RSS output format.
See this link for the specifications:
http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
30__xpathClassList__ = [
'xpathFunctions', ]
34__xsltExtentionList__ = []
36import os, sys, re, time, datetime, shutil,
urllib.request, urllib.parse, urllib.error, string
37from copy
import deepcopy
42 """Wraps a stream with an encoder"""
51 """Wraps the output stream, encoding Unicode strings with the specified encoding"""
52 if isinstance(obj, str):
55 self.
out.buffer.write(obj)
60 """Delegate everything but write to the stream"""
61 return getattr(self.
out, attr)
63if isinstance(sys.stdout, io.TextIOWrapper):
67 from io
import StringIO
68 from lxml
import etree
70 sys.stderr.write(
'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
"""Functions specifically for extending XPath
78 self.
functList = [
'tedtalksMakeItem',
'tedtalksGetItem',
'tedtalksMakeLink',
'tedtalksTitleRSS', ]
80 'media':
"http://search.yahoo.com/mrss/",
81 'xhtml':
"http://www.w3.org/1999/xhtml",
82 'mythtv':
"http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format",
86 self.
durationFilter = etree.XPath(
'//dl[@class="talkMedallion clearfix"]//em[@class="date"]/text()', namespaces=self.
namespaces)
88 self.
flvPlayerLink =
'http://static.hd-trailers.net/mediaplayer/player.swf?autostart=true&backcolor=000000&frontcolor=999999&lightcolor=000000&screencolor=000000&controlbar=over&file=%s'
'''Generate item elements from a Video HTML page on the TedTalks site.
Call example: 'mnvXpath:tedtalksMakeItem(concat('http://www.ted.com', normalize-space(./@href), $paraMeter))/link'
return a number of item elements
108 tmpHandle = urllib.request.urlopen(webURL)
109 htmlString = str(tmpHandle.read(),
'utf-8')
112 sys.stderr.write(
'! Error: TedTalk web page read issue for URL(%s)\nerror(%s)\n' % (webURL, errmsg))
113 return etree.XML(
"<xml></xml>" )
115 htmlElementTree = etree.HTML(htmlString)
118 mediaNamespace =
"http://search.yahoo.com/mrss/"
119 media =
"{%s}" % mediaNamespace
120 NSMAP = {
'media' : mediaNamespace}
121 elementTmp = etree.Element(media +
"media", nsmap=NSMAP)
126 tmpPubDate = common.pubDate(
'dummy',
'1 '+tmpPubDate,
"%d %b %Y")
128 tmpPubDate = common.pubDate(
'dummy',
'')
132 tmpFlvLink = self.
flvPlayerLink %
'http://video.ted.com/%s' % self.
stripSubstring(htmlString,
'\ths:"',
'"').replace(
'high', parmDict[
'flv'])
137 tmpFileName = self.
stripSubstring(htmlString,
'\ths:"talks/dynamic/',
'-')
138 tmpDownloadLink =
'http://video.ted.com/talks/podcast/%s' % tmpFileName
139 if parmDict[
'download'] ==
'HD':
140 tmpDownloadLink+=
'_480.mp4'
142 tmpDownloadLink+=
'.mp4'
150 tmpDesc = tmpDesc[0].text
157 index = tmpDuration[0].
find(
' ')
159 tmpDuration = common.convertDuration(
'dummy', tmpDuration[0][:index])
166 etree.SubElement(elementTmp,
"pubDate").text = tmpPubDate
167 etree.SubElement(elementTmp,
"description").text = tmpDesc
168 etree.SubElement(elementTmp,
"link").text = tmpFlvLink
169 tmpgroup = etree.SubElement(elementTmp, media +
"group")
170 tmpTNail = etree.SubElement(tmpgroup, media +
"thumbnail")
171 tmpTNail.attrib[
'url'] = tmpThumbNail
172 tmpContent = etree.SubElement(tmpgroup, media +
"content")
173 tmpContent.attrib[
'url'] = tmpDownloadLink
174 tmpContent.attrib[
'duration'] = tmpDuration
175 tmpContent.attrib[
'lang'] =
'en'
'''Return item elements that were previously created in a "tedtalksMakeItem" call
Call example: 'mnvXpath:tedtalksGetItem(concat('http://www.ted.com', normalize-space(./@href)))/*'
return a number of item elements
'''Build the flv playback link for a talk from its download (enclosure) URL
Call example: 'mnvXpath:tedtalksMakeLink(enclosure/@url, $paraMeter)'
return a link for playing the flv file
196 tmpDownloadLink = arg[0]
198 index = tmpDownloadLink.rfind('/')
199 videoFileName =
'http://video.ted.com/talks/dynamic%s' % tmpDownloadLink[index:].replace(
'_480',
'').replace(
'.mp4',
'')
200 videoFileName+=
'-%s.flv' % parmDict[
'flv']
'''Return the title text that precedes the last '-' in the given title
Call example: 'mnvXpath:tedtalksTitleRSS(string(title))'
return a massaged title string
210 index = title.rfind('-')
213 return title[:index].strip()
217 '''Return a substring terminated by specific character(s)
220 index = string.find(startText)
223 string = string[index+len(startText):]
224 index = string.find(terminatorChar)
227 return string[:index].strip()
231 '''Set the parameters for TedTalks
232 return a dictionary of parameters
235 args = parameters.split(terminatorChar)
238 paramDict[tmp[0]] = tmp[1]
def __getattr__(self, attr)
def __init__(self, outstream, encoding=None)
def tedtalksMakeLink(self, context, *arg)
def tedtalksMakeItem(self, context, *arg)
Start of XPath extension functions.
def tedtalksTitleRSS(self, context, *arg)
def parameterArgs(self, parameters, terminatorChar=';')
def stripSubstring(self, string, startText, terminatorChar)
def tedtalksGetItem(self, context, *arg)
static pid_list_t::iterator find(const PIDInfoMap &map, pid_list_t &list, pid_list_t::iterator begin, pid_list_t::iterator end, bool find_open)