15 __title__ =
"tedtalksXSL_api - XPath and XSLT functions for the TedTalks RSS/HTML"
16 __author__=
"R.D. Vaughan"
18 This python script is intended to perform a variety of utility functions
19 for the conversion of data to the MNV standard RSS output format.
20 See this link for the specifications:
21 http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
30 __xpathClassList__ = [
'xpathFunctions', ]
34 __xsltExtentionList__ = []
36 import os, sys, re, time, datetime, shutil, urllib.request, urllib.parse, urllib.error, string
37 from copy
import deepcopy
42 """Wraps a stream with an encoder"""
51 """Wraps the output stream, encoding Unicode strings with the specified encoding"""
52 if isinstance(obj, str):
55 self.
out.buffer.write(obj)
60 """Delegate everything but write to the stream"""
61 return getattr(self.
out, attr)
63 if isinstance(sys.stdout, io.TextIOWrapper):
67 from io
import StringIO
68 from lxml
import etree
69 except Exception
as e:
70 sys.stderr.write(
'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
78 for digit
in etree.LIBXML_VERSION:
79 version+=str(digit)+
'.'
80 version = version[:-1]
83 ! Error - The installed version of the "lxml" python library "libxml" version is too old.
84 At least "libxml" version 2.7.2 must be installed. Your version is (%s).
90 """Functions specifically extending XPath
93 self.
functList = [
'tedtalksMakeItem',
'tedtalksGetItem',
'tedtalksMakeLink',
'tedtalksTitleRSS', ]
95 'media':
"http://search.yahoo.com/mrss/",
96 'xhtml':
"http://www.w3.org/1999/xhtml",
97 'mythtv':
"http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format",
103 self.
flvPlayerLink =
'http://static.hd-trailers.net/mediaplayer/player.swf?autostart=true&backcolor=000000&frontcolor=999999&lightcolor=000000&screencolor=000000&controlbar=over&file=%s'
114 '''Generate item elements from a Video HTML page on the TedTalks site.
115 Call example: 'mnvXpath:tedtalksMakeItem(concat('http://www.ted.com', normalize-space(./@href), $paraMeter))/link'
116 return a number of item elements
123 tmpHandle = urllib.request.urlopen(webURL)
124 htmlString = str(tmpHandle.read(),
'utf-8')
127 sys.stderr.write(
'! Error: TedTalk web page read issue for URL(%s)\nerror(%s)\n' % (webURL, errmsg))
128 return etree.XML(
"<xml></xml>" )
130 htmlElementTree = etree.HTML(htmlString)
133 mediaNamespace =
"http://search.yahoo.com/mrss/"
134 media =
"{%s}" % mediaNamespace
135 NSMAP = {
'media' : mediaNamespace}
136 elementTmp = etree.Element(media +
"media", nsmap=NSMAP)
141 tmpPubDate = common.pubDate(
'dummy',
'1 '+tmpPubDate,
"%d %b %Y")
143 tmpPubDate = common.pubDate(
'dummy',
'')
147 tmpFlvLink = self.
flvPlayerLink %
'http://video.ted.com/%s' % self.
stripSubstring(htmlString,
'\ths:"',
'"').replace(
'high', parmDict[
'flv'])
152 tmpFileName = self.
stripSubstring(htmlString,
'\ths:"talks/dynamic/',
'-')
153 tmpDownloadLink =
'http://video.ted.com/talks/podcast/%s' % tmpFileName
154 if parmDict[
'download'] ==
'HD':
155 tmpDownloadLink+=
'_480.mp4'
157 tmpDownloadLink+=
'.mp4'
165 tmpDesc = tmpDesc[0].text
172 index = tmpDuration[0].
find(
' ')
174 tmpDuration = common.convertDuration(
'dummy', tmpDuration[0][:index])
181 etree.SubElement(elementTmp,
"pubDate").text = tmpPubDate
182 etree.SubElement(elementTmp,
"description").text = tmpDesc
183 etree.SubElement(elementTmp,
"link").text = tmpFlvLink
184 tmpgroup = etree.SubElement(elementTmp, media +
"group")
185 tmpTNail = etree.SubElement(tmpgroup, media +
"thumbnail")
186 tmpTNail.attrib[
'url'] = tmpThumbNail
187 tmpContent = etree.SubElement(tmpgroup, media +
"content")
188 tmpContent.attrib[
'url'] = tmpDownloadLink
189 tmpContent.attrib[
'duration'] = tmpDuration
190 tmpContent.attrib[
'lang'] =
'en'
197 '''Return item elements that were previously created in "tedtalksMakeItem" call
198 Call example: 'mnvXpath:tedtalksGetItem(concat('http://www.ted.com', normalize-space(./@href))/*'
199 return a number of item elements
207 '''Build the flv player link for a video from its download (enclosure) link
208 Call example: 'mnvXpath:tedtalksMakeLink(enclosure/@url, $paraMeter)'
209 return a link for playing the flv file
211 tmpDownloadLink = arg[0]
213 index = tmpDownloadLink.rfind(
'/')
214 videoFileName =
'http://video.ted.com/talks/dynamic%s' % tmpDownloadLink[index:].replace(
'_480',
'').replace(
'.mp4',
'')
215 videoFileName+=
'-%s.flv' % parmDict[
'flv']
220 '''Massage an RSS title by dropping the text from its last '-' onward
221 Call example: 'mnvXpath:tedtalksTitleRSS(string(title))'
222 return a massaged title string
225 index = title.rfind(
'-')
228 return title[:index].strip()
232 '''Return a substring terminated by specific character(s)
235 index = string.find(startText)
238 string = string[index+len(startText):]
239 index = string.find(terminatorChar)
242 return string[:index].strip()
246 '''Set the parameters for TedTalks
247 return a dictionary of parameters
250 args = parameters.split(terminatorChar)
253 paramDict[tmp[0]] = tmp[1]