15__title__ =
"tributeca_api - XPath and XSLT functions for the Tribute.ca grabber"
16__author__=
"R.D. Vaughan"
18This python script is intended to perform a variety of utility functions
19for the conversion of data to the MNV standard RSS output format.
20See this link for the specifications:
21http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
30__xpathClassList__ = [
'xpathFunctions', ]
34__xsltExtentionList__ = []
36import os, sys, re, time, datetime, shutil,
urllib.request, urllib.parse, urllib.error, string
37from copy
import deepcopy
41 """Wraps a stream with an encoder"""
50 """Wraps the output stream, encoding Unicode strings with the specified encoding"""
51 if isinstance(obj, str):
54 self.
out.buffer.write(obj)
59 """Delegate everything but write to the stream"""
60 return getattr(self.
out, attr)
62if isinstance(sys.stdout, io.TextIOWrapper):
67 from io
import StringIO
68 from lxml
import etree
70 sys.stderr.write(
'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
75 """Functions specific to extending XPath
78 self.
functList = [
'tributecaLinkGeneration',
'tributecaThumbnailLink',
'tributecaTopTenTitle',
'tributecaIsCustomHTML',
'tributecaCheckIfDBItem',
'tributecaDebug',
'tributecaGetAnchors', ]
80 self.
anchorList = etree.XPath(
".//a", namespaces=common.namespaces)
91 '''Generate a link for the Tribute.ca site. Significant massaging of the title is required.
92 Call example: 'mnvXpath:tributecaLinkGeneration(position(), ..//a)'
95 downloadURL = 'http://www.tribute.ca/streamingflash/%s.flv'
96 position = int(args[0])-1
97 webURL =
'http://www.tribute.ca%s' % args[1][position].attrib[
'href'].strip()
101 if self.
persistence[
'tributecaLinkGeneration']
is not None:
102 returnValue = self.
persistence[
'tributecaLinkGeneration']
104 if returnValue != webURL:
105 return downloadURL % returnValue
109 currentTitle = self.
TextTail(args[1][position]).strip()
113 previousTitle = self.
TextTail(args[1][position-1]).strip()
116 titleArray = [currentTitle, previousTitle]
117 if titleArray[0].startswith(
'IMAX:'):
118 titleArray[0] = titleArray[0].replace(
'IMAX:',
'').strip()
122 for counter
in range(len(titleArray)):
123 index = titleArray[counter].
find(
": ")
125 titleArray[counter] = titleArray[counter][:index].strip()
126 index = titleArray[counter].
find(
" (")
128 titleArray[counter] = titleArray[counter][:index].strip()
129 if titleArray[0].startswith(titleArray[1])
and titleArray[1]:
130 index = titleArray[counter].
find(
"3D")
132 titleArray[counter] = titleArray[counter][:index].strip()
136 if titleArray[0].startswith(titleArray[1])
and titleArray[1]:
138 if currentTitle.find(
': An IMAX') != -1:
140 titleArray[0] = titleArray[0].replace(
'&',
'and')
141 self.
persistence[
'tributecaThumbnailLink'] = urllib.parse.quote_plus(titleArray[0].lower().replace(
' ',
'_').replace(
"'",
'').replace(
'-',
'_').replace(
'?',
'').replace(
'.',
'').encode(
"utf-8"))
142 titleArray[0] = urllib.parse.quote_plus(re.sub(
'[%s]' % re.escape(string.punctuation),
'', titleArray[0].lower().replace(
' ',
'').encode(
"utf-8")))
145 videocode =
'%s%s' % (titleArray[0], trailer2)
146 flvURL = downloadURL % videocode
147 resultCheckUrl = common.checkURL(flvURL)
148 if not resultCheckUrl[0]
or resultCheckUrl[1][
'Content-Type'] !=
'video/x-flv':
150 videocode = titleArray[0]
151 flvURL = downloadURL % titleArray[0]
152 resultCheckUrl = common.checkURL(flvURL)
153 if not resultCheckUrl[0]
or resultCheckUrl[1][
'Content-Type'] !=
'video/x-flv':
156 videocode = titleArray[0]+
'tr2'
157 flvURL = downloadURL % videocode
158 resultCheckUrl = common.checkURL(flvURL)
159 if not resultCheckUrl[0]
or resultCheckUrl[1][
'Content-Type'] !=
'video/x-flv':
160 if currentTitle.find(
': An IMAX') == -1
and currentTitle.find(
': ') != -1:
161 titleArray[0] = currentTitle.replace(
'&',
'and')
162 titleArray[0] = urllib.parse.quote_plus(re.sub(
'[%s]' % re.escape(string.punctuation),
'', titleArray[0].lower().replace(
' ',
'').encode(
"utf-8")))
163 videocode = titleArray[0]
164 flvURL = downloadURL % videocode
165 resultCheckUrl = common.checkURL(flvURL)
166 if not resultCheckUrl[0]
or resultCheckUrl[1][
'Content-Type'] !=
'video/x-flv':
171 self.
persistence[
'tributecaLinkGeneration'] = videocode
172 return common.linkWebPage(
'dummycontext',
'tributeca')+videocode
174 self.
persistence[
'tributecaLinkGeneration'] = flvURL
179 '''Verify that the thumbnail actually exists. If it does not then use the site image.
180 Call example: 'mnvXpath:tributecaThumbnailLink(string(.//img/@src))'
181 return the thumbnail url
183 siteImage = 'http://www.tribute.ca/images/tribute_title.gif'
184 if not len(args[0])
or not self.
persistence[
'tributecaThumbnailLink']:
187 if args[0].startswith(
'http:'):
188 url = args[0].strip()
190 url =
'http://www.tribute.ca/tribute_objects/images/movies/%s%s' % (self.
persistence[
'tributecaThumbnailLink'],
'/poster.jpg')
191 resultCheckUrl = common.checkURL(url)
192 if not resultCheckUrl[0]
or resultCheckUrl[1][
'Content-Type'] !=
'image/jpeg':
199 '''Take a top ten title and add a leading '0' if less than 10 as it forces correct sort order
200 Call example: 'mnvXpath:tributecaTopTenTitle(string(..))'
201 return a replacement title
206 index = args[0].
find(
'.')
214 '''Check if the link is for a custom HTML
215 Example call: mnvXpath:tributecaIsCustomHTML(('dummy'))
216 return True if the link does not start with "http://"
217 return False if the link starts with "http://"
219 if self.
persistence[
'tributecaLinkGeneration']
is None:
222 if self.
persistence[
'tributecaLinkGeneration'].startswith(
'http://'):
229 '''Use a unique key/value pairing to find out if the 'internetcontentarticles' table already
230 has a matching item. This is done to avoid accessing the Internet when not required.
231 Call example:
'mnvXpath:tributecaCheckIfDBItem(.)'
232 return True if a match was found
233 return False if a match was
not found
235 return common.checkIfDBItem(
'dummy', {
'feedtitle':
'Movie Trailers',
'title': arg[0].replace(
'Trailer',
'').strip(),
'author': arg[1],
'description': arg[2]})
239 ''' Routine used to get specific anchor elements.
240 Unfortunately position dependent.
247 ''' Routine only used for debugging. Prints out the node
248 passed as an argument. Not to be used
in production.
251 testpath = etree.XPath(".//a", namespaces=common.namespaces)
255 sys.stdout.write(
'\nElement Count (%s):\n' % count)
258 print(
"testpath(%s)" % testpath(x))
262 return "========tributecaDebug Called========="
278 '''Example of an XSLT extension. This code must be changed to do anything useful!!!
281 def execute(self, context, self_node, input_node, output_parent):
282 copyItem = deepcopy(input_node)
283 min_sec = copyItem.xpath(
'duration')[0].text.split(
':')
285 for count
in range(len(min_sec)):
286 seconds+=int(min_sec[count])*(60*(len(min_sec)-count-1))
287 output_parent.text =
'%s' % seconds
def __init__(self, outstream, encoding=None)
def __getattr__(self, attr)
def tributecaDebug(self, context, *arg)
def tributecaTopTenTitle(self, context, *args)
def tributecaIsCustomHTML(self, context, *args)
def tributecaGetAnchors(self, context, *arg)
def tributecaThumbnailLink(self, context, *args)
def tributecaLinkGeneration(self, context, *args)
Start of XPath extension functions.
def tributecaCheckIfDBItem(self, context, *arg)
def execute(self, context, self_node, input_node, output_parent)
static pid_list_t::iterator find(const PIDInfoMap &map, pid_list_t &list, pid_list_t::iterator begin, pid_list_t::iterator end, bool find_open)