15 __title__ =
"tributeca_api - XPath and XSLT functions for the Tribute.ca grabber"
16 __author__=
"R.D. Vaughan"
18 This python script is intended to perform a variety of utility functions
19 for the conversion of data to the MNV standard RSS output format.
20 See this link for the specifications:
21 http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
30 __xpathClassList__ = [
'xpathFunctions', ]
34 __xsltExtentionList__ = []
36 import os, sys, re, time, datetime, shutil, urllib.request, urllib.parse, urllib.error, string
37 from copy
import deepcopy
41 """Wraps a stream with an encoder"""
50 """Wraps the output stream, encoding Unicode strings with the specified encoding"""
51 if isinstance(obj, str):
54 self.
out.buffer.write(obj)
59 """Delegate everything but write to the stream"""
60 return getattr(self.
out, attr)
62 if isinstance(sys.stdout, io.TextIOWrapper):
67 from io
import StringIO
68 from lxml
import etree
69 except Exception
as e:
70 sys.stderr.write(
'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
# Render the components of lxml's libxml version as one dotted string
# (the form quoted in the "too old" error message that follows).
version = '.'.join(str(digit) for digit in etree.LIBXML_VERSION)
83 ! Error - The installed version of the "lxml" python library "libxml" version is too old.
84 At least "libxml" version 2.7.2 must be installed. Your version is (%s).
90 """Functions specific extending XPath
93 self.
functList = [
'tributecaLinkGeneration',
'tributecaThumbnailLink',
'tributecaTopTenTitle',
'tributecaIsCustomHTML',
'tributecaCheckIfDBItem',
'tributecaDebug',
'tributecaGetAnchors', ]
95 self.
anchorList = etree.XPath(
".//a", namespaces=common.namespaces)
106 '''Generate a link for the Tribute.ca site. Significant massaging of the title is required.
107 Call example: 'mnvXpath:tributecaLinkGeneration(position(), ..//a)'
110 downloadURL =
'http://www.tribute.ca/streamingflash/%s.flv'
111 position = int(args[0])-1
112 webURL =
'http://www.tribute.ca%s' % args[1][position].attrib[
'href'].strip()
116 if self.
persistence[
'tributecaLinkGeneration']
is not None:
117 returnValue = self.
persistence[
'tributecaLinkGeneration']
119 if returnValue != webURL:
120 return downloadURL % returnValue
124 currentTitle = self.
TextTail(args[1][position]).strip()
128 previousTitle = self.
TextTail(args[1][position-1]).strip()
131 titleArray = [currentTitle, previousTitle]
132 if titleArray[0].startswith(
'IMAX:'):
133 titleArray[0] = titleArray[0].replace(
'IMAX:',
'').strip()
137 for counter
in range(len(titleArray)):
138 index = titleArray[counter].
find(
": ")
140 titleArray[counter] = titleArray[counter][:index].strip()
141 index = titleArray[counter].
find(
" (")
143 titleArray[counter] = titleArray[counter][:index].strip()
144 if titleArray[0].startswith(titleArray[1])
and titleArray[1]:
145 index = titleArray[counter].
find(
"3D")
147 titleArray[counter] = titleArray[counter][:index].strip()
151 if titleArray[0].startswith(titleArray[1])
and titleArray[1]:
153 if currentTitle.find(
': An IMAX') != -1:
155 titleArray[0] = titleArray[0].replace(
'&',
'and')
156 self.
persistence[
'tributecaThumbnailLink'] = urllib.parse.quote_plus(titleArray[0].lower().replace(
' ',
'_').replace(
"'",
'').replace(
'-',
'_').replace(
'?',
'').replace(
'.',
'').encode(
"utf-8"))
157 titleArray[0] = urllib.parse.quote_plus(re.sub(
'[%s]' % re.escape(string.punctuation),
'', titleArray[0].lower().replace(
' ',
'').encode(
"utf-8")))
160 videocode =
'%s%s' % (titleArray[0], trailer2)
161 flvURL = downloadURL % videocode
162 resultCheckUrl = common.checkURL(flvURL)
163 if not resultCheckUrl[0]
or resultCheckUrl[1][
'Content-Type'] !=
'video/x-flv':
165 videocode = titleArray[0]
166 flvURL = downloadURL % titleArray[0]
167 resultCheckUrl = common.checkURL(flvURL)
168 if not resultCheckUrl[0]
or resultCheckUrl[1][
'Content-Type'] !=
'video/x-flv':
171 videocode = titleArray[0]+
'tr2'
172 flvURL = downloadURL % videocode
173 resultCheckUrl = common.checkURL(flvURL)
174 if not resultCheckUrl[0]
or resultCheckUrl[1][
'Content-Type'] !=
'video/x-flv':
175 if currentTitle.find(
': An IMAX') == -1
and currentTitle.find(
': ') != -1:
176 titleArray[0] = currentTitle.replace(
'&',
'and')
177 titleArray[0] = urllib.parse.quote_plus(re.sub(
'[%s]' % re.escape(string.punctuation),
'', titleArray[0].lower().replace(
' ',
'').encode(
"utf-8")))
178 videocode = titleArray[0]
179 flvURL = downloadURL % videocode
180 resultCheckUrl = common.checkURL(flvURL)
181 if not resultCheckUrl[0]
or resultCheckUrl[1][
'Content-Type'] !=
'video/x-flv':
186 self.
persistence[
'tributecaLinkGeneration'] = videocode
187 return common.linkWebPage(
'dummycontext',
'tributeca')+videocode
189 self.
persistence[
'tributecaLinkGeneration'] = flvURL
194 '''Verify that the thumbnail actually exists. If it does not then use the site image.
195 Call example: 'mnvXpath:tributecaThumbnailLink(string(.//img/@src))'
196 return the thumbnail url
198 siteImage =
'http://www.tribute.ca/images/tribute_title.gif'
199 if not len(args[0])
or not self.
persistence[
'tributecaThumbnailLink']:
202 if args[0].startswith(
'http:'):
203 url = args[0].strip()
205 url =
'http://www.tribute.ca/tribute_objects/images/movies/%s%s' % (self.
persistence[
'tributecaThumbnailLink'],
'/poster.jpg')
206 resultCheckUrl = common.checkURL(url)
207 if not resultCheckUrl[0]
or resultCheckUrl[1][
'Content-Type'] !=
'image/jpeg':
214 '''Take a top ten title and add a leading '0' if less than 10 as it forces correct sort order
215 Call example: 'mnvXpath:tributecaTopTenTitle(string(..))'
216 return a replacement title
221 index = args[0].
find(
'.')
229 '''Check if the link is for a custom HTML
230 Example call: mnvXpath:tributecaIsCustomHTML(('dummy'))
231 return True if the link does not start with "http://"
232 return False if the link starts with "http://"
234 if self.
persistence[
'tributecaLinkGeneration']
is None:
237 if self.
persistence[
'tributecaLinkGeneration'].startswith(
'http://'):
244 '''Use a unique key value pairing to find out if the 'internetcontentarticles' table already
245 has a matching item. This is done to save accessing the Internet when not required.
246 Call example: 'mnvXpath:tributecaCheckIfDBItem(.)'
247 return True if a match was found
248 return False if a match was not found
250 return common.checkIfDBItem(
'dummy', {
'feedtitle':
'Movie Trailers',
'title': arg[0].replace(
'Trailer',
'').strip(),
'author': arg[1],
'description': arg[2]})
254 ''' Routine used to get specific anchor elements.
255 Unfortunately position dependent.
256 Call: mnvXpath:tributecaGetAnchors(//ul[@class='clump'], 3)
262 ''' Routine only used for debugging. Prints out the node
263 passed as an argument. Not to be used in production.
264 Call example: mnvXpath:tributecaDebug(//a)
266 testpath = etree.XPath(
".//a", namespaces=common.namespaces)
270 sys.stdout.write(
'\nElement Count (%s):\n' % count)
273 print(
"testpath(%s)" % testpath(x))
277 return "========tributecaDebug Called========="
293 '''Example of an XSLT extension. This code must be changed to do anything useful!!!
def execute(self, context, self_node, input_node, output_parent):
    """Write the total number of seconds of a ``duration`` value to the output.

    The first ``duration`` match found under ``input_node`` is expected to
    hold colon-separated integer fields, most significant first (for
    example ``SS``, ``MM:SS`` or ``HH:MM:SS``). The total is stored, as a
    string, in ``output_parent.text``.

    Parameters:
        context, self_node: supplied by the caller; not used here.
        input_node: node queried with ``xpath('duration')``; it is deep
            copied first so the caller's node is never touched.
        output_parent: object whose ``text`` attribute receives the result.

    Raises:
        IndexError: if no ``duration`` match is found.
        ValueError: if a field is not an integer.
    """
    node_copy = deepcopy(input_node)
    fields = node_copy.xpath('duration')[0].text.split(':')

    # Base-60 accumulation: every further field shifts the running total up
    # by a factor of 60. The previous weight, 60*(fields remaining), gave the
    # last field a weight of 0 and hours a weight of 120 instead of 3600.
    seconds = 0
    for field in fields:
        seconds = seconds * 60 + int(field)

    output_parent.text = '%s' % seconds