MythTV master
tributeca_api.py
Go to the documentation of this file.
1# -*- coding: UTF-8 -*-
2
3# ----------------------
4# Name: tributeca_api - XPath and XSLT functions for the Tribute.ca grabber
5# Python Script
6# Author: R.D. Vaughan
7# Purpose: This python script is intended to perform a variety of utility functions
8# for the conversion of data to the MNV standard RSS output format.
9# See this link for the specifications:
10# http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
11#
12# License:Creative Commons GNU GPL v2
13# (http://creativecommons.org/licenses/GPL/2.0/)
14#-------------------------------------
__title__ = "tributeca_api - XPath and XSLT functions for the Tribute.ca grabber"
__author__ = "R.D. Vaughan"
__purpose__ = """
This python script is intended to perform a variety of utility functions
for the conversion of data to the MNV standard RSS output format.
See this link for the specifications:
http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
"""

# Release history:
#   0.1.0 Initial development
#   0.1.1 Changes due to Web site modifications
__version__ = "v0.1.1"


# Names of the classes in this module that hold XPath extension functions.
__xpathClassList__ = ["xpathFunctions"]

# Names of the XSLT extension classes. Each class is a standalone extension
# function. The list is empty; an entry would look like ['xsltExtExample'].
__xsltExtentionList__ = []
35
36import os, sys, re, time, datetime, shutil, urllib.request, urllib.parse, urllib.error, string
37from copy import deepcopy
38
39
class OutStreamEncoder(object):
    """Wrap a text stream so that whatever is written to it is encoded first.

    ``str`` values are encoded with ``encoding`` and the resulting bytes are
    sent to the wrapped stream's ``buffer``. Any attribute other than
    ``write`` is looked up on the wrapped stream itself.
    """

    def __init__(self, outstream, encoding=None):
        """Keep the stream; use the filesystem encoding when none is given."""
        self.out = outstream
        self.encoding = encoding or sys.getfilesystemencoding()

    def write(self, obj):
        """Send ``obj`` to the stream's binary buffer, encoding ``str`` values first."""
        payload = obj.encode(self.encoding) if isinstance(obj, str) else obj
        try:
            self.out.buffer.write(payload)
        except OSError:
            # Best effort on purpose: an OS-level write failure is ignored.
            pass

    def __getattr__(self, attr):
        """Delegate everything but write to the stream."""
        return getattr(self.out, attr)
61
# Re-wrap the standard streams so that text written to them is encoded as UTF-8.
# ``io`` is imported here because the module's import list does not provide it
# (``from io import StringIO`` further down does not bind the name ``io``);
# without it the isinstance() test raises NameError when the module is loaded.
import io

if isinstance(sys.stdout, io.TextIOWrapper):
    sys.stdout = OutStreamEncoder(sys.stdout, 'utf8')
    sys.stderr = OutStreamEncoder(sys.stderr, 'utf8')
65
# Both libraries are needed further down; if either import fails, report the
# reason on stderr and stop with exit status 1.
try:
    from io import StringIO
    from lxml import etree
except Exception as importError:
    sys.stderr.write(
        '\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n'
        % importError
    )
    sys.exit(1)
72
73
class xpathFunctions(object):
    '''XPath extension functions specific to the Tribute.ca grabber.

    State shared between calls is kept in ``self.persistence``:
      'tributecaLinkGeneration' - the video code (or the Web page URL) stored by the last
                                  link lookup; reset to None once it has been read back
      'tributecaThumbnailLink'  - the poster directory name derived from the last title

    ``etree`` (lxml) and ``common`` are module-level names that this class expects to be
    available at run time; ``common`` is not defined in this file.
    '''
    # Title characters mapped to '_' or dropped when building the poster directory name
    _thumbnailTranslation = str.maketrans({' ': '_', '-': '_', "'": None, '?': None, '.': None})
    # Matches any single ASCII punctuation character
    _punctuation = re.compile('[%s]' % re.escape(string.punctuation))

    def __init__(self):
        # Names of the extension functions this class provides
        self.functList = ['tributecaLinkGeneration', 'tributecaThumbnailLink', 'tributecaTopTenTitle', 'tributecaIsCustomHTML', 'tributecaCheckIfDBItem', 'tributecaDebug', 'tributecaGetAnchors', ]
        self.TextTail = etree.XPath("string()")
        self.anchorList = etree.XPath(".//a", namespaces=common.namespaces)
        self.persistence = {}
    # end __init__()

    # Start of XPath extension functions

    @staticmethod
    def _tributecaVideoCode(title):
        '''Reduce a title to the code used in the FLV file names: lower case, spaces and
        ASCII punctuation removed, then URL quoted (as UTF-8).
        return the video code
        '''
        # The punctuation is removed from the text BEFORE quoting. Applying a str pattern
        # to UTF-8 encoded bytes raises TypeError in Python 3.
        compact = title.lower().replace(' ', '')
        return urllib.parse.quote_plus(xpathFunctions._punctuation.sub('', compact))
    # end _tributecaVideoCode()

    def _tributecaIsFlv(self, url):
        '''Ask common.checkURL() about the url.
        return True if the first item of the result is true and the reported
        Content-Type is 'video/x-flv'
        return False otherwise
        '''
        resultCheckUrl = common.checkURL(url)
        return bool(resultCheckUrl[0]) and resultCheckUrl[1]['Content-Type'] == 'video/x-flv'
    # end _tributecaIsFlv()

    def tributecaLinkGeneration(self, context, *args):
        '''Generate a link for the Tribute.ca site. Significant massaging of the title is required.
        Call example: 'mnvXpath:tributecaLinkGeneration(position(), ..//a)'
        args[0] is the 1-based position of the anchor, args[1] the list of anchor elements.
        return the url link
        '''
        downloadURL = 'http://www.tribute.ca/streamingflash/%s.flv'
        position = int(args[0]) - 1
        anchors = args[1]
        webURL = 'http://www.tribute.ca%s' % anchors[position].attrib['href'].strip()

        # If this is for the download then just return what was found for the "link" element
        pending = self.persistence.get('tributecaLinkGeneration')
        if pending is not None:
            self.persistence['tributecaLinkGeneration'] = None
            if pending != webURL:
                return downloadURL % pending
            return webURL

        currentTitle = self.TextTail(anchors[position]).strip()
        previousTitle = self.TextTail(anchors[position - 1]).strip() if position else ''

        # Rule: "IMAX: Hubble 3D": http://www.tribute.ca/streamingflash/hubble3d.flv
        titleArray = [currentTitle, previousTitle]
        if titleArray[0].startswith('IMAX:'):
            titleArray[0] = titleArray[0].replace('IMAX:', '').strip()
        else:
            # Rule: "How to Train Your Dragon: An IMAX 3D Experience" did not even have a trailer
            #       on the Web page but strip off anything after the ":" (and after a " (")
            for counter in range(len(titleArray)):
                for marker in (': ', ' ('):
                    index = titleArray[counter].find(marker)
                    if index != -1:
                        titleArray[counter] = titleArray[counter][:index].strip()
                if titleArray[0].startswith(titleArray[1]) and titleArray[1]:
                    index = titleArray[counter].find('3D')
                    if index != -1:
                        titleArray[counter] = titleArray[counter][:index].strip()

        # If the current title starts with the previous title then this is trailer #2
        isImaxExperience = ': An IMAX' in currentTitle
        sameAsPrevious = bool(titleArray[1]) and titleArray[0].startswith(titleArray[1])
        trailer2 = 'tr2' if (sameAsPrevious or isImaxExperience) else ''

        baseTitle = titleArray[0].replace('&', 'and')
        self.persistence['tributecaThumbnailLink'] = urllib.parse.quote_plus(
            baseTitle.lower().translate(self._thumbnailTranslation))
        baseCode = self._tributecaVideoCode(baseTitle)

        # Video codes to probe, in order. The first one whose FLV url checks out is used.
        if trailer2:
            # Try with 'tr2' first, then drop the 'tr2'
            candidates = [baseCode + trailer2, baseCode]
        else:
            # Try the plain code first, then add the 'tr2'
            candidates = [baseCode, baseCode + 'tr2']
            if not isImaxExperience and ': ' in currentTitle:
                # Last resort: the whole title, including the part after the ":"
                candidates.append(self._tributecaVideoCode(currentTitle.replace('&', 'and')))

        # Verify that the FLV file url really exists. If it does not then use the Web page link.
        videocode = None
        flvURL = webURL
        for candidate in candidates:
            candidateURL = downloadURL % candidate
            if self._tributecaIsFlv(candidateURL):
                videocode = candidate
                flvURL = candidateURL
                break

        if flvURL != webURL:
            self.persistence['tributecaLinkGeneration'] = videocode
            return common.linkWebPage('dummycontext', 'tributeca') + videocode
        self.persistence['tributecaLinkGeneration'] = flvURL
        return flvURL
    # end tributecaLinkGeneration()

    def tributecaThumbnailLink(self, context, *args):
        '''Verify that the thumbnail actually exists. If it does not then use the site image.
        Call example: 'mnvXpath:tributecaThumbnailLink(string(.//img/@src))'
        return the thumbnail url
        '''
        siteImage = 'http://www.tribute.ca/images/tribute_title.gif'
        # .get() so that a call made before any link lookup falls back to the site image
        # instead of raising KeyError
        posterName = self.persistence.get('tributecaThumbnailLink')
        if not len(args[0]) or not posterName:
            return siteImage

        if args[0].startswith('http:'):
            url = args[0].strip()
        else:
            url = 'http://www.tribute.ca/tribute_objects/images/movies/%s%s' % (posterName, '/poster.jpg')
        resultCheckUrl = common.checkURL(url)
        if not resultCheckUrl[0] or resultCheckUrl[1]['Content-Type'] != 'image/jpeg':
            return siteImage

        return url
    # end tributecaThumbnailLink()

    def tributecaTopTenTitle(self, context, *args):
        '''Take a top ten title and add a leading '0' if less than 10 as it forces correct sort order
        Call example: 'mnvXpath:tributecaTopTenTitle(string(..))'
        return a replacement title
        '''
        title = args[0]
        # A '.' as the second character means the title starts with a single character rank
        if len(title) and title.find('.') == 1:
            return '0' + title
        return title
    # end tributecaTopTenTitle()

    def tributecaIsCustomHTML(self, context, *args):
        '''Check if the link is for a custom HTML
        Example call: mnvXpath:tributecaIsCustomHTML(('dummy'))
        return True if the stored link does not start with "http://"
        return False if the stored link starts with "http://" or no link is stored
        '''
        # .get() so that a call made before any link lookup returns False instead of
        # raising KeyError
        link = self.persistence.get('tributecaLinkGeneration')
        if link is None:
            return False
        return not link.startswith('http://')
    # end tributecaIsCustomHTML()

    def tributecaCheckIfDBItem(self, context, *arg):
        '''Use a unique key value pairing to find out if the 'internetcontentarticles' table already
        has a matching item. This is done to save accessing the Internet when not required.
        Call example: 'mnvXpath:tributecaCheckIfDBItem(.)'
        arg[0] is the title (the word 'Trailer' is removed), arg[1] the author and
        arg[2] the description.
        return True if a match was found
        return False if a match was not found
        '''
        lookup = {
            'feedtitle': 'Movie Trailers',
            'title': arg[0].replace('Trailer', '').strip(),
            'author': arg[1],
            'description': arg[2],
        }
        return common.checkIfDBItem('dummy', lookup)
    # end tributecaCheckIfDBItem()

    def tributecaGetAnchors(self, context, *arg):
        ''' Routine used to get specific anchor elements.
        Unfortunately position dependent.
        Call: mnvXpath:tributecaGetAnchors(//ul[@class='clump'], 3)
        return the anchors found under the element at index arg[1] of the node list arg[0]
        '''
        return self.anchorList(arg[0][int(arg[1])])
    # end tributecaGetAnchors()

    def tributecaDebug(self, context, *arg):
        ''' Routine only used for debugging. Prints out the node
        passed as an argument. Not to be used in production.
        Call example: mnvXpath:tributecaDebug(//a)
        '''
        testpath = etree.XPath(".//a", namespaces=common.namespaces)
        print(arg)
        for count, x in enumerate(arg):
            sys.stdout.write('\nElement Count (%s):\n' % count)
            print("testpath(%s)" % testpath(x))
        print()
        return "========tributecaDebug Called========="
    # end tributecaDebug()
264
265
270
271
276
class xsltExtExample(etree.XSLTExtension):
    '''Example of an XSLT extension. This code must be changed to do anything useful!!!
    Converts the text of the input node's first 'duration' child, written as colon
    separated fields (e.g. "mm:ss" or "hh:mm:ss"), into a total number of seconds and
    stores it as the text of the output parent.
    return nothing
    '''
    def execute(self, context, self_node, input_node, output_parent):
        copyItem = deepcopy(input_node)
        fields = copyItem.xpath('duration')[0].text.split(':')
        # Accumulate in base 60 so that each field is weighted by 60**(fields to its right):
        # "3:25" -> 205. Weighting by 60*(fields to its right) would multiply the last
        # field by zero and drop the seconds.
        seconds = 0
        for field in fields:
            seconds = seconds * 60 + int(field)
        output_parent.text = '%s' % seconds
288
289
def tributecaLinkGeneration(self, context, *args)
Start of XPath extension functions.
def execute(self, context, self_node, input_node, output_parent)
static pid_list_t::iterator find(const PIDInfoMap &map, pid_list_t &list, pid_list_t::iterator begin, pid_list_t::iterator end, bool find_open)
static void print(const QList< uint > &raw_minimas, const QList< uint > &raw_maximas, const QList< float > &minimas, const QList< float > &maximas)