MythTV  master
tributeca_api.py
Go to the documentation of this file.
1 # -*- coding: UTF-8 -*-
2 
3 # ----------------------
4 # Name: tributeca_api - XPath and XSLT functions for the Tribute.ca grabber
5 # Python Script
6 # Author: R.D. Vaughan
7 # Purpose: This python script is intended to perform a variety of utility functions
8 # for the conversion of data to the MNV standard RSS output format.
9 # See this link for the specifications:
10 # http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
11 #
12 # License:Creative Commons GNU GPL v2
13 # (http://creativecommons.org/licenses/GPL/2.0/)
14 #-------------------------------------
15 __title__ ="tributeca_api - XPath and XSLT functions for the Tribute.ca grabber"
16 __author__="R.D. Vaughan"
17 __purpose__='''
18 This python script is intended to perform a variety of utility functions
19 for the conversion of data to the MNV standard RSS output format.
20 See this link for the specifications:
21 http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
22 '''
23 
24 __version__="v0.1.1"
25 # 0.1.0 Initial development
26 # 0.1.1 Changes due to Web site modifications
27 
28 
29 # Specify the class names that have XPath extension functions
30 __xpathClassList__ = ['xpathFunctions', ]
31 
32 # Specify the XSLT extension class names. Each class is a stand-alone extension function
33 #__xsltExtentionList__ = ['xsltExtExample', ]
34 __xsltExtentionList__ = []
35 
36 import os, sys, re, time, datetime, shutil, urllib.request, urllib.parse, urllib.error, string
37 from copy import deepcopy
38 
39 
class OutStreamEncoder(object):
    """Wrap an output stream so that everything written to it is encoded.

    Text is encoded with ``encoding`` and written to the wrapped stream's
    binary ``buffer``. When the wrapped stream has no ``buffer`` attribute
    (for example an in-memory text stream), the data is written to the
    stream itself as text instead of raising AttributeError.
    """
    def __init__(self, outstream, encoding=None):
        """Remember the stream to wrap and the encoding to use.

        outstream -- the stream object being wrapped
        encoding  -- codec name; defaults to sys.getfilesystemencoding()
        """
        self.out = outstream
        if not encoding:
            self.encoding = sys.getfilesystemencoding()
        else:
            self.encoding = encoding

    def write(self, obj):
        """Write obj to the wrapped stream, encoding str with self.encoding.

        Write errors reported as OSError are deliberately ignored so that a
        closed or broken output stream does not abort the caller.
        """
        binary = getattr(self.out, 'buffer', None)
        try:
            if binary is None:
                # No binary layer available: hand text to the stream itself
                if isinstance(obj, bytes):
                    obj = obj.decode(self.encoding)
                self.out.write(obj)
            else:
                if isinstance(obj, str):
                    obj = obj.encode(self.encoding)
                binary.write(obj)
        except OSError:
            pass

    def __getattr__(self, attr):
        """Delegate everything but write to the stream"""
        if attr == 'out':
            # "out" is not set yet; looking it up through self.out would
            # re-enter __getattr__ and recurse without end.
            raise AttributeError(attr)
        return getattr(self.out, attr)
61 
# Route stdout/stderr through a UTF-8 encoder when they are regular text
# streams. "io" is imported here because the isinstance() check below needs
# the module itself; the file otherwise only imports StringIO from it.
import io

if isinstance(sys.stdout, io.TextIOWrapper):
    sys.stdout = OutStreamEncoder(sys.stdout, 'utf8')
    sys.stderr = OutStreamEncoder(sys.stderr, 'utf8')
65 
66 try:
67  from io import StringIO
68  from lxml import etree
69 except Exception as e:
70  sys.stderr.write('\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
71  sys.exit(1)
72 
# Check that the lxml library is current enough
# From the lxml documents it states: (http://codespeak.net/lxml/installation.html)
# "If you want to use XPath, do not use libxml2 2.6.27. We recommend libxml2 2.7.2 or later"
# Testing was performed with the Ubuntu 9.10 "python-lxml" version "2.1.5-1ubuntu2" repository package
#
# etree.LIBXML_VERSION is a sequence of version numbers. Compare it as a tuple
# of integers: comparing the dotted string form is lexicographic and would
# wrongly reject e.g. "2.10.3" as older than "2.7.2".
version = '.'.join(str(digit) for digit in etree.LIBXML_VERSION)
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write('''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
          At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
87 
88 
class xpathFunctions(object):
    """XPath extension functions for the Tribute.ca grabber.

    The names listed in ``self.functList`` are the methods offered as XPath
    extension functions. ``self.persistence`` carries values from one
    extension call to a later one (the generated link/video code and the
    thumbnail key derived from the current title).
    """
    # Matches any single ASCII punctuation character
    _PUNCTUATION = re.compile('[%s]' % re.escape(string.punctuation))

    def __init__(self):
        self.functList = ['tributecaLinkGeneration', 'tributecaThumbnailLink', 'tributecaTopTenTitle', 'tributecaIsCustomHTML', 'tributecaCheckIfDBItem', 'tributecaDebug', 'tributecaGetAnchors', ]
        self.TextTail = etree.XPath("string()")
        self.anchorList = etree.XPath(".//a", namespaces=common.namespaces)
        self.persistence = {}
    # end __init__()

    ###########################################################################
    # Start of XPath extension functions
    ###########################################################################

    @classmethod
    def _titleToVideoCode(cls, title):
        '''Turn a title into the code used in the FLV file name: lower case, no spaces,
        no punctuation, then URL quoted. The text is kept as "str" throughout because a
        "str" regular expression cannot be applied to UTF-8 encoded bytes.
        return the video code
        '''
        compact = title.lower().replace(' ', '')
        return urllib.parse.quote_plus(cls._PUNCTUATION.sub('', compact))
    # end _titleToVideoCode()

    @staticmethod
    def _urlHasContentType(url, contentType):
        '''Ask common.checkURL() about the url and compare the reported 'Content-Type'.
        return True only if the check succeeded and the content type matches exactly
        '''
        resultCheckUrl = common.checkURL(url)
        return bool(resultCheckUrl[0]) and resultCheckUrl[1]['Content-Type'] == contentType
    # end _urlHasContentType()

    def tributecaLinkGeneration(self, context, *args):
        '''Generate a link for the Tribute.ca site. Significant massaging of the title is required.
        Call example: 'mnvXpath:tributecaLinkGeneration(position(), ..//a)'
        args[0] is the 1 based position of the anchor and args[1] the list of anchor elements.
        The result is also stored in self.persistence['tributecaLinkGeneration'] so that the
        next call returns the matching download link instead of recomputing it.
        return the url link
        '''
        downloadURL = 'http://www.tribute.ca/streamingflash/%s.flv'
        position = int(args[0])-1
        webURL = 'http://www.tribute.ca%s' % args[1][position].attrib['href'].strip()

        # If this is for the download then just return what was found for the "link" element
        pending = self.persistence.get('tributecaLinkGeneration')
        if pending is not None:
            self.persistence['tributecaLinkGeneration'] = None
            if pending != webURL:
                return downloadURL % pending
            return webURL

        currentTitle = self.TextTail(args[1][position]).strip()
        if position == 0:
            previousTitle = ''
        else:
            previousTitle = self.TextTail(args[1][position-1]).strip()

        # Rule: "IMAX: Hubble 3D": http://www.tribute.ca/streamingflash/hubble3d.flv
        titleArray = [currentTitle, previousTitle]
        if titleArray[0].startswith('IMAX:'):
            titleArray[0] = titleArray[0].replace('IMAX:', '').strip()
        else:
            # Rule: "How to Train Your Dragon: An IMAX 3D Experience" did not even have a trailer
            #       on the Web page but strip off anything after the ":"
            for counter in range(len(titleArray)):
                index = titleArray[counter].find(": ")
                if index != -1:
                    titleArray[counter] = titleArray[counter][:index].strip()
                index = titleArray[counter].find(" (")
                if index != -1:
                    titleArray[counter] = titleArray[counter][:index].strip()
                if titleArray[0].startswith(titleArray[1]) and titleArray[1]:
                    index = titleArray[counter].find("3D")
                    if index != -1:
                        titleArray[counter] = titleArray[counter][:index].strip()

        # If the previous title starts with the same title as the current then this is trailer #2
        trailer2 = ''
        if titleArray[0].startswith(titleArray[1]) and titleArray[1]:
            trailer2 = 'tr2'
        if currentTitle.find(': An IMAX') != -1:
            trailer2 = 'tr2'
        titleArray[0] = titleArray[0].replace('&', 'and')
        # Remember the key that tributecaThumbnailLink() uses to build the poster url
        self.persistence['tributecaThumbnailLink'] = urllib.parse.quote_plus(titleArray[0].lower().replace(' ', '_').replace("'", '').replace('-', '_').replace('?', '').replace('.', ''))
        titleArray[0] = self._titleToVideoCode(titleArray[0])

        # Verify that the FLV file url really exists. If it does not then use the Web page link.
        videocode = '%s%s' % (titleArray[0], trailer2)
        flvURL = downloadURL % videocode
        if not self._urlHasContentType(flvURL, 'video/x-flv'):
            if trailer2 != '':
                # Drop the 'tr2' this time
                videocode = titleArray[0]
                flvURL = downloadURL % titleArray[0]
                if not self._urlHasContentType(flvURL, 'video/x-flv'):
                    flvURL = webURL
            else:
                # Add the 'tr2' this time
                videocode = titleArray[0]+'tr2'
                flvURL = downloadURL % videocode
                if not self._urlHasContentType(flvURL, 'video/x-flv'):
                    if currentTitle.find(': An IMAX') == -1 and currentTitle.find(': ') != -1:
                        # Last attempt: use the complete title, including the part after the ":"
                        titleArray[0] = self._titleToVideoCode(currentTitle.replace('&', 'and'))
                        videocode = titleArray[0]
                        flvURL = downloadURL % videocode
                        if not self._urlHasContentType(flvURL, 'video/x-flv'):
                            flvURL = webURL
                    else:
                        flvURL = webURL

        if flvURL != webURL:
            self.persistence['tributecaLinkGeneration'] = videocode
            return common.linkWebPage('dummycontext', 'tributeca')+videocode
        self.persistence['tributecaLinkGeneration'] = flvURL
        return flvURL
    # end tributecaLinkGeneration()

    def tributecaThumbnailLink(self, context, *args):
        '''Verify that the thumbnail actually exists. If it does not then use the site image.
        Call example: 'mnvXpath:tributecaThumbnailLink(string(.//img/@src))'
        The site image is also used when no thumbnail key has been stored yet by
        tributecaLinkGeneration().
        return the thumbnail url
        '''
        siteImage = 'http://www.tribute.ca/images/tribute_title.gif'
        thumbnailKey = self.persistence.get('tributecaThumbnailLink')
        if not len(args[0]) or not thumbnailKey:
            return siteImage

        if args[0].startswith('http:'):
            url = args[0].strip()
        else:
            url = 'http://www.tribute.ca/tribute_objects/images/movies/%s%s' % (thumbnailKey, '/poster.jpg')
        if not self._urlHasContentType(url, 'image/jpeg'):
            return siteImage

        return url
    # end tributecaThumbnailLink()

    def tributecaTopTenTitle(self, context, *args):
        '''Take a top ten title and add a leading '0' if less than 10 as it forces correct sort order
        Call example: 'mnvXpath:tributecaTopTenTitle(string(..))'
        return a replacement title
        '''
        title = args[0]
        # A '.' in the second position means the rank in front of it is a single character
        if len(title) and title.find('.') == 1:
            return '0'+title
        return title
    # end tributecaTopTenTitle()

    def tributecaIsCustomHTML(self, context, *args):
        '''Check if the link is for a custom HTML
        Example call: mnvXpath:tributecaIsCustomHTML(('dummy'))
        return True if the link does not start with "http://"
        return False if the link starts with "http://" or no link has been stored
        '''
        link = self.persistence.get('tributecaLinkGeneration')
        if link is None:
            return False
        return not link.startswith('http://')
    # end tributecaIsCustomHTML()

    def tributecaCheckIfDBItem(self, context, *arg):
        '''Use a unique key value pairing to find out if the 'internetcontentarticles' table already
        has a matching item. This is done to save accessing the Internet when not required.
        Call example: 'mnvXpath:tributecaCheckIfDBItem(.)'
        arg[0] is the title, arg[1] the author and arg[2] the description
        return True if a match was found
        return False if a match was not found
        '''
        return common.checkIfDBItem('dummy', {'feedtitle': 'Movie Trailers', 'title': arg[0].replace('Trailer', '').strip(), 'author': arg[1], 'description': arg[2]})
    # end tributecaCheckIfDBItem()

    def tributecaGetAnchors(self, context, *arg):
        ''' Routine used to get specific anchor elements.
        Unfortunately position dependent.
        Call: mnvXpath:tributecaGetAnchors(//ul[@class='clump'], 3)
        return the anchor elements found below the element at index arg[1] of arg[0]
        '''
        return self.anchorList(arg[0][int(arg[1])])
    # end tributecaGetAnchors()

    def tributecaDebug(self, context, *arg):
        ''' Routine only used for debugging. Prints out the node
        passed as an argument. Not to be used in production.
        Call example: mnvXpath:tributecaDebug(//a)
        '''
        testpath = etree.XPath(".//a", namespaces=common.namespaces)
        print(arg)
        for count, x in enumerate(arg):
            sys.stdout.write('\nElement Count (%s):\n' % count)
            print("testpath(%s)" % testpath(x))
        print()
        return "========tributecaDebug Called========="
    # end tributecaDebug()

    ###########################################################################
    # End of XPath extension functions
    ###########################################################################
280 
285 
286 
291 
class xsltExtExample(etree.XSLTExtension):
    '''Example of an XSLT extension. This code must be changed to do anything useful!!!
    It reads the text of the first "duration" child of the input node, formatted as colon
    separated fields (e.g. "mm:ss" or "hh:mm:ss"), and stores the total number of seconds
    as the text of the output parent.
    return nothing
    '''
    def execute(self, context, self_node, input_node, output_parent):
        copyItem = deepcopy(input_node)
        fields = copyItem.xpath('duration')[0].text.split(':')
        # Each field is worth 60 times the field to its right (base 60 positions),
        # so accumulate left to right; the last field counts as plain seconds.
        seconds = 0
        for field in fields:
            seconds = seconds*60 + int(field)
        output_parent.text = '%s' % seconds
303 
304 
nv_python_libs.xsltfunctions.tributeca_api.xpathFunctions.persistence
persistence
Definition: tributeca_api.py:96
nv_python_libs.xsltfunctions.tributeca_api.xpathFunctions.tributecaCheckIfDBItem
def tributecaCheckIfDBItem(self, context, *arg)
Definition: tributeca_api.py:243
nv_python_libs.xsltfunctions.tributeca_api.xpathFunctions.__init__
def __init__(self)
Definition: tributeca_api.py:92
nv_python_libs.xsltfunctions.tributeca_api.xpathFunctions.tributecaGetAnchors
def tributecaGetAnchors(self, context, *arg)
Definition: tributeca_api.py:253
nv_python_libs.xsltfunctions.tributeca_api.xpathFunctions.functList
functList
Definition: tributeca_api.py:93
nv_python_libs.xsltfunctions.tributeca_api.xpathFunctions.tributecaTopTenTitle
def tributecaTopTenTitle(self, context, *args)
Definition: tributeca_api.py:213
nv_python_libs.xsltfunctions.tributeca_api.xpathFunctions.tributecaThumbnailLink
def tributecaThumbnailLink(self, context, *args)
Definition: tributeca_api.py:193
nv_python_libs.xsltfunctions.tributeca_api.xsltExtExample
Definition: tributeca_api.py:292
nv_python_libs.xsltfunctions.tributeca_api.xpathFunctions.tributecaDebug
def tributecaDebug(self, context, *arg)
Definition: tributeca_api.py:261
nv_python_libs.xsltfunctions.tributeca_api.OutStreamEncoder.__getattr__
def __getattr__(self, attr)
Definition: tributeca_api.py:58
nv_python_libs.xsltfunctions.tributeca_api.xpathFunctions.TextTail
TextTail
Definition: tributeca_api.py:94
nv_python_libs.xsltfunctions.tributeca_api.OutStreamEncoder.write
def write(self, obj)
Definition: tributeca_api.py:49
print
static void print(const QList< uint > &raw_minimas, const QList< uint > &raw_maximas, const QList< float > &minimas, const QList< float > &maximas)
Definition: vbi608extractor.cpp:29
nv_python_libs.xsltfunctions.tributeca_api.OutStreamEncoder.__init__
def __init__(self, outstream, encoding=None)
Definition: tributeca_api.py:42
nv_python_libs.xsltfunctions.tributeca_api.OutStreamEncoder.out
out
Definition: tributeca_api.py:43
nv_python_libs.xsltfunctions.tributeca_api.OutStreamEncoder
Definition: tributeca_api.py:40
nv_python_libs.xsltfunctions.tributeca_api.xpathFunctions.tributecaLinkGeneration
def tributecaLinkGeneration(self, context, *args)
Start of XPath extension functions.
Definition: tributeca_api.py:105
nv_python_libs.xsltfunctions.tributeca_api.xpathFunctions.tributecaIsCustomHTML
def tributecaIsCustomHTML(self, context, *args)
Definition: tributeca_api.py:228
nv_python_libs.xsltfunctions.tributeca_api.xpathFunctions.anchorList
anchorList
Definition: tributeca_api.py:95
nv_python_libs.xsltfunctions.tributeca_api.xsltExtExample.execute
def execute(self, context, self_node, input_node, output_parent)
Definition: tributeca_api.py:296
nv_python_libs.xsltfunctions.tributeca_api.xpathFunctions
Definition: tributeca_api.py:89
find
static pid_list_t::iterator find(const PIDInfoMap &map, pid_list_t &list, pid_list_t::iterator begin, pid_list_t::iterator end, bool find_open)
Definition: dvbstreamhandler.cpp:363
nv_python_libs.xsltfunctions.tributeca_api.OutStreamEncoder.encoding
encoding
Definition: tributeca_api.py:45