MythTV  master
youtubeXSL_api.py
Go to the documentation of this file.
1 # -*- coding: UTF-8 -*-
2 
3 # ----------------------
4 # Name: youtubeXSL_api - XPath and XSLT functions for the mashup grabbers
5 # Python Script
6 # Author: R.D. Vaughan
7 # Purpose: This python script is intended to perform a variety of utility functions
8 # for the conversion of data to the MNV standard RSS output format.
9 # See this link for the specifications:
10 # http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
11 #
12 # License:Creative Commons GNU GPL v2
13 # (http://creativecommons.org/licenses/GPL/2.0/)
14 #-------------------------------------
15 __title__ ="youtubeXSL_api - XPath and XSLT functions for the mashup grabbers"
16 __author__="R.D. Vaughan"
17 __purpose__='''
18 This python script is intended to perform a variety of utility functions
19 for the conversion of data to the MNV standard RSS output format.
20 See this link for the specifications:
21 http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
22 '''
23 
24 __version__="v0.1.0"
25 # 0.1.0 Initial development
26 
27 
28 # Specify the class names that have XPath extension functions
29 __xpathClassList__ = ['xpathFunctions', ]
30 
31 # Specify the XSLT extension class names. Each class is a stand-alone extension function
32 #__xsltExtentionList__ = ['xsltExtExample', ]
33 __xsltExtentionList__ = []
34 
35 import os, sys, re, time, datetime, shutil, urllib.request, urllib.parse, urllib.error, string
36 from copy import deepcopy
37 import io
38 
39 
class OutStreamEncoder(object):
    """Proxy around a text stream that encodes ``str`` output before writing.

    Text passed to :meth:`write` is encoded with the configured encoding and
    written to the wrapped stream's underlying ``buffer``. Every other
    attribute lookup is forwarded to the wrapped stream.
    """

    def __init__(self, outstream, encoding=None):
        """Remember the wrapped stream and the encoding to apply.

        outstream -- the stream object being wrapped; ``write`` uses its
                     ``buffer`` attribute
        encoding  -- codec name used for ``str`` objects; when empty or None
                     the file system encoding is used instead
        """
        self.out = outstream
        self.encoding = encoding if encoding else sys.getfilesystemencoding()

    def write(self, obj):
        """Write ``obj`` to the wrapped stream's buffer.

        ``str`` objects are encoded first; anything else is handed to the
        buffer unchanged. An ``OSError`` raised by the buffer is ignored, so a
        failed write never propagates to the caller.
        """
        payload = obj.encode(self.encoding) if isinstance(obj, str) else obj
        try:
            self.out.buffer.write(payload)
        except OSError:
            # Best effort only: a failing output stream must not abort the caller
            pass

    def __getattr__(self, attr):
        """Forward every attribute not defined here to the wrapped stream."""
        return getattr(self.out, attr)
61 
# Force UTF-8 output on the standard streams.
# Each stream is tested on its own: OutStreamEncoder.write() needs the
# wrapped stream's ".buffer", so a stream that is not an io.TextIOWrapper
# (for example one that has already been replaced or wrapped) is left alone
# instead of being wrapped just because the other stream qualified.
if isinstance(sys.stdout, io.TextIOWrapper):
    sys.stdout = OutStreamEncoder(sys.stdout, 'utf8')
if isinstance(sys.stderr, io.TextIOWrapper):
    sys.stderr = OutStreamEncoder(sys.stderr, 'utf8')
65 
# The XPath/XSLT support in this module depends on lxml; report the problem
# on stderr and terminate with exit status 1 when the imports fail.
try:
    from io import StringIO
    from lxml import etree
except Exception as importProblem:
    importMessage = '\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % importProblem
    sys.stderr.write(importMessage)
    sys.exit(1)
72 
# Check that the lxml library is current enough
# From the lxml documents it states: (http://codespeak.net/lxml/installation.html)
# "If you want to use XPath, do not use libxml2 2.6.27. We recommend libxml2 2.7.2 or later"
# Testing was performed with the Ubuntu 9.10 "python-lxml" version "2.1.5-1ubuntu2" repository package
#
# The comparison is done on the integer components of etree.LIBXML_VERSION.
# Comparing the dotted string form is wrong because it is lexicographic:
# '2.10.3' < '2.7.2' is True, which would reject every libxml2 2.10+ release.
version = '.'.join(str(digit) for digit in etree.LIBXML_VERSION)
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write('''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
 At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
87 
88 
class xpathFunctions(object):
    """XPath extension functions used by the YouTube mashup grabber.

    NOTE(review): the methods read and write attributes of a module-level
    name ``common`` (searchterm, namespaces, removePunc, pagenumber,
    page_limit, numresults, returned, startindex) that is not defined in this
    file - presumably it is injected by the code that loads this module;
    confirm against the loader.
    """

    def __init__(self):
        """Set up the list of function names and the trailer-number patterns."""
        # Names of the methods of this class listed for the caller
        # NOTE(review): presumably used to register the XPath functions - confirm
        self.functList = ['youtubeTrailerFilter', 'youtubePaging', ]
        # Patterns that pull a trailer number out of a lower-cased title
        self.tailerNum_Patterns = [
            # "trailer 7"
            re.compile('''^.+?trailer\\ (?P<trailerNum>[0-9]+).*$''', re.UNICODE),
            # "trailer #7"
            re.compile('''^.+?trailer\\ \\#(?P<trailerNum>[0-9]+).*$''', re.UNICODE),
            ]
    # end __init__()

    # Start of XPath extension functions

    def _buildSortKey(self, title, searchTerm):
        '''Build the ordering/duplicate-detection key for one title.
        title      - the lower-cased title with punctuation already removed
        searchTerm - the normalized search term (no leading "the ")
        The key is a tag prefix followed by the title with the tag words removed. The prefix is
        assembled from: "ZZ-Game" (title mentions a game the user did not search for), "1-" (title
        starts with the search term), "Trailer #N" (trailer number, N defaults to 1) and a leading
        "HD-" or a lone "YHD" for HD titles that are not games. Keys are later sorted as plain
        strings, so the prefix decides the order of the results.
        return the key string, or None when the title does not contain the search term
        '''
        # Ignore a leading "The" plus filler words the user did not search for
        if title.startswith('the '):
            title = title[4:].strip()
        if 'new ' not in searchTerm:
            title = title.replace('new ', '')
        if 'official ' not in searchTerm:
            title = title.replace('official ', '')
        if searchTerm not in title:
            return None

        addOns = ''
        if 'game ' not in searchTerm and 'game' in title:
            addOns += 'ZZ-Game'
        HD = 'hd' in title or '1080p' in title or '720p' in title
        if title.startswith(searchTerm):
            addOns += '1-'

        for regexPattern in self.tailerNum_Patterns:
            match = regexPattern.match(title)
            if not match:
                continue
            trailerNum = match.group('trailerNum')
            if int(trailerNum) < 20:
                addOns += 'Trailer #%s' % trailerNum
                title = title.replace(('trailer %s' % trailerNum), '')
            else:
                # Numbers of 20 and above are not treated as a trailer sequence number
                addOns += 'Trailer #1'
            break
        else:
            # No numbered pattern matched: an unnumbered trailer counts as trailer #1
            if 'trailer' in title:
                addOns += 'Trailer #1'

        if HD and not addOns.startswith('ZZ-Game'):
            addOns = 'HD-' + addOns if addOns else 'YHD'

        # Drop the tag words and close the double space each removal can leave behind
        for text in ['hd', 'trailer', 'game', '1080p', '720p']:
            title = title.replace(text, '').replace('  ', ' ').strip()
        return ('%s %s' % (addOns, title)).strip()
    # end _buildSortKey()

    def youtubeTrailerFilter(self, context, *args):
        '''Generate a list of entry elements that are relevant to the requested search term. Basically
        remove duplicate and non-relevant search results and order them to provide the best results
        for the user.
        Also set the paging variables.
        Call example: 'mnvXpath:youtubeTrailerFilter(//atm:entry)'
        context - the XPath evaluation context (not used)
        args[0] - the iterable of "entry" elements to filter
        Entries without a title element or with an empty title are skipped. The returned elements
        are deep copies whose title text is prefixed with a two digit position ("01. ", "02. ", ...).
        return the list of relevant "entry" elements
        '''
        searchTerm = common.removePunc('dummy', common.searchterm.lower())
        titleFilter = etree.XPath('.//atm:title', namespaces=common.namespaces)

        # Remove any leading word "The" from the search term
        if searchTerm.startswith('the '):
            searchTerm = searchTerm[4:].strip()

        # Index the entries by their title text (a later entry replaces an earlier identical title)
        titleDict = {}
        for entry in (args[0] if args else []):
            titles = titleFilter(entry)
            if not titles or not titles[0].text:
                # Nothing to match against, so the entry cannot be relevant
                continue
            titleDict[titles[0].text] = entry

        # Tag so that there is an order plus duplicates can be easily spotted.
        # Titles that reduce to the same key collapse into one dictionary item.
        filteredDict = {}
        for key, entry in titleDict.items():
            sortKey = self._buildSortKey(common.removePunc('dummy', key.lower()), searchTerm)
            if sortKey is not None:
                filteredDict[sortKey] = entry

        # Copy the remaining elements to a list in key order and number their titles
        finalElements = []
        for position, sortKey in enumerate(sorted(filteredDict), 1):
            element = deepcopy(filteredDict[sortKey])
            titleElement = titleFilter(element)[0]
            titleElement.text = '%02d. %s' % (position, titleElement.text)
            finalElements.append(element)

        # Set the paging values
        common.numresults = str(len(finalElements))
        common.returned = common.numresults
        common.startindex = common.numresults

        return finalElements
    # end youtubeTrailerFilter()

    def youtubePaging(self, context, args):
        '''Generate a page value specific to the mashup search for YouTube searches
        Call example: 'mnvXpath:youtubePaging('dummy')'
        The page value is some times a page # and sometimes an item position number
        context - the XPath evaluation context (not used)
        args    - a dummy argument (not used)
        return the page value that will be used in the search as a string
        '''
        return str((int(common.pagenumber) - 1) * common.page_limit + 1)
    # end youtubePaging()
203 
204 
209 
210 
215 
216 
217 
nv_python_libs.xsltfunctions.youtubeXSL_api.OutStreamEncoder.encoding
encoding
Definition: youtubeXSL_api.py:45
nv_python_libs.xsltfunctions.youtubeXSL_api.OutStreamEncoder.__getattr__
def __getattr__(self, attr)
Definition: youtubeXSL_api.py:58
nv_python_libs.xsltfunctions.youtubeXSL_api.xpathFunctions
Definition: youtubeXSL_api.py:89
nv_python_libs.xsltfunctions.youtubeXSL_api.xpathFunctions.youtubePaging
def youtubePaging(self, context, args)
Definition: youtubeXSL_api.py:195
nv_python_libs.xsltfunctions.youtubeXSL_api.OutStreamEncoder
Definition: youtubeXSL_api.py:40
nv_python_libs.xsltfunctions.youtubeXSL_api.OutStreamEncoder.__init__
def __init__(self, outstream, encoding=None)
Definition: youtubeXSL_api.py:42
nv_python_libs.xsltfunctions.youtubeXSL_api.xpathFunctions.__init__
def __init__(self)
Definition: youtubeXSL_api.py:92
nv_python_libs.xsltfunctions.youtubeXSL_api.xpathFunctions.youtubeTrailerFilter
def youtubeTrailerFilter(self, context, *args)
Start of XPath extension functions.
Definition: youtubeXSL_api.py:108
nv_python_libs.xsltfunctions.youtubeXSL_api.OutStreamEncoder.out
out
Definition: youtubeXSL_api.py:43
nv_python_libs.xsltfunctions.youtubeXSL_api.xpathFunctions.functList
functList
Definition: youtubeXSL_api.py:93
nv_python_libs.xsltfunctions.youtubeXSL_api.OutStreamEncoder.write
def write(self, obj)
Definition: youtubeXSL_api.py:49
nv_python_libs.xsltfunctions.youtubeXSL_api.xpathFunctions.tailerNum_Patterns
tailerNum_Patterns
Definition: youtubeXSL_api.py:94