MythTV  master
youtubeXSL_api.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # -*- coding: UTF-8 -*-
3 # ----------------------
4 # Name: youtubeXSL_api - XPath and XSLT functions for the mashup grabbers
5 # Python Script
6 # Author: R.D. Vaughan
7 # Purpose: This python script is intended to perform a variety of utility functions
8 # for the conversion of data to the MNV standard RSS output format.
9 # See this link for the specifications:
10 # http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
11 #
12 # License:Creative Commons GNU GPL v2
13 # (http://creativecommons.org/licenses/GPL/2.0/)
14 #-------------------------------------
15 __title__ ="youtubeXSL_api - XPath and XSLT functions for the mashup grabbers"
16 __author__="R.D. Vaughan"
17 __purpose__='''
18 This python script is intended to perform a variety of utility functions
19 for the conversion of data to the MNV standard RSS output format.
20 See this link for the specifications:
21 http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
22 '''
23 
24 __version__="v0.1.0"
25 # 0.1.0 Initial development
26 
27 
28 # Specify the class names that have XPath extension functions
29 __xpathClassList__ = ['xpathFunctions', ]
30 
31 # Specify the XSLT extension class names. Each class is a stand-alone extension function
32 #__xsltExtentionList__ = ['xsltExtExample', ]
33 __xsltExtentionList__ = []
34 
35 import os, sys, re, time, datetime, shutil, urllib, string
36 from copy import deepcopy
37 
38 
class OutStreamEncoder(object):
    """Stream proxy that encodes unicode text before passing it to the wrapped stream.

    Only ``write`` is intercepted; every other attribute is looked up on the
    wrapped stream.
    """
    def __init__(self, outstream, encoding=None):
        """Remember the wrapped stream and the encoding used by ``write``.

        outstream -- the stream object to wrap
        encoding  -- codec name; a false value selects sys.getfilesystemencoding()
        """
        self.out = outstream
        self.encoding = encoding if encoding else sys.getfilesystemencoding()

    def write(self, obj):
        """Write obj to the wrapped stream, encoding it first when it is unicode.

        An IOError raised while writing is ignored.
        """
        try:
            payload = obj
            if isinstance(payload, unicode):
                payload = payload.encode(self.encoding)
            self.out.write(payload)
        except IOError:
            pass

    def __getattr__(self, attr):
        """Forward every attribute this wrapper does not define to the wrapped stream."""
        return getattr(self.out, attr)
# Route both standard streams through the UTF-8 encoding wrapper
sys.stderr = OutStreamEncoder(sys.stderr, 'utf8')
sys.stdout = OutStreamEncoder(sys.stdout, 'utf8')
66 
# Both libraries are required by this module, so abort with a readable message
# when either of them cannot be imported.
try:
    from StringIO import StringIO
    from lxml import etree
except Exception as e:  # the "as" form is accepted by Python 2.6 and later
    sys.stderr.write(u'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
    sys.exit(1)
73 
# Check that the libxml library reported by lxml is current enough
# From the lxml documents it states: (http://codespeak.net/lxml/installation.html)
# "If you want to use XPath, do not use libxml2 2.6.27. We recommend libxml2 2.7.2 or later"
# Testing was performed with the Ubuntu 9.10 "python-lxml" version "2.1.5-1ubuntu2" repository package
#
# The dotted string is only used for the error message.
version = '.'.join([str(digit) for digit in etree.LIBXML_VERSION])
# Compare the numeric components, not the dotted strings: string comparison is
# lexicographic, so '2.10.3' < '2.7.2' would wrongly reject a newer library.
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write(u'''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
 At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
88 
89 
class xpathFunctions(object):
    """Functions specific extending XPath

    The methods listed in ``functList`` read and write attributes of a
    module-level ``common`` object that is not defined in this file.
    NOTE(review): presumably ``common`` is injected by the code that loads this
    module -- confirm against the caller.
    """
    def __init__(self):
        """Set up the extension function names and the trailer number patterns."""
        self.functList = ['youtubeTrailerFilter', 'youtubePaging', ]
        # Patterns used by youtubeTrailerFilter() to pull a trailer number out of a title
        self.tailerNum_Patterns = [
            # "trailer 7"
            re.compile(u'''^.+?trailer\\ (?P<trailerNum>[0-9]+).*$''', re.UNICODE),
            # "trailer #7"
            re.compile(u'''^.+?trailer\\ \\#(?P<trailerNum>[0-9]+).*$''', re.UNICODE),
            ]
    # end __init__()

    def youtubeTrailerFilter(self, context, *args):
        '''Generate a list of entry elements that are relevant to the requested search term. Basically
        remove duplicate and non-relevant search results and order them to provide the best results
        for the user.
        Also set the paging variables.
        Call example: 'mnvXpath:youtubeTrailerFilter(//atm:entry)'

        context -- first argument supplied by the XPath caller (unused)
        args    -- args[0] is the iterable of "entry" elements to filter
        return the list of relevant "entry" elements, as deep copies whose title text
        has been prefixed with a two digit position number
        '''
        searchTerm = common.removePunc('dummy', common.searchterm.lower())
        titleFilter = etree.XPath('.//atm:title', namespaces=common.namespaces)

        # Remove any leading word "The" from the search term
        if searchTerm.startswith(u'the '):
            searchTerm = searchTerm[4:].strip()

        # Index the entries by title text. An entry with no title element or with empty
        # title text can never match the search term, so it is skipped instead of
        # raising an IndexError/AttributeError further down.
        titleDict = {}
        for entry in args[0]:
            titleElements = titleFilter(entry)
            if titleElements and titleElements[0].text:
                titleDict[titleElements[0].text] = entry

        # Tag so that there is an order plus duplicates can be easily spotted
        filteredDict = {}
        for key in titleDict.keys():
            title = common.removePunc('dummy', key.lower())
            if title.startswith(u'the '):
                title = title[4:].strip()
            # Drop filler words from the title unless the user searched for them
            if 'new ' not in searchTerm:
                title = title.replace(u'new ', u'')
            if 'official ' not in searchTerm:
                title = title.replace(u'official ', u'')
            if searchTerm not in title:
                continue

            # addOns becomes the leading part of the sort key
            addOns = u''
            HD = False
            if 'game ' not in searchTerm:
                if 'game' in title:
                    addOns += u'ZZ-Game'
            if 'hd' in title or '1080p' in title or '720p' in title:
                HD = True
            if title.startswith(searchTerm):
                addOns += u'1-'
            for regexPattern in self.tailerNum_Patterns:
                match = regexPattern.match(title)
                if not match:
                    continue
                trailerNum = match.groups()
                if int(trailerNum[0]) < 20:
                    addOns += u'Trailer #%s' % trailerNum[0]
                    title = title.replace((u'trailer %s' % trailerNum[0]), u'')
                else:
                    addOns += u'Trailer #1'
                break
            else:
                # No pattern found a trailer number
                if 'trailer' in title:
                    addOns += u'Trailer #1'
            if HD and not addOns.startswith(u'ZZ-Game'):
                if addOns:
                    addOns = u'HD-' + addOns
                else:
                    addOns = u'YHD'
            for text in [u'hd', u'trailer', u'game', u'1080p', u'720p']:
                # Removing a word can leave two adjacent spaces behind, collapse them
                title = title.replace(text, u'').replace(u'  ', u' ').strip()
            filteredDict[(u'%s %s' % (addOns, title)).strip()] = titleDict[key]

        # Entries that reduced to the same sort key already collapsed into a single
        # dictionary item above, so only the ordering and numbering remain. The elements
        # are deep copied so that the title text is rewritten on the copies only.
        finalElements = []
        for index, sortKey in enumerate(sorted(filteredDict)):
            element = deepcopy(filteredDict[sortKey])
            titleElement = titleFilter(element)[0]
            titleElement.text = u'%02d. %s' % (index + 1, titleElement.text)
            finalElements.append(element)

        # Set the paging values
        common.numresults = str(len(finalElements))
        common.returned = common.numresults
        common.startindex = common.numresults

        return finalElements
    # end youtubeTrailerFilter()

    def youtubePaging(self, context, args):
        '''Generate a page value specific to the mashup search for YouTube searches
        Call example: 'mnvXpath:youtubePaging('dummy')'
        The page value is some times a page # and sometimes an item position number

        context -- first argument supplied by the XPath caller (unused)
        args    -- unused, the call example passes a dummy value
        return the page value that will be used in the search as a string
        '''
        return str((int(common.pagenumber) - 1) * common.page_limit + 1)
    # end youtubePaging()
204 
205 
210 
211 
216 
217 
218 
def youtubeTrailerFilter(self, context, *args)
Start of XPath extension functions.