MythTV  master
revision3XSL_api.py
Go to the documentation of this file.
1 # -*- coding: UTF-8 -*-
2 
3 # ----------------------
4 # Name: revision3XSL_api - XPath and XSLT functions for the Revision3 RSS/HTML items
5 # Python Script
6 # Author: R.D. Vaughan
7 # Purpose: This python script is intended to perform a variety of utility functions
8 # for the conversion of data to the MNV standard RSS output format.
9 # See this link for the specifications:
10 # http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
11 #
12 # License:Creative Commons GNU GPL v2
13 # (http://creativecommons.org/licenses/GPL/2.0/)
14 #-------------------------------------
15 __title__ ="revision3XSL_api - XPath and XSLT functions for the www.revision3L.com RSS/HTML"
16 __author__="R.D. Vaughan"
17 __purpose__='''
18 This python script is intended to perform a variety of utility functions
19 for the conversion of data to the MNV standard RSS output format.
20 See this link for the specifications:
21 http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
22 '''
23 
24 __version__="v0.1.1"
25 # 0.1.0 Initial development
26 
27 
28 # Specify the class names that have XPath extension functions
29 __xpathClassList__ = ['xpathFunctions', ]
30 
31 # Specify the XSLT extension class names. Each class is a stand-alone extension function
32 #__xsltExtentionList__ = ['xsltExtExample', ]
33 __xsltExtentionList__ = []
34 
35 import os, sys, re, time, datetime, shutil, urllib.request, urllib.parse, urllib.error, string
36 from copy import deepcopy
37 import io
38 
class OutStreamEncoder(object):
    """Wraps a stream with an encoder.

    Strings passed to write() are encoded and written to the ``buffer``
    attribute of the wrapped stream. Every other attribute is looked up on
    the wrapped stream itself.
    """
    def __init__(self, outstream, encoding=None):
        """Remember the stream to wrap and the encoding to use.

        outstream: the stream to wrap; write() uses its ``buffer`` attribute
        encoding:  the name of the encoding; when empty or None the value of
                   sys.getfilesystemencoding() is used
        """
        self.out = outstream
        if not encoding:
            self.encoding = sys.getfilesystemencoding()
        else:
            self.encoding = encoding

    def write(self, obj):
        """Wraps the output stream, encoding Unicode strings with the specified encoding.

        Objects that are not ``str`` are written unchanged.
        """
        if isinstance(obj, str):
            obj = obj.encode(self.encoding)
        try:
            self.out.buffer.write(obj)
        except OSError:
            # Output is best effort: a failing stream must not abort the script
            pass

    def __getattr__(self, attr):
        """Delegate everything but write to the stream.

        raise AttributeError when the wrapped stream itself is not set yet
        """
        # __getattr__ is only called when the normal lookup failed. When 'out' is
        # missing (an instance made without __init__, e.g. by copy or pickle) the
        # delegation below would look up 'out' again and recurse without end.
        if attr == 'out':
            raise AttributeError(attr)
        return getattr(self.out, attr)
60 
# Send stdout and stderr through a UTF-8 encoder. Only the type of stdout is
# checked; when it is a regular text stream both streams are wrapped.
if isinstance(sys.stdout, io.TextIOWrapper):
    sys.stdout, sys.stderr = (
        OutStreamEncoder(sys.stdout, 'utf8'),
        OutStreamEncoder(sys.stderr, 'utf8'),
    )
64 
# The StringIO class and the lxml library are required: when either import
# fails the reason is reported on stderr and the script exits with status 1.
try:
    from io import StringIO
    from lxml import etree
except Exception as importError:
    failureMessage = '\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % importError
    sys.stderr.write(failureMessage)
    sys.exit(1)
71 
# Check that the libxml library used by lxml is current enough
# The lxml documents state: (http://codespeak.net/lxml/installation.html)
# "If you want to use XPath, do not use libxml2 2.6.27. We recommend libxml2 2.7.2 or later"
# Testing was performed with the Ubuntu 9.10 "python-lxml" version "2.1.5-1ubuntu2" repository package
#
# The version numbers are compared as a tuple of integers. Comparing the dotted
# string would sort "2.10.0" before "2.7.2" and wrongly reject newer libraries.
version = '.'.join(str(digit) for digit in etree.LIBXML_VERSION)
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write('''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
          At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
86 
87 
class xpathFunctions(object):
    """XPath extension functions for the Revision3 RSS/HTML items.

    NOTE(review): the name ``common`` used by this class is neither defined nor
    imported in this file; presumably the code that loads this module puts it
    into the module namespace - confirm against the loader.
    """
    def __init__(self):
        # Names of the methods of this class that are XPath extension functions
        # (presumably read by the code that registers them - confirm)
        self.functList = ['revision3LinkGeneration', 'revision3Episode', 'revision3checkIfDBItem', ]
        # Patterns that find an episode number in a download link, e.g. "...--0042--..."
        self.episodeRegex = [
            re.compile(r'''^.+?\-\-(?P<episodeno>[0-9]+)\-\-.*$''', re.UNICODE),
            ]
        self.namespaces = {
            'atom': "http://www.w3.org/2005/Atom",
            'media': "http://search.yahoo.com/mrss/",
            'itunes':"http://www.itunes.com/dtds/podcast-1.0.dtd",
            'xhtml': "http://www.w3.org/1999/xhtml",
            'mythtv': "http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format",
            'cnettv': "http://cnettv.com/mrss/",
            'creativeCommons': "http://backend.userland.com/creativeCommonsRssModule",
            'amp': "http://www.adobe.com/amp/1.0",
            'content': "http://purl.org/rss/1.0/modules/content/",
            }
        # Each entry is [XPath that selects the media ids, regex applied to the first id or None]
        self.mediaIdFilters = [
            [etree.XPath('//object/@id', namespaces=self.namespaces ), None],
            ]
        # The video id replaces the %s in this player url
        self.FullScreen = 'http://revision3.com/show/popupPlayer?video_id=%s&quality=high&offset=0'
        self.FullScreenParser = common.parsers['html'].copy()
    # end __init__()

    # Start of XPath extension functions

    def revision3LinkGeneration(self, context, *arg):
        '''Generate a link for the video.
        Call example: 'mnvXpath:revision3LinkGeneration(string(link))'
        return the player url link when a media id was found in the web page
        return the url that was passed when the page could not be read or no media id was found
        '''
        webURL = arg[0]
        try:
            tmpHTML = etree.parse(webURL, self.FullScreenParser)
        except Exception as errmsg:
            sys.stderr.write("Error reading url(%s) error(%s)\n" % (webURL, errmsg))
            return webURL

        for idFilter, idRegex in self.mediaIdFilters:
            mediaId = idFilter(tmpHTML)
            if not mediaId:
                continue
            if not idRegex:
                # Without a regex the id itself is used, less its 'player-' text
                return self.FullScreen % (mediaId[0].strip().replace('player-', ''))
            match = idRegex.match(mediaId[0])
            if match:
                return self.FullScreen % (match.groups()[0])
        # None of the filters produced a usable media id
        return webURL
    # end revision3LinkGeneration()

    def revision3Episode(self, context, *arg):
        '''Parse the download link and extract an episode number
        Call example: 'mnvXpath:revision3Episode(.)'
        return an array with one element that holds a massaged title element and,
        when the link has a non-zero episode number, an episode element.
        The title is left unchanged when the link has no episode number.
        '''
        item = arg[0][0]
        title = item.find('title').text
        link = item.find('enclosure').attrib['url']

        episodeNumber = None
        for regex in self.episodeRegex:
            match = regex.match(link)
            if match:
                episodeNumber = int(match.groups()[0])
                break

        titleElement = etree.XML("<xml></xml>")
        if episodeNumber is None:
            # '%03d' can only format a number, so a link without an episode
            # number must not go through the 'Ep%03d' title format
            etree.SubElement(titleElement, "title").text = title
        else:
            etree.SubElement(titleElement, "title").text = 'Ep%03d: %s' % (episodeNumber, title)
            if episodeNumber:
                etree.SubElement(titleElement, "episode").text = '%s' % episodeNumber
        return [titleElement]
    # end revision3Episode()

    def revision3checkIfDBItem(self, context, arg):
        '''Use a unique key value pairing to find out if the 'internetcontentarticles' table already
        has a matching item. This is done to save accessing the Internet when not required.
        The key is the massaged title that revision3Episode() makes for the item.
        Call example: 'mnvXpath:revision3checkIfDBItem(.)'
        return True if a match was found
        return False if a match was not found
        '''
        massagedTitle = self.revision3Episode(context, arg)[0].find('title').text
        return common.checkIfDBItem('dummy', {'title': massagedTitle, })
    # end revision3checkIfDBItem()
177 
178 
183 
184 
189 
190 
nv_python_libs.xsltfunctions.revision3XSL_api.OutStreamEncoder.__getattr__
def __getattr__(self, attr)
Definition: revision3XSL_api.py:57
nv_python_libs.xsltfunctions.revision3XSL_api.OutStreamEncoder.__init__
def __init__(self, outstream, encoding=None)
Definition: revision3XSL_api.py:41
nv_python_libs.xsltfunctions.revision3XSL_api.OutStreamEncoder
Definition: revision3XSL_api.py:39
nv_python_libs.xsltfunctions.revision3XSL_api.xpathFunctions.functList
functList
Definition: revision3XSL_api.py:92
nv_python_libs.xsltfunctions.revision3XSL_api.xpathFunctions.FullScreen
FullScreen
Definition: revision3XSL_api.py:110
nv_python_libs.xsltfunctions.revision3XSL_api.OutStreamEncoder.out
out
Definition: revision3XSL_api.py:42
nv_python_libs.xsltfunctions.revision3XSL_api.xpathFunctions.FullScreenParser
FullScreenParser
Definition: revision3XSL_api.py:111
MythFile::copy
MBASE_PUBLIC long long copy(QFile &dst, QFile &src, uint block_size=0)
Copies src file to dst file.
Definition: mythmiscutil.cpp:264
nv_python_libs.xsltfunctions.revision3XSL_api.xpathFunctions
Definition: revision3XSL_api.py:88
nv_python_libs.xsltfunctions.revision3XSL_api.OutStreamEncoder.write
def write(self, obj)
Definition: revision3XSL_api.py:48
nv_python_libs.xsltfunctions.revision3XSL_api.xpathFunctions.namespaces
namespaces
Definition: revision3XSL_api.py:96
nv_python_libs.xsltfunctions.revision3XSL_api.xpathFunctions.episodeRegex
episodeRegex
Definition: revision3XSL_api.py:93
nv_python_libs.xsltfunctions.revision3XSL_api.xpathFunctions.revision3LinkGeneration
def revision3LinkGeneration(self, context, *arg)
Start of XPath extension functions.
Definition: revision3XSL_api.py:120
nv_python_libs.xsltfunctions.revision3XSL_api.xpathFunctions.revision3Episode
def revision3Episode(self, context, *arg)
Definition: revision3XSL_api.py:147
nv_python_libs.xsltfunctions.revision3XSL_api.xpathFunctions.mediaIdFilters
mediaIdFilters
Definition: revision3XSL_api.py:107
nv_python_libs.xsltfunctions.revision3XSL_api.xpathFunctions.__init__
def __init__(self)
Definition: revision3XSL_api.py:91
nv_python_libs.xsltfunctions.revision3XSL_api.xpathFunctions.revision3checkIfDBItem
def revision3checkIfDBItem(self, context, arg)
Definition: revision3XSL_api.py:168
find
static pid_list_t::iterator find(const PIDInfoMap &map, pid_list_t &list, pid_list_t::iterator begin, pid_list_t::iterator end, bool find_open)
Definition: dvbstreamhandler.cpp:363
nv_python_libs.xsltfunctions.revision3XSL_api.OutStreamEncoder.encoding
encoding
Definition: revision3XSL_api.py:44