MythTV  master
spitzer_api.py
Go to the documentation of this file.
1 # -*- coding: UTF-8 -*-
2 
3 # ----------------------
4 # Name: spitzer_api - XPath and XSLT functions for the www.spitzer.caltech.edu grabber
5 # Python Script
6 # Author: R.D. Vaughan
7 # Purpose: This python script is intended to perform a variety of utility functions
8 # for the conversion of data to the MNV standard RSS output format.
9 # See this link for the specifications:
10 # http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
11 #
12 # License:Creative Commons GNU GPL v2
13 # (http://creativecommons.org/licenses/GPL/2.0/)
14 #-------------------------------------
# Module metadata describing this grabber helper.
__title__ = "spitzer_api - XPath and XSLT functions for the www.spitzer.caltech.edu grabber"
__author__ = "R.D. Vaughan"
__purpose__ = (
    "\n"
    "This python script is intended to perform a variety of utility functions\n"
    "for the conversion of data to the MNV standard RSS output format.\n"
    "See this link for the specifications:\n"
    "http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format\n"
)

__version__ = "v0.1.0"
# 0.1.0 Initial development


# Names of the classes in this module that provide XPath extension functions
__xpathClassList__ = ['xpathFunctions']

# Names of the XSLT extension classes. Each class is a stand-alone extension function.
# None are provided by this module; a populated list would look like:
#   __xsltExtentionList__ = ['xsltExtExample', ]
__xsltExtentionList__ = []
34 
35 import os, sys, re, time, datetime, shutil, urllib.request, urllib.parse, urllib.error, string
36 from copy import deepcopy
37 import io
38 
class OutStreamEncoder(object):
    """Proxy for a text stream that encodes str data before writing it.

    Writes go to the wrapped stream's underlying ``buffer``; every other
    attribute is looked up on the wrapped stream itself.
    """

    def __init__(self, outstream, encoding=None):
        """Remember the wrapped stream and the encoding to apply.

        outstream: the stream to wrap; ``write`` uses its ``buffer`` attribute
        encoding:  codec name; when empty or None the value of
                   sys.getfilesystemencoding() is used instead
        """
        self.out = outstream
        self.encoding = encoding if encoding else sys.getfilesystemencoding()

    def write(self, obj):
        """Write obj to the wrapped stream's byte buffer.

        str objects are encoded with self.encoding first; anything else is
        handed to the buffer unchanged. An OSError raised by the underlying
        write is deliberately ignored.
        """
        payload = obj.encode(self.encoding) if isinstance(obj, str) else obj
        try:
            self.out.buffer.write(payload)
        except OSError:
            pass

    def __getattr__(self, attr):
        """Forward any attribute not found on this proxy to the wrapped stream."""
        return getattr(self.out, attr)
60 
# When stdout is a regular text stream, route both stdout and stderr through
# OutStreamEncoder so that str output is written as UTF-8 encoded bytes.
if isinstance(sys.stdout, io.TextIOWrapper):
    sys.stdout, sys.stderr = (
        OutStreamEncoder(sys.stdout, 'utf8'),
        OutStreamEncoder(sys.stderr, 'utf8'),
    )
64 
65 try:
66  from io import StringIO
67  from lxml import etree
68 except Exception as e:
69  sys.stderr.write('\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
70  sys.exit(1)
71 
# Check that the libxml2 library used by lxml is current enough.
# From the lxml documents it states: (http://codespeak.net/lxml/installation.html)
# "If you want to use XPath, do not use libxml2 2.6.27. We recommend libxml2 2.7.2 or later"
# Testing was performed with the Ubuntu 9.10 "python-lxml" version "2.1.5-1ubuntu2" repository package
#
# etree.LIBXML_VERSION is a sequence of integers. It is compared numerically as a
# tuple because comparing the dotted string form is lexicographic and wrongly
# reports e.g. "2.10.3" as being older than "2.7.2".
version = '.'.join(str(digit) for digit in etree.LIBXML_VERSION)
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write('''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
 At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
86 
87 
class xpathFunctions(object):
    """XPath extension functions for the www.spitzer.caltech.edu grabber.

    The names of the extension functions are listed in ``functList``.
    ``persistence`` carries state between calls: spitzerLinkGeneration()
    records the thumbnail it found there and spitzerThumbnailLink() reads it.
    """
    def __init__(self):
        self.functList = ['spitzerLinkGeneration', 'spitzerThumbnailLink', 'spitzerCheckIfDBItem', ]
        self.TextTail = etree.XPath("string()")
        self.persistence = {}
        # NOTE(review): "common" is not defined or imported in this file; presumably
        # it is made available by the code that loads this module - confirm.
        self.htmlParser = common.parsers['html'].copy()
    # end __init__()

    def spitzerLinkGeneration(self, context, *args):
        '''Generate a link for the www.spitzer.caltech.edu site.
        Call example: 'mnvXpath:spitzerLinkGeneration(normalize-space(link), $paraMeter)'
        args[0] is the URL of the web page to read and args[1] is the page title.
        As a side effect the thumbnail found for this page (or False when there is
        none) is stored in self.persistence['spitzerThumbnailLink'].
        return the url link; the unmodified page URL is returned when the page
        cannot be read or does not contain a video code
        '''
        webURL = args[0]
        pageTitle = args[1]

        # Forget the thumbnail of any previously processed item so that the early
        # returns below cannot leave a stale value for spitzerThumbnailLink()
        self.persistence['spitzerThumbnailLink'] = False

        try:
            # The "with" statement closes the handle even if the read or decode fails
            with urllib.request.urlopen(webURL) as tmpHandle:
                tmpHTML = str(tmpHandle.read(), 'utf-8')
        except Exception as errmsg:
            # Best effort: report the problem and fall back to the plain page URL
            sys.stderr.write("Error reading url(%s) error(%s)\n" % (webURL, errmsg))
            return webURL

        # Get videocode: the text between "file=mp4:" and the next "."
        tmpHTML = tmpHTML.partition("file=mp4:")
        if not tmpHTML[1]:
            return webURL
        tmpHTML = tmpHTML[2]
        tmpLink = tmpHTML.partition('.')[0]

        # Fill out as much of the URL as possible
        customHTML = common.linkWebPage('dummy', 'spitzer')
        customHTML = customHTML.replace('TITLE', urllib.parse.quote(pageTitle))
        customHTML = customHTML.replace('VIDEOCODE', tmpLink)

        # Get Thumbnail image: the text between "image=" and the next double quote.
        # Only the text following the video code marker is searched.
        tmpHTML = tmpHTML.partition("image=")
        if not tmpHTML[1]:
            return customHTML.replace('IMAGE', '')
        tmpImage = tmpHTML[2].partition('"')[0]
        self.persistence['spitzerThumbnailLink'] = 'http://www.spitzer.caltech.edu%s' % tmpImage

        return customHTML.replace('IMAGE', tmpImage)
    # end spitzerLinkGeneration()

    def spitzerThumbnailLink(self, context, *args):
        '''Return the thumbnail recorded by the most recent spitzerLinkGeneration() call.
        Call example: 'mnvXpath:spitzerThumbnailLink('dummy')'
        return the thumbnail url, or an empty string when no thumbnail was recorded
        '''
        # .get() avoids a KeyError when spitzerLinkGeneration() has not stored anything
        return self.persistence.get('spitzerThumbnailLink') or ''
    # end spitzerThumbnailLink()

    def spitzerCheckIfDBItem(self, context, *arg):
        '''Use a unique key value pairing to find out if the 'internetcontentarticles' table already
        has a matching item. This is done to save accessing the Internet when not required.
        Call example: 'mnvXpath:spitzerCheckIfDBItem(title, author, description)'
        arg[0], arg[1] and arg[2] are the title, author and description to match.
        return True if a match was found
        return False if a match was not found
        '''
        # The lookup itself is delegated to common.checkIfDBItem() with a fixed feed title
        return common.checkIfDBItem('dummy', {'feedtitle': 'Space', 'title': arg[0], 'author': arg[1], 'description': arg[2]})
    # end spitzerCheckIfDBItem()
167 
168 
173 
174 
179 
180 
nv_python_libs.xsltfunctions.spitzer_api.OutStreamEncoder.__init__
def __init__(self, outstream, encoding=None)
Definition: spitzer_api.py:41
nv_python_libs.xsltfunctions.spitzer_api.xpathFunctions.persistence
persistence
Definition: spitzer_api.py:94
nv_python_libs.xsltfunctions.spitzer_api.xpathFunctions
Definition: spitzer_api.py:88
nv_python_libs.xsltfunctions.spitzer_api.OutStreamEncoder.__getattr__
def __getattr__(self, attr)
Definition: spitzer_api.py:57
nv_python_libs.xsltfunctions.spitzer_api.OutStreamEncoder.encoding
encoding
Definition: spitzer_api.py:44
nv_python_libs.xsltfunctions.spitzer_api.xpathFunctions.__init__
def __init__(self)
Definition: spitzer_api.py:91
nv_python_libs.xsltfunctions.spitzer_api.xpathFunctions.spitzerThumbnailLink
def spitzerThumbnailLink(self, context, *args)
Definition: spitzer_api.py:147
MythFile::copy
MBASE_PUBLIC long long copy(QFile &dst, QFile &src, uint block_size=0)
Copies src file to dst file.
Definition: mythmiscutil.cpp:263
nv_python_libs.xsltfunctions.spitzer_api.xpathFunctions.spitzerCheckIfDBItem
def spitzerCheckIfDBItem(self, context, *arg)
Definition: spitzer_api.py:158
nv_python_libs.xsltfunctions.spitzer_api.OutStreamEncoder
Definition: spitzer_api.py:39
nv_python_libs.xsltfunctions.spitzer_api.xpathFunctions.functList
functList
Definition: spitzer_api.py:92
nv_python_libs.xsltfunctions.spitzer_api.xpathFunctions.TextTail
TextTail
Definition: spitzer_api.py:93
nv_python_libs.xsltfunctions.spitzer_api.OutStreamEncoder.out
out
Definition: spitzer_api.py:42
nv_python_libs.xsltfunctions.spitzer_api.OutStreamEncoder.write
def write(self, obj)
Definition: spitzer_api.py:48
nv_python_libs.xsltfunctions.spitzer_api.xpathFunctions.htmlParser
htmlParser
Definition: spitzer_api.py:95
nv_python_libs.xsltfunctions.spitzer_api.xpathFunctions.spitzerLinkGeneration
def spitzerLinkGeneration(self, context, *args)
Start of XPath extension functions.
Definition: spitzer_api.py:104