MythTV  master
spitzer_api.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # -*- coding: UTF-8 -*-
3 # ----------------------
4 # Name: spitzer_api - XPath and XSLT functions for the www.spitzer.caltech.edu grabber
5 # Python Script
6 # Author: R.D. Vaughan
7 # Purpose: This python script is intended to perform a variety of utility functions
8 # for the conversion of data to the MNV standard RSS output format.
9 # See this link for the specifications:
10 # http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
11 #
12 # License:Creative Commons GNU GPL v2
13 # (http://creativecommons.org/licenses/GPL/2.0/)
14 #-------------------------------------
15 __title__ ="spitzer_api - XPath and XSLT functions for the www.spitzer.caltech.edu grabber"
16 __author__="R.D. Vaughan"
17 __purpose__='''
18 This python script is intended to perform a variety of utility functions
19 for the conversion of data to the MNV standard RSS output format.
20 See this link for the specifications:
21 http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
22 '''
23 
24 __version__="v0.1.0"
25 # 0.1.0 Initial development
26 
27 
28 # Specify the class names that have XPath extension functions
29 __xpathClassList__ = ['xpathFunctions', ]
30 
31 # Specify the XSLT extension class names. Each class is a stand-alone extension function
32 #__xsltExtentionList__ = ['xsltExtExample', ]
33 __xsltExtentionList__ = []
34 
35 import os, sys, re, time, datetime, shutil, urllib, string
36 from copy import deepcopy
37 
38 
class OutStreamEncoder(object):
    """Stream proxy that encodes Unicode text before handing it to the real stream"""
    def __init__(self, outstream, encoding=None):
        self.out = outstream
        # Use the filesystem encoding whenever the caller does not name one
        self.encoding = encoding if encoding else sys.getfilesystemencoding()

    def write(self, obj):
        """Write obj to the wrapped stream; Unicode strings are encoded with
        self.encoding first, anything else is passed through unchanged.
        An IOError raised by the wrapped stream is deliberately ignored.
        """
        payload = obj.encode(self.encoding) if isinstance(obj, unicode) else obj
        try:
            self.out.write(payload)
        except IOError:
            pass

    def __getattr__(self, attr):
        """Forward every attribute other than write to the wrapped stream"""
        return getattr(self.out, attr)
# Route both standard streams through the UTF-8 encoding wrapper
sys.stdout, sys.stderr = OutStreamEncoder(sys.stdout, 'utf8'), OutStreamEncoder(sys.stderr, 'utf8')
66 
# Both libraries are required; report the failure and stop when either import fails.
try:
    from StringIO import StringIO
    from lxml import etree
except Exception as e:
    # "except ... as" is accepted from Python 2.6 on; the older comma form is
    # a syntax error on Python 3.
    sys.stderr.write(u'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
    sys.exit(1)
73 
74 # Check that the lxml library is current enough
75 # From the lxml documents it states: (http://codespeak.net/lxml/installation.html)
76 # "If you want to use XPath, do not use libxml2 2.6.27. We recommend libxml2 2.7.2 or later"
77 # Testing was performed with the Ubuntu 9.10 "python-lxml" version "2.1.5-1ubuntu2" repository package
# Dotted form of the libxml version, used for the error message below
version = '.'.join(str(digit) for digit in etree.LIBXML_VERSION)
# Compare the numeric components, not the dotted string: a string comparison
# is lexicographic, so '2.10.0' would wrongly sort before '2.7.2'.
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write(u'''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
 At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
88 
89 
class xpathFunctions(object):
    """XPath extension functions specific to the www.spitzer.caltech.edu grabber

    NOTE(review): the methods use a module-level name "common" that is not
    defined in the visible part of this file - presumably it is injected by the
    code that loads this module; confirm against the loader.
    """
    def __init__(self):
        # Names of the methods of this class listed as XPath extension functions
        self.functList = ['spitzerLinkGeneration', 'spitzerThumbnailLink', 'spitzerCheckIfDBItem', ]
        self.TextTail = etree.XPath("string()")
        # Values handed from one extension function call to a later one
        self.persistence = {}
        self.htmlParser = common.parsers['html'].copy()
    # end __init__()

    def spitzerLinkGeneration(self, context, *args):
        '''Generate a link for the www.spitzer.caltech.edu site.
        Call example: 'mnvXpath:spitzerLinkGeneration(normalize-space(link), $paraMeter)'
        args[0] is the URL of the item's web page and args[1] is the page title.
        As a side effect the thumbnail URL found on the page (or False when none
        was found) is stored in self.persistence['spitzerThumbnailLink'] so that
        a following spitzerThumbnailLink() call can return it.
        return the url link. The unchanged web page URL is returned when the page
        cannot be read or no video code can be extracted from it.
        '''
        webURL = args[0]
        pageTitle = args[1]

        # Forget the thumbnail of the previously processed item so that an early
        # return below cannot leak a stale thumbnail into this item.
        self.persistence['spitzerThumbnailLink'] = False

        try:
            tmpHandle = urllib.urlopen(webURL)
            try:
                tmpHTML = unicode(tmpHandle.read(), 'utf-8')
            finally:
                # Close the handle even when reading or decoding fails
                tmpHandle.close()
        except Exception as errmsg:
            sys.stderr.write(u"Error reading url(%s) error(%s)\n" % (webURL, errmsg))
            return webURL

        # Get videocode: the text between "file=mp4:" and the next '.'
        findText = u"file=mp4:"
        posText = tmpHTML.find(findText)
        if posText == -1:
            return webURL
        tmpHTML = tmpHTML[posText+len(findText):]
        endText = tmpHTML.find('.')
        if endText == -1:
            # No terminator, so there is no usable video code
            return webURL
        tmpLink = tmpHTML[:endText]

        # Fill out as much of the URL as possible
        customHTML = common.linkWebPage('dummy', 'spitzer')
        customHTML = customHTML.replace('TITLE', urllib.quote(pageTitle))
        customHTML = customHTML.replace('VIDEOCODE', tmpLink)

        # Get Thumbnail image: the text between "image=" and the next '"',
        # searched for in the text that follows the video code marker
        findText = u"image="
        posText = tmpHTML.find(findText)
        if posText == -1:
            return customHTML.replace('IMAGE', u'')
        tmpHTML = tmpHTML[posText+len(findText):]
        endText = tmpHTML.find('"')
        if endText == -1:
            # No closing quote, treat it the same as no thumbnail being found
            return customHTML.replace('IMAGE', u'')
        tmpImage = tmpHTML[:endText]
        self.persistence['spitzerThumbnailLink'] = u'http://www.spitzer.caltech.edu%s' % tmpImage

        return customHTML.replace('IMAGE', tmpImage)
    # end spitzerLinkGeneration()

    def spitzerThumbnailLink(self, context, *args):
        '''Return the thumbnail recorded by the most recent spitzerLinkGeneration() call.
        Call example: 'mnvXpath:spitzerThumbnailLink('dummy')'
        return the thumbnail url, or an empty string when no thumbnail was
        recorded (including when spitzerLinkGeneration() has not been called yet)
        '''
        # .get() avoids a KeyError when nothing has been stored so far
        thumbnail = self.persistence.get('spitzerThumbnailLink')
        if not thumbnail:
            return u''
        return thumbnail
    # end spitzerThumbnailLink()

    def spitzerCheckIfDBItem(self, context, *arg):
        '''Use a unique key value pairing to find out if the 'internetcontentarticles' table already
        has a matching item. This is done to save accessing the Internet when not required.
        Call example: 'mnvXpath:spitzerCheckIfDBItem(title, author, description)'
        arg[0] is the title, arg[1] the author and arg[2] the description.
        return True if a match was found
        return False if a match was not found
        '''
        return common.checkIfDBItem('dummy', {'feedtitle': 'Space', 'title': arg[0], 'author': arg[1], 'description': arg[2]})
    # end spitzerCheckIfDBItem()
169 
170 
175 
176 
181 
182 
def __init__(self, outstream, encoding=None)
Definition: spitzer_api.py:41
long long copy(QFile &dst, QFile &src, uint block_size)
Copies src file to dst file.
def spitzerLinkGeneration(self, context, *args)
Start of XPath extension functions.
Definition: spitzer_api.py:106