MythTV master
spitzer_api.py
Go to the documentation of this file.
1# -*- coding: UTF-8 -*-
2
3# ----------------------
4# Name: spitzer_api - XPath and XSLT functions for the www.spitzer.caltech.edu grabber
5# Python Script
6# Author: R.D. Vaughan
7# Purpose: This python script is intended to perform a variety of utility functions
8# for the conversion of data to the MNV standard RSS output format.
9# See this link for the specifications:
10# http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
11#
12# License:Creative Commons GNU GPL v2
13# (http://creativecommons.org/licenses/GPL/2.0/)
14#-------------------------------------
15__title__ ="spitzer_api - XPath and XSLT functions for the www.spitzer.caltech.edu grabber"
16__author__="R.D. Vaughan"
17__purpose__='''
18This python script is intended to perform a variety of utility functions
19for the conversion of data to the MNV standard RSS output format.
20See this link for the specifications:
21http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
22'''
23
24__version__="v0.1.0"
25# 0.1.0 Initial development
26
27
28# Specify the class names that have XPath extension functions
29__xpathClassList__ = ['xpathFunctions', ]
30
31# Specify the XSLT extension class names. Each class is a stand-alone extension function
32#__xsltExtentionList__ = ['xsltExtExample', ]
33__xsltExtentionList__ = []
34
35import os, sys, re, time, datetime, shutil, urllib.request, urllib.parse, urllib.error, string
36from copy import deepcopy
37import io
38
class OutStreamEncoder(object):
    """Wraps a stream with an encoder.

    Text written through the wrapper is encoded with ``encoding`` and sent
    to the wrapped stream's underlying binary ``buffer``. Streams that have
    no ``buffer`` attribute (for example ``io.StringIO``) receive the text
    directly instead of raising ``AttributeError``.
    """
    def __init__(self, outstream, encoding=None):
        """Remember the stream to wrap and the encoding to apply.

        outstream -- the stream object being wrapped
        encoding  -- codec name; when empty or None the value of
                     sys.getfilesystemencoding() is used
        """
        self.out = outstream
        if not encoding:
            self.encoding = sys.getfilesystemencoding()
        else:
            self.encoding = encoding

    def write(self, obj):
        """Wraps the output stream, encoding Unicode strings with the specified encoding.

        obj -- a str (encoded before being written to the binary buffer) or
               a bytes-like object (written to the binary buffer unchanged)

        OSError raised by the underlying write is deliberately ignored so
        that a broken output stream does not abort the caller (best effort).
        """
        rawStream = getattr(self.out, 'buffer', None)
        if rawStream is not None:
            # Normal case: the wrapped stream exposes its binary layer
            if isinstance(obj, str):
                obj = obj.encode(self.encoding)
            writer = rawStream.write
        else:
            # Text-only stream: hand it text, decoding any bytes first
            if isinstance(obj, (bytes, bytearray)):
                obj = bytes(obj).decode(self.encoding, 'replace')
            writer = self.out.write
        try:
            writer(obj)
        except OSError:
            pass

    def __getattr__(self, attr):
        """Delegate everything but write to the stream"""
        if attr == 'out':
            # 'out' is only looked up here when it was never assigned;
            # failing fast avoids infinite recursion through self.out
            raise AttributeError(attr)
        return getattr(self.out, attr)
60
# When stdout is a regular text stream, route both standard streams
# through OutStreamEncoder so that everything written is UTF-8 encoded.
if isinstance(sys.stdout, io.TextIOWrapper):
    sys.stdout, sys.stderr = (
        OutStreamEncoder(sys.stdout, 'utf8'),
        OutStreamEncoder(sys.stderr, 'utf8'),
    )
64
65try:
66 from io import StringIO
67 from lxml import etree
68except Exception as e:
69 sys.stderr.write('\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
70 sys.exit(1)
71
72
class xpathFunctions(object):
    """XPath extension functions for the www.spitzer.caltech.edu grabber.

    NOTE(review): the name ``common`` used by several methods is neither
    defined nor imported in the visible part of this file; presumably it is
    supplied by the code that loads this module -- confirm.
    """
    def __init__(self):
        """Set up the list of exported function names and the shared state."""
        self.functList = ['spitzerLinkGeneration', 'spitzerThumbnailLink', 'spitzerCheckIfDBItem', ]
        self.TextTail = etree.XPath("string()")
        # Values handed from one extension function call to a later one
        self.persistence = {}
        self.htmlParser = common.parsers['html'].copy()
    # end __init__()

    def spitzerLinkGeneration(self, context, *args):
        '''Generate a link for the www.spitzer.caltech.edu site.
        Call example: 'mnvXpath:spitzerLinkGeneration(normalize-space(link), $paraMeter)'
        args[0] is the URL of the item's web page, args[1] is the page title.
        The thumbnail URL found on the page (or False when there is none) is
        stored in self.persistence['spitzerThumbnailLink'] for a following
        spitzerThumbnailLink() call.
        return the url link, or the unchanged web page URL when the page
        cannot be read or holds no recognisable video code
        '''
        webURL = args[0]
        pageTitle = args[1]

        # Forget the thumbnail of any previously processed item so that an
        # early return below cannot leave a stale value behind
        self.persistence['spitzerThumbnailLink'] = False

        try:
            # The context manager closes the handle even if the read fails
            with urllib.request.urlopen(webURL) as tmpHandle:
                tmpHTML = str(tmpHandle.read(), 'utf-8')
        except Exception as errmsg:
            sys.stderr.write("Error reading url(%s) error(%s)\n" % (webURL, errmsg))
            return webURL

        # Get videocode
        findText = "file=mp4:"
        posText = tmpHTML.find(findText)
        if posText == -1:
            return webURL
        tmpHTML = tmpHTML[posText + len(findText):]
        endText = tmpHTML.find('.')
        if endText == -1:
            # No terminating '.', so there is no usable video code
            return webURL
        tmpLink = tmpHTML[:endText]

        # Fill out as much of the URL as possible
        customHTML = common.linkWebPage('dummy', 'spitzer')
        customHTML = customHTML.replace('TITLE', urllib.parse.quote(pageTitle))
        customHTML = customHTML.replace('VIDEOCODE', tmpLink)

        # Get Thumbnail image (searched for after the video code)
        findText = "image="
        posText = tmpHTML.find(findText)
        if posText != -1:
            tmpHTML = tmpHTML[posText + len(findText):]
            endText = tmpHTML.find('"')
        if posText == -1 or endText == -1:
            # No image parameter, or its value is not terminated by a quote
            return customHTML.replace('IMAGE', '')
        tmpImage = tmpHTML[:endText]
        self.persistence['spitzerThumbnailLink'] = 'http://www.spitzer.caltech.edu%s' % tmpImage

        return customHTML.replace('IMAGE', tmpImage)
    # end spitzerLinkGeneration()

    def spitzerThumbnailLink(self, context, *args):
        '''Return the thumbnail URL stored by the last spitzerLinkGeneration() call.
        Call example: 'mnvXpath:spitzerThumbnailLink('dummy')'
        return the thumbnail url, or an empty string when no thumbnail was stored
        '''
        # .get() copes with spitzerLinkGeneration() never having been called
        return self.persistence.get('spitzerThumbnailLink') or ''
    # end spitzerThumbnailLink()

    def spitzerCheckIfDBItem(self, context, *arg):
        '''Use a unique key value pairing to find out if the 'internetcontentarticles' table already
        has a matching item. This is done to save accessing the Internet when not required.
        Call example: 'mnvXpath:spitzerCheckIfDBItem(title, author, description)'
        return True if a match was found
        return False if a match was not found
        '''
        return common.checkIfDBItem('dummy', {'feedtitle': 'Space', 'title': arg[0], 'author': arg[1], 'description': arg[2]})
    # end spitzerCheckIfDBItem()
152
153
158
159
164
165
def __init__(self, outstream, encoding=None)
Definition: spitzer_api.py:41
def spitzerLinkGeneration(self, context, *args)
Start of XPath extension functions.
Definition: spitzer_api.py:89
MBASE_PUBLIC long long copy(QFile &dst, QFile &src, uint block_size=0)
Copies src file to dst file.