MythTV  master
linuxAction_api.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # -*- coding: UTF-8 -*-
3 # ----------------------
4 # Name: linuxAction_api - XPath and XSLT functions for the www.jupiterbroadcasting.com RSS/HTML items
5 # Python Script
6 # Author: R.D. Vaughan
7 # Purpose: This python script is intended to perform a variety of utility functions
8 # for the conversion of data to the MNV standard RSS output format.
9 # See this link for the specifications:
10 # http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
11 #
12 # License:Creative Commons GNU GPL v2
13 # (http://creativecommons.org/licenses/GPL/2.0/)
14 #-------------------------------------
15 __title__ ="linuxAction_api - XPath and XSLT functions for the www.jupiterbroadcasting.com RSS/HTML"
16 __author__="R.D. Vaughan"
17 __purpose__='''
18 This python script is intended to perform a variety of utility functions
19 for the conversion of data to the MNV standard RSS output format.
20 See this link for the specifications:
21 http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
22 '''
23 
24 __version__="v0.1.1"
25 # 0.1.0 Initial development
26 
27 
28 # Specify the class names that have XPath extension functions
29 __xpathClassList__ = ['xpathFunctions', ]
30 
31 # Specify the XSLT extension class names. Each class is a stand-alone extension function
32 #__xsltExtentionList__ = ['xsltExtExample', ]
33 __xsltExtentionList__ = []
34 
35 import os, sys, re, time, datetime, shutil, urllib, string
36 from copy import deepcopy
37 
38 
class OutStreamEncoder(object):
    """Stream wrapper that encodes Unicode text before handing it on.

    Anything that is not a Unicode string is forwarded unchanged, and every
    attribute other than ``write`` is looked up on the wrapped stream.
    """

    def __init__(self, outstream, encoding=None):
        """Remember the wrapped stream and the encoding to apply.

        outstream -- object whose ``write`` method receives the output
        encoding  -- codec name; a false value selects
                     sys.getfilesystemencoding()
        """
        self.out = outstream
        self.encoding = encoding or sys.getfilesystemencoding()

    def write(self, obj):
        """Write obj to the wrapped stream, encoding it first when it is Unicode.

        An IOError raised while writing is ignored.
        """
        try:
            if isinstance(obj, unicode):
                obj = obj.encode(self.encoding)
            self.out.write(obj)
        except IOError:
            pass

    def __getattr__(self, attr):
        """Forward attributes that the wrapper lacks to the wrapped stream."""
        return getattr(self.out, attr)
# Route both standard streams through the UTF-8 encoding wrapper
sys.stdout, sys.stderr = OutStreamEncoder(sys.stdout, 'utf8'), OutStreamEncoder(sys.stderr, 'utf8')
66 
try:
    # Both libraries are required; a failure to import either one is fatal
    from StringIO import StringIO
    from lxml import etree
except Exception as importError:
    sys.stderr.write(u'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % importError)
    sys.exit(1)
73 
74 # Check that the lxml library is current enough
75 # From the lxml documents it states: (http://codespeak.net/lxml/installation.html)
76 # "If you want to use XPath, do not use libxml2 2.6.27. We recommend libxml2 2.7.2 or later"
77 # Testing was performed with the Ubuntu 9.10 "python-lxml" version "2.1.5-1ubuntu2" repository package
# Dotted text form of the libxml2 version, used only for the error message
version = '.'.join(str(digit) for digit in etree.LIBXML_VERSION)
# Compare the numeric tuple rather than the dotted text: as a string
# '2.10.0' sorts before '2.7.2', which wrongly rejected newer libxml2 releases.
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write(u'''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
          At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
88 
89 
class xpathFunctions(object):
    """XPath extension functions for the www.jupiterbroadcasting.com RSS/HTML items.

    The method names listed in ``self.functList`` take the XPath evaluation
    context followed by the arguments given in the XPath expression.
    """
    def __init__(self):
        '''Set up the function name list, the season/episode regexes, the XML
        namespaces and the player URL data used by the methods of this class.
        '''
        self.functList = ['linuxActionLinkGeneration', 'linuxActionTitleSeEp', 'linuxActioncheckIfDBItem', ]
        self.s_e_Regex = [
            # s12e05
            # A plain "e" replaces the former "\e": Python 2 treated that unknown
            # escape as a literal "e", newer versions of "re" reject it.
            re.compile(u'''^.+?[Ss](?P<seasno>[0-9]+)e(?P<epno>[0-9]+).*$''', re.UNICODE),
            # Season 11 Episode 3
            re.compile(u'''^.+?Season\\ (?P<seasno>[0-9]+)\\ Episode\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
            ]
        self.namespaces = {
            'atom': "http://www.w3.org/2005/Atom",
            'atom10': "http://www.w3.org/2005/Atom",
            'media': "http://search.yahoo.com/mrss/",
            'itunes':"http://www.itunes.com/dtds/podcast-1.0.dtd",
            'xhtml': "http://www.w3.org/1999/xhtml",
            'mythtv': "http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format",
            'feedburner': "http://rssnamespace.org/feedburner/ext/1.0",
            'fb': "http://www.facebook.com/2008/fbml",
            }
        self.mediaIdFilters = [
            [etree.XPath('//object/@id', namespaces=self.namespaces ), None],
            ]
        self.FullScreen = u'http://linuxAction.com/show/popupPlayer?video_id=%s&quality=high&offset=0'
        # NOTE(review): "common" is not imported in this file; presumably the
        # code that loads this module provides it - confirm.
        self.FullScreenParser = common.parsers['html'].copy()
    # end __init__()

    @staticmethod
    def _embedSource(html):
        '''Find the first '<embed src=' in the page text and return the value
        that follows it, up to the next double quote.
        return None when the marker or the closing double quote is missing
        '''
        marker = u'<embed src='
        start = html.find(marker)
        if start == -1:
            return None
        # Step over the marker and the single quote character that opens the value
        start += len(marker) + 1
        end = html.find(u'"', start)
        if end == -1:
            # Without this guard the slice below would end at -1 and hand back
            # almost the entire remainder of the page as the "link"
            return None
        return html[start:end]
    # end _embedSource()

    def linuxActionLinkGeneration(self, context, *arg):
        '''Generate a link for the video.
        Call example: 'mnvXpath:linuxActionLinkGeneration(string(link))'
        return the url link
        return the unchanged page URL when the page cannot be read or no
        usable '<embed src=' value is found
        '''
        webURL = arg[0]
        try:
            tmpHandle = urllib.urlopen(webURL)
            try:
                tmpHTML = unicode(tmpHandle.read(), 'utf-8')
            finally:
                # Release the connection even when reading or decoding fails
                tmpHandle.close()
        except Exception as errmsg:
            sys.stderr.write(u"Error reading url(%s) error(%s)\n" % (webURL, errmsg))
            return webURL

        tmpLink = self._embedSource(tmpHTML)
        if not tmpLink:
            return webURL

        # YouTube links get '&autoplay=1' appended, everything else '?autostart=1'
        if 'www.youtube.com' in tmpLink:
            return u'%s&autoplay=1' % tmpLink
        return u'%s?autostart=1' % tmpLink
    # end linuxActionLinkGeneration()

    def linuxActionTitleSeEp(self, context, *arg):
        '''Clean up a title and extract the season and episode numbers from it
        Call example: 'mnvXpath:linuxActionTitleSeEp(title)'
        return an array holding the massaged title element followed, when the
        numbers were found, by a season element and an episode element
        '''
        title = arg[0]
        # Cut the title at each of these markers, but only when the marker is
        # not at the very start of the text
        index = title.find('|')
        if index > 0:
            title = title[:index].strip()
        index = title.find('The Linux Action Show')
        if index > 0:
            title = title[:index].strip()
        index = title.find('! Season')
        if index > 0:
            # NOTE(review): index-1 also drops the character in front of the
            # '!' - confirm that this is intended
            title = title[:index-1].strip()
        title = common.htmlToString('dummy', title)

        elementArray = []
        seasonNumber = u''
        episodeNumber = u''
        # The regexes are applied to the unmodified title; the first match wins
        for regex in self.s_e_Regex:
            match = regex.match(arg[0])
            if match:
                # int() removes leading zeros, e.g. "05" becomes "5"
                seasonNumber = u'%s' % int(match.group('seasno'))
                episodeNumber = u'%s' % int(match.group('epno'))
                elementArray.append(etree.XML(u"<title>%s</title>" % (u'S%02dE%02d: %s' % (int(seasonNumber), int(episodeNumber), title))))
                break
        else:
            # No season/episode pattern matched, use the cleaned title as is
            elementArray.append(etree.XML(u"<title>%s</title>" % title ))
        if seasonNumber:
            tmpElement = etree.Element('{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}season')
            tmpElement.text = seasonNumber
            elementArray.append(tmpElement)
        if episodeNumber:
            tmpElement = etree.Element('{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}episode')
            tmpElement.text = episodeNumber
            elementArray.append(tmpElement)
        return elementArray
    # end linuxActionTitleSeEp()

    def linuxActioncheckIfDBItem(self, context, *arg):
        '''Use a unique key value pairing to find out if the 'internetcontentarticles' table already
        has a matching item. This is done to save accessing the Internet when not required.
        Call example: 'mnvXpath:linuxActioncheckIfDBItem(title, author)'
        return True if a match was found
        return False if a match was not found
        '''
        # Look the item up under the same massaged title that linuxActionTitleSeEp() produces
        titleElement = self.linuxActionTitleSeEp('dummy', arg[0])[0]
        return common.checkIfDBItem('dummy', {'feedtitle': 'Technology', 'title': titleElement.text, 'author': arg[1]})
    # end linuxActioncheckIfDBItem()
203 
204 
209 
210 
215 
216 
long long copy(QFile &dst, QFile &src, uint block_size)
Copies src file to dst file.
def linuxActionLinkGeneration(self, context, *arg)
Start of XPath extension functions.