MythTV  master
mevio_api.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # -*- coding: UTF-8 -*-
3 # ----------------------
4 # Name: mevio_api - XPath and XSLT functions for the Mevio RSS/HTML items
5 # Python Script
6 # Author: R.D. Vaughan
7 # Purpose: This python script is intended to perform a variety of utility functions
8 # for the conversion of data to the MNV standard RSS output format.
9 # See this link for the specifications:
10 # http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
11 #
12 # License:Creative Commons GNU GPL v2
13 # (http://creativecommons.org/licenses/GPL/2.0/)
14 #-------------------------------------
15 __title__ ="mevio_api - XPath and XSLT functions for the www.mevio.com RSS/HTML"
16 __author__="R.D. Vaughan"
17 __purpose__='''
18 This python script is intended to perform a variety of utility functions
19 for the conversion of data to the MNV standard RSS output format.
20 See this link for the specifications:
21 http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
22 '''
23 
24 __version__="v0.1.1"
25 # 0.1.0 Initial development
26 # 0.1.1 Fixed a bug when an autoplay link cannot be created
27 # Added MP4 as an acceptable downloadable video file type
28 # Added checking to see if the item is already in the data base
29 
30 # Specify the class names that have XPath extension functions
31 __xpathClassList__ = ['xpathFunctions', ]
32 
33 # Specify the XSLT extension class names. Each class is a stand-alone extension function
34 #__xsltExtentionList__ = ['xsltExtExample', ]
35 __xsltExtentionList__ = []
36 
37 import os, sys, re, time, datetime, shutil, urllib, string
38 from copy import deepcopy
39 
40 
class OutStreamEncoder(object):
    """Wrap an output stream so that text written to it is encoded first.

    Only ``write`` is intercepted; every other attribute access is
    forwarded to the wrapped stream.
    """
    def __init__(self, outstream, encoding=None):
        """Remember the stream to wrap and the encoding to apply.

        outstream -- file-like object that receives the output
        encoding  -- codec name used for text; when empty or None the
                     value of sys.getfilesystemencoding() is used
        """
        self.out = outstream
        self.encoding = encoding or sys.getfilesystemencoding()

    def write(self, obj):
        """Write obj to the wrapped stream, encoding text with self.encoding.

        Text (``unicode`` on Python 2, ``str`` on Python 3) is encoded
        before it is written; any other object is passed through unchanged.
        Output is best-effort: an IOError raised by the stream is ignored.
        """
        # type(u'') is the interpreter's text type under either major
        # version, which avoids naming the Python 2-only ``unicode`` builtin.
        if isinstance(obj, type(u'')):
            obj = obj.encode(self.encoding)
        # Text streams from the io module reject encoded bytes but expose
        # their binary stream as ``buffer``; streams without that attribute
        # are written to directly.
        sink = getattr(self.out, 'buffer', self.out)
        try:
            sink.write(obj)
        except IOError:
            pass

    def __getattr__(self, attr):
        """Delegate everything but write to the stream"""
        return getattr(self.out, attr)
# Replace the process-wide stdout/stderr with UTF-8 encoding wrappers
sys.stdout = OutStreamEncoder(sys.stdout, 'utf8')
sys.stderr = OutStreamEncoder(sys.stderr, 'utf8')

# Both libraries are mandatory: report the failure and stop when either import fails
try:
    try:
        from StringIO import StringIO
    except ImportError:
        # Python 3 has no StringIO module; the class lives in io
        from io import StringIO
    from lxml import etree
except Exception as e:
    sys.stderr.write(u'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
    sys.exit(1)
75 
# Check that the libxml library used by lxml is current enough
# From the lxml documents it states: (http://codespeak.net/lxml/installation.html)
# "If you want to use XPath, do not use libxml2 2.6.27. We recommend libxml2 2.7.2 or later"
# Testing was performed with the Ubuntu 9.10 "python-lxml" version "2.1.5-1ubuntu2" repository package
#
# The version components are compared numerically as a tuple. Comparing the
# dotted string instead would rank "2.10.0" below "2.7.2" and reject newer
# releases. The dotted string is still built for the error message.
version = '.'.join(str(digit) for digit in etree.LIBXML_VERSION)
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write(u'''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
 At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
90 
91 
class xpathFunctions(object):
    """Functions extending XPath for the Mevio RSS/HTML items
    """
    def __init__(self):
        '''Set up the function name list, the episode regexes, the XML namespaces and the
        XPath/regex pairs used to find a video's media id.
        '''
        # Names of the methods of this class offered as XPath extension functions
        self.functList = ['mevioLinkGeneration', 'mevioTitle', 'mevioEpisode', 'mevioCheckIfDBItem', ]
        # Patterns tried in order to find an episode number in a title
        self.episodeRegex = [
            # Episode 224
            re.compile(u'''^.+?Episode\\ (?P<episodeno>[0-9]+).*$''', re.UNICODE),
            # CrankyGeeks 136:
            re.compile(u'''^.+?(?P<episodeno>[0-9]+)\\:.*$''', re.UNICODE),
            ]
        self.namespaces = {
            'atom10': u"http://www.w3.org/2005/Atom",
            'media': u"http://search.yahoo.com/mrss/",
            'itunes':"http://www.itunes.com/dtds/podcast-1.0.dtd",
            'xhtml': u"http://www.w3.org/1999/xhtml",
            'feedburner': u"http://rssnamespace.org/feedburner/ext/1.0",
            'mythtv': "http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format",
            'dc': "http://purl.org/dc/elements/1.1/",
            'fb': "http://www.facebook.com/2008/fbml/",
            }
        # Each entry pairs an XPath that selects a candidate string in the web page
        # with the regex that extracts the numeric video code from that string
        self.mediaIdFilters = [
            [etree.XPath(".//embed/@flashvars", namespaces=self.namespaces), re.compile(u'''^.+?MediaId=(?P<videocode>[0-9]+).*$''', re.UNICODE)],
            [etree.XPath(".//div[@class='player_wrapper']/a/@href", namespaces=self.namespaces), re.compile(u'''^.+?\\'(?P<videocode>[0-9]+)\\'\\)\\;.*$''', re.UNICODE)]
            ]
    # end __init__()

    @staticmethod
    def _firstText(arg):
        '''Return the first item of a list/tuple argument, or u'' when it is empty.
        Any other kind of argument (e.g. a plain string) is returned unchanged.
        '''
        if isinstance(arg, (list, tuple)):
            if arg:
                return arg[0]
            return u''
        return arg
    # end _firstText()

    # Start of XPath extension functions

    def mevioLinkGeneration(self, context, *arg):
        '''Generate a link for the video.
        Call example: 'mnvXpath:mevioLinkGeneration(string(link))'
        return the url link: a local "mevio.html" file URL carrying the video code when one
        can be found in the web page, otherwise the web page URL that was passed in
        '''
        webURL = arg[0]
        try:
            tmpHTML = etree.parse(webURL, etree.HTMLParser())
        except Exception as errmsg:
            sys.stderr.write(u"Error reading url(%s) error(%s)\n" % (webURL, errmsg))
            return webURL

        # Try each filter in turn; the first one that yields a video code wins
        for xpathFilter, codeRegex in self.mediaIdFilters:
            mediaId = xpathFilter(tmpHTML)
            if not len(mediaId):
                continue
            match = codeRegex.match(mediaId[0])
            if match:
                # NOTE(review): "common" is not defined or imported in the visible part of
                # this file; presumably it is supplied by the code that loads this module - confirm
                return u'file://%s/nv_python_libs/configs/HTML/mevio.html?videocode=%s' % (common.baseProcessingDir, match.group('videocode'))
        # No filter produced a video code so fall back to the web page itself
        return webURL
    # end mevioLinkGeneration()

    def mevioTitle(self, context, arg):
        '''Parse the title string extract only the title text removing the redundant show name
        Call example: 'mnvXpath:mevioTitle(./title/text())'
        return the title text, prefixed with "Ep <number>: " when an episode number is found
        return an empty string when the argument is an empty node-set
        '''
        title = self._firstText(arg)
        epText = self.mevioEpisode('dummy', arg).text
        if epText:
            epText = u'Ep %s: ' % epText
        else:
            epText = u''
        # Each entry is a separator and the side of it where the show name sits:
        # 'before' keeps the text in front of the last occurrence of the separator,
        # 'after' keeps the text behind the first occurrence
        seperatorStrs = [[' | ', 'before'], [': ', 'after'], [' - ', 'before']]
        for separator, keep in seperatorStrs:
            if keep == 'after':
                index = title.find(separator)
                if index != -1:
                    return u'%s%s' % (epText, title[index+len(separator):].strip())
            else:
                index = title.rfind(separator)
                if index != -1:
                    return u'%s%s' % (epText, title[:index].strip())
        # No separator is present in the title
        if epText:
            return epText
        return title.strip()
    # end mevioTitle()

    def mevioEpisode(self, context, arg):
        '''Parse the title string and extract an episode number
        Call example: 'mnvXpath:mevioEpisode(./title/text())'
        return an episode element, its text is empty when no episode number is found
        '''
        title = self._firstText(arg)
        episodeNumber = u''
        for regex in self.episodeRegex:
            match = regex.match(title)
            if match:
                # Take the named group so that a string, not a tuple, is formatted below
                episodeNumber = match.group('episodeno')
                break
        return etree.XML(u'<episode>%s</episode>' % episodeNumber)
    # end mevioEpisode()

    def mevioCheckIfDBItem(self, context, *arg):
        '''Use a unique key value pairing to find out if the 'internetcontentarticles' table already
        has a matching item. This is done to save accessing the Internet when not required.
        Call example: 'mnvXpath:mevioCheckIfDBItem(title, description)'
        return True if a match was found
        return False if a match was not found
        '''
        # NOTE(review): "common" is not defined or imported in the visible part of this file;
        # presumably it is supplied by the code that loads this module - confirm
        return common.checkIfDBItem('dummy', {'feedtitle': 'Technology', 'title': arg[0], 'description': arg[1]})
    # end mevioCheckIfDBItem()
200 
201 
206 
207 
212 
213 
static pid_list_t::iterator find(const PIDInfoMap &map, pid_list_t &list, pid_list_t::iterator begin, pid_list_t::iterator end, bool find_open)
def mevioLinkGeneration(self, context, *arg)
Start of XPath extension functions.
Definition: mevio_api.py:125
def __init__(self, outstream, encoding=None)
Definition: mevio_api.py:43