MythTV master
common_api.py
Go to the documentation of this file.
1# -*- coding: UTF-8 -*-
2
3# ----------------------
4# Name: common_api.py - Common class libraries for all MythNetvision Mashup processing
5# Python Script
6# Author: R.D. Vaughan
7# Purpose: This python script contains a number of common functions used for processing MythNetvision
8# Grabbers.
9#
10# License:Creative Commons GNU GPL v2
11# (http://creativecommons.org/licenses/GPL/2.0/)
12#-------------------------------------
13__title__ ="common_api - Common class libraries for all MythNetvision Mashup processing"
14__author__="R.D. Vaughan"
15__purpose__='''
16This python script is intended to perform a variety of utility functions for the processing of
17MythNetvision Grabber scripts that run as a Web application and global functions used by many
18MNV grabbers.
19'''
20
21__version__="v0.2.3"
22# 0.0.1 Initial development
23# 0.1.0 Alpha release
24# 0.1.1 Added the ability to have a mashup name independent of the mashup title
25# Added passing on the url the emml hostname and port so a mashup can call other emml mashups
26# 0.1.2 Modifications to support launching single treeview requests for better integration with MNV
27# subscription logic.
28# With the change to allow MNV launching individual tree views the auto shutdown feature had to be
29# disabled. Unless a safe work around can be found the feature may need to be removed entierly.
30# 0.1.3 Modifications to support calling grabbers that run on a Web server
31# Added a class of global functions that could be used by all grabbers
32# 0.1.4 Changed the rating item element to default to be empty rather than "0.0"
33# Changed the default logger to stderr
34# 0.1.5 Added functions and data structures for common "Mashups" grabbers
35# Changed the api name from "mashups_api" to "common_api"
36# Added XSLT stylsheets as an alternate process option in the threaded URL download functions
37# 0.1.6 Removed all logic associated with Web CGI calls as the MNV plugin is now on the backend
38# Made the pubDate function more adaptable to various input date strings
39# 0.1.7 Added a common function to get the current selected language (default is 'en' English)
40# 0.1.8 Fixed a bug with two string functions
41# Added a customhtml reference for bliptv
42# 0.1.9 Add a function that allows grabbers to check if an item is already in the data base. This is used
43# to make grabbers more efficient when processing sources that are largely made up of the same
44# data. This is particularly important when a grabber is forced to do additional Internet accesses
45# to acquire all the needed MNV item data.
46# Added a function that checks if there are any treeview items in the data base for a specific
47# grabber. Some Mashup grabber's search option only returns results when then there are treeview
48# items in the database.
49# 0.2.0 Made the creation of custom HTML page links more flexible so code did not need to be changed
50# when new custom HTML pages were added.
51# 0.2.1 Add the ability for a parameters to be passed to a XSLT style sheet
52# 0.2.2 Added a common XPath extention to test if a string starts or ends with a substring
53# 0.2.3 Fixed Error messages that were not unicode strings
54
55import os, struct, sys, re, datetime, time, subprocess, string
56import urllib.request, urllib.parse, urllib.error
57import logging
58import telnetlib
59from threading import Thread
60
61from .common_exceptions import (WebCgiUrlError, WebCgiHttpError, WebCgiRssError, WebCgiVideoNotFound, WebCgiXmlError, )
62import io
63
class OutStreamEncoder(object):
    """Wrap a text stream so Unicode strings are written as encoded bytes."""

    def __init__(self, outstream, encoding=None):
        # Fall back to the filesystem encoding when none is supplied
        self.out = outstream
        self.encoding = encoding if encoding else sys.getfilesystemencoding()

    def write(self, obj):
        """Encode str input with the configured encoding, then write the
        bytes to the wrapped stream's underlying binary buffer."""
        data = obj.encode(self.encoding) if isinstance(obj, str) else obj
        self.out.buffer.write(data)

    def __getattr__(self, attr):
        """Delegate everything but write to the wrapped stream."""
        return getattr(self.out, attr)
82
# Re-wrap stdout/stderr so Unicode text is always emitted as UTF-8 bytes,
# matching what the MythTV backend expects when grabber output is piped to it.
# Only wrap when the streams are still the default text wrappers (i.e. not
# already replaced, e.g. under a test harness).
if isinstance(sys.stdout, io.TextIOWrapper):
    sys.stdout = OutStreamEncoder(sys.stdout, 'utf8')
    sys.stderr = OutStreamEncoder(sys.stderr, 'utf8')
86
87
# lxml is a hard dependency: all XML/XHTML parsing, XPath and XSLT processing
# below is built on etree, so abort immediately when it cannot be imported.
try:
    from io import StringIO
    from lxml import etree
except Exception as e:
    sys.stderr.write('\n! Error - Importing the "lxml" python library failed on error(%s)\n' % e)
    sys.exit(1)
94
95
96
101class Common(object):
102 """A collection of common functions used by many grabbers
103 """
104 def __init__(self,
105 logger=False,
106 debug=False,
107 ):
108 self.logger = logger
109 self.debug = debug
110 self.baseProcessingDir = os.path.dirname( os.path.realpath( __file__ )).replace('/nv_python_libs/common', '')
111 self.namespaces = {
112 'xsi': "http://www.w3.org/2001/XMLSchema-instance",
113 'media': "http://search.yahoo.com/mrss/",
114 'xhtml': "http://www.w3.org/1999/xhtml",
115 'atm': "http://www.w3.org/2005/Atom",
116 'mythtv': "http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format",
117 'itunes':"http://www.itunes.com/dtds/podcast-1.0.dtd",
118 }
119 self.parsers = {
120 'xml': etree.XMLParser(remove_blank_text=True),
121 'html': etree.HTMLParser(remove_blank_text=True),
122 'xhtml': etree.HTMLParser(remove_blank_text=True),
123 }
124 self.pubDateFormat = '%a, %d %b %Y %H:%M:%S GMT'
125 self.mnvRSS = """
126<rss version="2.0"
127 xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd"
128 xmlns:content="http://purl.org/rss/1.0/modules/content/"
129 xmlns:cnettv="http://cnettv.com/mrss/"
130 xmlns:creativeCommons="http://backend.userland.com/creativeCommonsRssModule"
131 xmlns:media="http://search.yahoo.com/mrss/"
132 xmlns:atom="http://www.w3.org/2005/Atom"
133 xmlns:amp="http://www.adobe.com/amp/1.0"
134 xmlns:dc="http://purl.org/dc/elements/1.1/"
135 xmlns:mythtv="http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format">
136"""
137 self.mnvItem = '''
138<item>
139 <title></title>
140 <author></author>
141 <pubDate></pubDate>
142 <description></description>
143 <link></link>
144 <media:group xmlns:media="http://search.yahoo.com/mrss/">
145 <media:thumbnail url=''/>
146 <media:content url='' length='' duration='' width='' height='' lang=''/>
147 </media:group>
148 <rating></rating>
149</item>
150'''
151 # Season and Episode detection regex patterns
153 # "Series 7 - Episode 4" or "Series 7 - Episode 4" or "Series 7: On Holiday: Episode 10"
154 re.compile(r'''^.+?Series\\ (?P<seasno>[0-9]+).*.+?Episode\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
155 # Series 5 - 1
156 re.compile('''^.+?Series\\ (?P<seasno>[0-9]+)\\ \\-\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
157 # Series 1 - Warriors of Kudlak - Part 2
158 re.compile('''^.+?Series\\ (?P<seasno>[0-9]+).*.+?Part\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
159 # Series 3: Programme 3
160 re.compile('''^.+?Series\\ (?P<seasno>[0-9]+)\\:\\ Programme\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
161 # Series 3:
162 re.compile('''^.+?Series\\ (?P<seasno>[0-9]+).*$''', re.UNICODE),
163 # Episode 1
164 re.compile('''^.+?Episode\\ (?P<seasno>[0-9]+).*$''', re.UNICODE),
165 # Title: "s18 | e87"
166 re.compile('''^.+?[Ss](?P<seasno>[0-9]+).*.+?[Ee](?P<epno>[0-9]+).*$''', re.UNICODE),
167 # Description: "season 1, episode 5"
168 re.compile('''^.+?season\\ (?P<seasno>[0-9]+).*.+?episode\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
169 # Thumbnail: "http://media.thewb.com/thewb/images/thumbs/firefly/01/firefly_01_07.jpg"
170 re.compile('''(?P<seriesname>[^_]+)\\_(?P<seasno>[0-9]+)\\_(?P<epno>[0-9]+).*$''', re.UNICODE),
171 # Guid: "http://traffic.libsyn.com/divefilm/episode54hd.m4v"
172 re.compile('''^.+?episode(?P<epno>[0-9]+).*$''', re.UNICODE),
173 # Season 3, Episode 8
174 re.compile('''^.+?Season\\ (?P<seasno>[0-9]+).*.+?Episode\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
175 # "Episode 1" anywhere in text
176 re.compile('''^.+?Episode\\ (?P<seasno>[0-9]+).*$''', re.UNICODE),
177 # "Episode 1" at the start of the text
178 re.compile('''Episode\\ (?P<seasno>[0-9]+).*$''', re.UNICODE),
179 # "--0027--" when the episode is in the URL link
180 re.compile('''^.+?--(?P<seasno>[0-9]+)--.*$''', re.UNICODE),
181 ]
182 self.nv_python_libs_path = 'nv_python_libs'
183 self.apiSuffix = '_api'
184 self.language = 'en'
185 self.mythdb = None
186 self.linksWebPage = None
187 self.etree = etree
188 # end __init__()
189
190 def massageText(self, text):
191 '''Removes HTML markup from a text string.
192 @param text The HTML source.
193 @return The plain text. If the HTML source contains non-ASCII
194 entities or character references, this is a Unicode string.
195 '''
196 def fixup(m):
197 text = m.group(0)
198 if text[:1] == "<":
199 return "" # ignore tags
200 if text[:2] == "&#":
201 try:
202 if text[:3] == "&#x":
203 return chr(int(text[3:-1], 16))
204 else:
205 return chr(int(text[2:-1]))
206 except ValueError:
207 pass
208 elif text[:1] == "&":
209 import html.entities
210 entity = html.entities.entitydefs.get(text[1:-1])
211 if entity:
212 if entity[:2] == "&#":
213 try:
214 return chr(int(entity[2:-1]))
215 except ValueError:
216 pass
217 else:
218 return str(entity, "iso-8859-1")
219 return text # leave as is
220 return self.ampReplace(re.sub(r"(?s)<[^>]*>|&#?\w+;", fixup, self.textUtf8(text))).replace('\n',' ')
221 # end massageText()
222
223
224 def initLogger(self, path=sys.stderr, log_name='MNV_Grabber'):
225 """Setups a logger using the logging module, returns a logger object
226 """
227 logger = logging.getLogger(log_name)
228 formatter = logging.Formatter('%(asctime)s-%(levelname)s: %(message)s', '%Y-%m-%dT%H:%M:%S')
229
230 if path == sys.stderr:
231 hdlr = logging.StreamHandler(sys.stderr)
232 else:
233 hdlr = logging.FileHandler('%s/%s.log' % (path, log_name))
234
235 hdlr.setFormatter(formatter)
236 logger.addHandler(hdlr)
237
238 if self.debug:
239 logger.setLevel(logging.DEBUG)
240 else:
241 logger.setLevel(logging.INFO)
242 self.logger = logger
243 return logger
244 #end initLogger
245
246
247 def textUtf8(self, text):
248 if text is None:
249 return text
250 try:
251 return str(text, 'utf8')
252 except UnicodeDecodeError:
253 return ''
254 except (UnicodeEncodeError, TypeError):
255 return text
256 # end textUtf8()
257
258
259 def ampReplace(self, text):
260 '''Replace all &, ', ", <, and > characters with the predefined XML
261 entities
262 '''
263 text = self.textUtf8(text)
264 text = text.replace('&amp;','~~~~~').replace('&','&amp;').replace('~~~~~', '&amp;')
265 text = text.replace("'", "&apos;").replace('"', '&quot;')
266 text = text.replace('<', '&lt;').replace('>', '&gt;')
267 return text
268 # end ampReplace()
269
270 def callCommandLine(self, command, stderr=False):
271 '''Perform the requested command line and return an array of stdout strings and
272 stderr strings if stderr=True
273 return array of stdout string array or stdout and stderr string arrays
274 '''
275 stderrarray = []
276 stdoutarray = []
277 try:
278 p = subprocess.Popen(command, shell=True, bufsize=4096, stdin=subprocess.PIPE,
279 stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
280 except Exception as e:
281 if self.logger:
282 self.logger.error('callCommandLine Popen Exception, error(%s)' % e)
283 if stderr:
284 return [[], []]
285 else:
286 return []
287
288 if stderr:
289 while True:
290 data = p.stderr.readline()
291 if not data:
292 break
293 try:
294 data = str(data, 'utf8')
295 except (UnicodeDecodeError):
296 continue # Skip any line is cannot be cast as utf8 characters
297 except (UnicodeEncodeError, TypeError):
298 pass
299 stderrarray.append(data)
300
301 while True:
302 data = p.stdout.readline()
303 if not data:
304 break
305 try:
306 data = str(data, 'utf8')
307 except (UnicodeDecodeError):
308 continue # Skip any line that has non-utf8 characters in it
309 except (UnicodeEncodeError, TypeError):
310 pass
311 stdoutarray.append(data)
312
313 if stderr:
314 return [stdoutarray, stderrarray]
315 else:
316 return stdoutarray
317 # end callCommandLine()
318
319
321 '''Get longitude and latitiude to find videos relative to your location. Up to three different
322 servers will be tried before giving up.
323 return a dictionary e.g.
324 {'Latitude': '43.6667', 'Country': 'Canada', 'Longitude': '-79.4167', 'City': 'Toronto'}
325 return an empty dictionary if there were any errors
326 Code found at: http://blog.suinova.com/2009/04/from-ip-to-geolocation-country-city.html
327 '''
328 def getExternalIP():
329 '''Find the external IP address of this computer.
330 '''
331 url = urllib.request.URLopener()
332 try:
333 resp = url.open('http://www.whatismyip.com/automation/n09230945.asp')
334 return resp.read()
335 except:
336 return None
337 # end getExternalIP()
338
339 ip = getExternalIP()
340
341 if ip is None:
342 return {}
343
344 try:
345 gs = urllib.request.urlopen('http://blogama.org/ip_query.php?ip=%s&output=xml' % ip)
346 txt = gs.read()
347 except:
348 try:
349 gs = urllib.request.urlopen('http://www.seomoz.org/ip2location/look.php?ip=%s' % ip)
350 txt = gs.read()
351 except:
352 try:
353 gs = urllib.request.urlopen('http://api.hostip.info/?ip=%s' % ip)
354 txt = gs.read()
355 except:
356 logging.error('GeoIP servers not available')
357 return {}
358 try:
359 if txt.find('<Response>') > 0:
360 countrys = re.findall(r'<CountryName>([\w ]+)<',txt)[0]
361 citys = re.findall(r'<City>([\w ]+)<',txt)[0]
362 lats,lons = re.findall(r'<Latitude>([\d\-\.]+)</Latitude>\s*<Longitude>([\d\-\.]+)<',txt)[0]
363 elif txt.find('GLatLng') > 0:
364 citys,countrys = re.findall(r'<br />\s*([^<]+)<br />\s*([^<]+)<',txt)[0]
365 lats,lons = re.findall(r'LatLng\‍(([-\d\.]+),([-\d\.]+)',txt)[0]
366 elif txt.find('<gml:coordinates>') > 0:
367 citys = re.findall(r'<Hostip>\s*<gml:name>(\w+)</gml:name>',txt)[0]
368 countrys = re.findall(r'<countryName>([\w ,\.]+)</countryName>',txt)[0]
369 lats,lons = re.findall(r'gml:coordinates>([-\d\.]+),([-\d\.]+)<',txt)[0]
370 else:
371 logging.error('error parsing IP result %s'%txt)
372 return {}
373 return {'Country':countrys,'City':citys,'Latitude':lats,'Longitude':lons}
374 except:
375 logging.error('Error parsing IP result %s'%txt)
376 return {}
377 # end detectUserLocationByIP()
378
379
381 """Common name for a custom HTML display. Used to interface with MythTV plugin NetVision
382 """
383 embedFlashVarFilter = etree.XPath('//embed', namespaces=self.namespaces)
384 variables = self.HTMLvideocode.split('?')
385
386 url = '%s/nv_python_libs/configs/HTML/%s' % (baseProcessingDir, variables[0])
387 try:
388 customHTML = etree.parse(url)
389 except Exception as e:
390 raise Exception("! Error: The Custom HTML file (%s) cause the exception error (%s)\n" % (url, errormsg))
391
392 # There may be one or more argumants to replace in the HTML code
393 # Example:
394 # "bbciplayer.html?AttribName1/FirstReplace=bc623bc?SecondReplace/AttribName2=wonderland/..."
395 for arg in variables[1:]:
396 (attrib, key_value) = arg.split('/')
397 (key, value) = key_value.split('=')
398 embedFlashVarFilter(customHTML)[0].attrib[attrib] = embedFlashVarFilter(customHTML)[0].attrib[attrib].replace(key, value)
399
400 sys.stdout.write(etree.tostring(customHTML, encoding='UTF-8', pretty_print=True))
401
402 sys.exit(0)
403 # end displayCustomHTML()
404
405
406 def mnvChannelElement(self, channelDetails):
407 ''' Create a MNV Channel element populated with channel details
408 return the channel element
409 '''
410 mnvChannel = etree.fromstring("""
411<channel>
412 <title>%(channel_title)s</title>
413 <link>%(channel_link)s</link>
414 <description>%(channel_description)s</description>
415 <numresults>%(channel_numresults)d</numresults>
416 <returned>%(channel_returned)d</returned>
417 <startindex>%(channel_startindex)d</startindex>
418</channel>
419""" % channelDetails
420 )
421 return mnvChannel
422 # end mnvChannelElement()
423
424 # Verify the a URL actually exists
425 def checkURL(self, url):
426 '''Verify that a URL actually exists. Be careful as redirects can lead to false positives. Use
427 the info details to be sure.
428 return True when it exists and info
429 return False when it does not exist and info
430 '''
431 urlOpened = urllib.request.urlopen(url)
432 code = urlOpened.getcode()
433 actualURL = urlOpened.geturl()
434 info = urlOpened.info()
435 urlOpened.close()
436 if code != 200:
437 return [False, info]
438 if url != actualURL:
439 return [False, info]
440 return [True, info]
441 # end checkURL()
442
443
    def getUrlData(self, inputUrls, pageFilter=None):
        ''' Fetch url data and extract the desired results using a dynamic filter or XSLT stylesheet.
        The URLs are requested in parallel using threading
        @param inputUrls: element tree whose <url> children each carry <name>,
               <href>, <parserType> and optional <filter>/<xslt>/<parameter> children
        @param pageFilter: optional filter used to detect "more pages" conditions
        return the extracted data organised into directories
        '''
        urlDictionary = {}

        if self.debug:
            print("inputUrls:")
            sys.stdout.write(etree.tostring(inputUrls, encoding='UTF-8', pretty_print=True))
            print()

        # Build one work-order dictionary entry per <url> element
        for element in inputUrls.xpath('.//url'):
            key = element.find('name').text
            urlDictionary[key] = {}
            urlDictionary[key]['type'] = 'raw'
            urlDictionary[key]['href'] = element.find('href').text
            urlFilter = element.findall('filter')
            if len(urlFilter):
                # XPath filter strings select results from the downloaded page
                urlDictionary[key]['type'] = 'xpath'
                for index in range(len(urlFilter)):
                    urlFilter[index] = urlFilter[index].text
                urlDictionary[key]['filter'] = urlFilter
            urlXSLT = element.findall('xslt')
            if len(urlXSLT):
                # XSLT stylesheets are compiled once here, then applied by the threads
                urlDictionary[key]['type'] = 'xslt'
                for index in range(len(urlXSLT)):
                    urlXSLT[index] = etree.XSLT(etree.parse('%s/nv_python_libs/configs/XSLT/%s.xsl' % (self.baseProcessingDir, urlXSLT[index].text)))
                urlDictionary[key]['xslt'] = urlXSLT
            urlDictionary[key]['pageFilter'] = pageFilter
            # Each entry gets its own parser copy rather than sharing the prototype
            urlDictionary[key]['parser'] = self.parsers[element.find('parserType').text].copy()
            urlDictionary[key]['namespaces'] = self.namespaces
            urlDictionary[key]['result'] = []
            urlDictionary[key]['morePages'] = 'false'
            urlDictionary[key]['tmp'] = None
            urlDictionary[key]['tree'] = None
            if element.find('parameter') is not None:
                urlDictionary[key]['parameter'] = element.find('parameter').text

        if self.debug:
            print("urlDictionary:")
            print(urlDictionary)
            print()

        thread_list = []
        # getURL is defined elsewhere in this package; it appears to be a
        # Thread subclass that reads its work order from this class attribute
        # — TODO confirm
        getURL.urlDictionary = urlDictionary

        # Single threaded (commented out) - Only used to prove that multi-threading does
        # not cause data corruption
#        for key in urlDictionary.keys():
#            current = getURL(key, self.debug)
#            thread_list.append(current)
#            current.start()
#            current.join()

        # Multi-threaded: start one download thread per URL, then wait for all
        for key in list(urlDictionary.keys()):
            current = getURL(key, self.debug)
            thread_list.append(current)
            current.start()
        for thread in thread_list:
            thread.join()

        # Take the results and make the return element tree
        root = etree.XML("<xml></xml>")
        for key in sorted(getURL.urlDictionary.keys()):
            if not len(getURL.urlDictionary[key]['result']):
                continue
            results = etree.SubElement(root, "results")
            etree.SubElement(results, "name").text = key
            etree.SubElement(results, "url").text = urlDictionary[key]['href']
            etree.SubElement(results, "type").text = urlDictionary[key]['type']
            etree.SubElement(results, "pageInfo").text = getURL.urlDictionary[key]['morePages']
            result = etree.SubElement(results, "result")
            # NOTE(review): 'filter'/'xslt' keys are only created above when the
            # corresponding child elements exist; presumably the getURL threads
            # supply defaults for raw entries — verify against getURL's code.
            if len(getURL.urlDictionary[key]['filter']):
                for index in range(len(getURL.urlDictionary[key]['result'])):
                    for element in getURL.urlDictionary[key]['result'][index]:
                        result.append(element)
            elif len(getURL.urlDictionary[key]['xslt']):
                for index in range(len(getURL.urlDictionary[key]['result'])):
                    for element in getURL.urlDictionary[key]['result'][index].getroot():
                        result.append(element)
            else:
                for element in getURL.urlDictionary[key]['result'][0].xpath('/*'):
                    result.append(element)

        if self.debug:
            print("root:")
            sys.stdout.write(etree.tostring(root, encoding='UTF-8', pretty_print=True))
            print()

        return root
    # end getShows()
537
538
542 ''' Create a dictionary of functions that manipulate items data. These functions are imported
543 from other MNV grabbers. These functions are meant to be used by the MNV WebCgi type of grabber
544 which aggregates data from a number of different sources (e.g. RSS feeds and HTML Web pages)
545 including sources from other grabbers.
546 Using a dictionary facilitates mixing XSLT functions with pure python functions to use the best
547 capabilities of both technologies when translating source information into MNV compliant item
548 data.
549 return nothing
550 '''
551 # Add the common XPath extention functions
553 'pubDate': self.pubDate,
554 'getSeasonEpisode': self.getSeasonEpisode,
555 'convertDuration': self.convertDuration,
556 'getHtmlData': self.getHtmlData,
557 'linkWebPage': self.linkWebPage,
558 'baseDir': self.baseDir,
559 'stringLower': self.stringLower,
560 'stringUpper': self.stringUpper,
561 'stringReplace': self.stringReplace,
562 'stringEscape': self.stringEscape,
563 'removePunc': self.removePunc,
564 'htmlToString': self.htmlToString,
565 'checkIfDBItem': self.checkIfDBItem,
566 'getItemElement': self.getItemElement,
567 'getDBRecords': self.getDBRecords,
568 'createItemElement': self.createItemElement,
569 'testSubString': self.testSubString,
570 }
571 # Get the specific source functions
572 self.addDynamicFunctions('xsltfunctions')
573 return
574 # end buildFunctionDict()
575
    def addDynamicFunctions(self, dirPath):
        ''' Dynamically add functions to the function dictionary from a specified directory
        @param dirPath: directory name under nv_python_libs/ to scan for *.py modules
        return nothing
        '''
        fullPath = '%s/nv_python_libs/%s' % (self.baseProcessingDir, dirPath)
        sys.path.append(fullPath)
        # Make a list of all functions that need to be included
        fileList = []
        for fPath in os.listdir(fullPath):
            filepath, filename = os.path.split( fPath )
            filename, ext = os.path.splitext( filename )
            if filename == '__init__':
                continue
            if ext != '.py':
                continue
            fileList.append(filename)

        # Do not stop when there is an abort on a library just send an error message to stderr
        # NOTE(review): exec() on generated source is only as safe as the
        # modules shipped in the grabber tree; nothing external is executed,
        # but any module listed in __xpathClassList__/__xsltExtentionList__
        # runs with full privileges here.
        for fileName in fileList:
            filename = {'filename': fileName, }
            try:
                exec('''
import %(filename)s
%(filename)s.common = self
for xpathClass in %(filename)s.__xpathClassList__:
    exec(u'xpathClass = %(filename)s.%%s()' %% xpathClass)
    for func in xpathClass.functList:
        exec("self.functionDict['%%s'] = %%s" %% (func, u'xpathClass.%%s' %% func))
for xsltExtension in %(filename)s.__xsltExtentionList__:
    exec("self.functionDict['%%s'] = %%s" %% (xsltExtension, u'%(filename)s.%%s' %% xsltExtension))''' % filename )
            except Exception as errmsg:
                sys.stderr.write('! Error: Dynamic import of (%s) XPath and XSLT extention functions\nmessage(%s)\n' % (fileName, errmsg))

        return
    # end addDynamicFunctions()
611
    def pubDate(self, context, *inputArgs):
        '''Convert a date/time string in a specified format into a pubDate. The default is the
        MNV item format
        @param inputArgs: (dateString[, strptime format[, output strftime format]])
        return the formatted pubDate string
        return on error return the original date string
        '''
        args = []
        for arg in inputArgs:
            args.append(arg)
        if args[0] == '':
            # No date supplied: use "now" in the MNV pubDate format
            return datetime.datetime.now().strftime(self.pubDateFormat)
        # Strip a trailing numeric timezone offset (e.g. "+0100"); the
        # "index > 5" guard avoids matching a '-' inside the date itself
        index = args[0].find('+')
        if index == -1:
            index = args[0].find('-')
        if index != -1 and index > 5:
            args[0] = args[0][:index].strip()
        args[0] = args[0].replace(',', '').replace('.', '')
        try:
            if len(args) > 1:
                args[1] = args[1].replace(',', '').replace('.', '')
                if args[1].find('GMT') != -1:
                    # Drop the literal GMT from the format and the matching
                    # trailing token from the date string
                    args[1] = args[1][:args[1].find('GMT')].strip()
                    args[0] = args[0][:args[0].rfind(' ')].strip()
                try:
                    pubdate = time.strptime(args[0], args[1])
                except ValueError:
                    # Retry swapping abbreviated/full month-name directives.
                    # NOTE(review): if the supplied format matches neither
                    # branch, 'pubdate' stays unbound and the resulting error
                    # falls through to the outer handler below.
                    if args[1] == '%a %d %b %Y %H:%M:%S':
                        pubdate = time.strptime(args[0], '%a %d %B %Y %H:%M:%S')
                    elif args[1] == '%a %d %B %Y %H:%M:%S':
                        pubdate = time.strptime(args[0], '%a %d %b %Y %H:%M:%S')
                if len(args) > 2:
                    return time.strftime(args[2], pubdate)
                else:
                    return time.strftime(self.pubDateFormat, pubdate)
            else:
                # Date supplied without a format: fall back to "now"
                return datetime.datetime.now().strftime(self.pubDateFormat)
        except Exception as err:
            sys.stderr.write('! Error: pubDate variables(%s) error(%s)\n' % (args, err))
            return args[0]
    # end pubDate()
652
653 def getSeasonEpisode(self, context, text):
654 ''' Check is there is any season or episode number information in an item's text
655 return a string of season and/or episode numbers e.g. "2_21"
656 return a string with "None_None" values
657 '''
658 s_e = [None, None]
659 for regexPattern in self.s_e_Patterns:
660 match = regexPattern.match(text)
661 if not match:
662 continue
663 season_episode = match.groups()
664 if len(season_episode) > 1:
665 s_e[0] = season_episode[0]
666 s_e[1] = season_episode[1]
667 else:
668 s_e[1] = season_episode[0]
669 return '%s_%s' % (s_e[0], s_e[1])
670 return '%s_%s' % (s_e[0], s_e[1])
671 # end getSeasonEpisode()
672
673 def convertDuration(self, context, duration):
674 ''' Take a duration and convert it to seconds
675 return a string of seconds
676 '''
677 min_sec = duration.split(':')
678 seconds = 0
679 for count in range(len(min_sec)):
680 if count != len(min_sec)-1:
681 seconds+=int(min_sec[count])*(60*(len(min_sec)-count-1))
682 else:
683 seconds+=int(min_sec[count])
684 return '%s' % seconds
685 # end convertDuration()
686
    def getHtmlData(self, context, *args):
        ''' Take a HTML string and convert it to an HTML element. Then apply a filter and return
        that value.
        @param args: (xpathFilter, htmlString) or just (htmlString,)
        return filter value as a string
        return an empty string if the filter failed to find any values.
        '''
        xpathFilter = None
        if len(args) > 1:
            xpathFilter = args[0]
            htmldata = args[1]
        else:
            htmldata = args[0]
        htmlElement = etree.HTML(htmldata)
        if not xpathFilter:
            # No filter: hand back the parsed element itself
            return htmlElement
        filteredData = htmlElement.xpath(xpathFilter)
        if len(filteredData):
            if xpathFilter.find('@') != -1:
                # Attribute selections already yield plain strings
                return filteredData[0]
            else:
                return filteredData[0].text
        return ''
    # end getHtmlData()
710
    def linkWebPage(self, context, sourceLink):
        ''' Check if there is a special local HTML page for the link. If not then return a generic
        download only local HTML url.
        @param sourceLink: tag name looked up in customeHtmlPageList.xml
        return a file://.... link to a local HTML web page
        '''
        # Currently there are no link specific Web pages
        if not self.linksWebPage:
            # Parse and cache the custom page list on first use
            self.linksWebPage = etree.parse('%s/nv_python_libs/configs/XML/customeHtmlPageList.xml' % (self.baseProcessingDir, ))
        if self.linksWebPage.find(sourceLink) is not None:
            return 'file://%s/nv_python_libs/configs/HTML/%s' % (self.baseProcessingDir, self.linksWebPage.find(sourceLink).text)
        # Fall back to the generic "download only" page
        return 'file://%s/nv_python_libs/configs/HTML/%s' % (self.baseProcessingDir, 'nodownloads.html')
    # end linkWebPage()
723
724 def baseDir(self, context, dummy):
725 ''' Return the base directory string
726 return the base directory
727 '''
728 return self.baseProcessingDir
729 # end baseDir()
730
731 def stringLower(self, context, data):
732 '''
733 return a lower case string
734 '''
735 if not len(data):
736 return ''
737 return data[0].lower()
738 # end stringLower()
739
740 def stringUpper(self, context, data):
741 '''
742 return a upper case string
743 '''
744 if not len(data):
745 return ''
746 return data[0].upper()
747 # end stringUpper()
748
749 def stringReplace(self, context, *inputArgs):
750 ''' Replace substring values in a string
751 return the resulting string from a replace operation
752 '''
753 args = []
754 for arg in inputArgs:
755 args.append(arg)
756 if not len(args) or len(args) == 1:
757 return data
758 if len(args) == 2:
759 args[0] = args[0].replace(args[1], "")
760 else:
761 args[0] = args[0].replace(args[1], args[2])
762 return args[0].strip()
763 # end stringReplace()
764
765 def stringEscape(self, context, *args):
766 ''' Replace substring values in a string
767 return the resulting string from a replace operation
768 '''
769 if not len(args):
770 return ""
771 if len(args) == 1:
772 return urllib.parse.quote_plus(args[0].encode("utf-8"))
773 else :
774 return urllib.parse.quote_plus(args[0].encode("utf-8"), args[1])
775 # end stringEscape()
776
777 def removePunc(self, context, data):
778 ''' Remove all punctuation for a string
779 return the resulting string
780 '''
781 if not len(data):
782 return ""
783 return re.sub('[%s]' % re.escape(string.punctuation), '', data)
784 # end removePunc()
785
    def htmlToString(self, context, html):
        ''' Remove HTML tags and LFs from a string
        return the string without HTML tags or LFs
        '''
        if not len(html):
            return ""
        # NOTE(review): both curly quotes (’ and “) are mapped to &apos; —
        # mapping the left double quote to an apostrophe entity looks like a
        # copy/paste slip, but may be intentional for display; confirm before
        # changing.
        return self.massageText(html).strip().replace('\n', ' ').replace('’', "&apos;").replace('“', "&apos;")
    # end htmlToString()
794
795 def getLanguage(self, context, args):
796 ''' Return the current selected language code
797 return language code
798 '''
799 return self.language
800 # end getLanguage()
801
    def checkIfDBItem(self, context, arg):
        ''' Find an 'internetcontentarticles' table record based on fields and values
        @param arg: a dictionary of search field/value pairs passed through
               to getDBRecords()
        return True if a record was found and an item element created
        return False if no record was found
        '''
        # On success the created element is retrieved via getItemElement()
        results = self.getDBRecords('dummy', arg)
        if len(results):
            self.itemElement = self.createItemElement('dummy', results[0])
            return True
        return False
    # end checkIfDBItem()
813
814 def getItemElement(self, context, arg):
815 ''' Return an item element that was created by a previous call to the checkIfDBItem function
816 '''
817 return self.itemElement
818 # end getItemElement()
819
820 def testSubString(self, context, *arg):
821 ''' Return True or False if a substring is at the beginning or end of a string
822 '''
823 if arg[0] == 'starts':
824 return arg[1].startswith(arg[2])
825 elif arg[0] == 'ends':
826 return arg[1].endswith(arg[2])
827 else:
828 index = arg[1].find(arg[2])
829 if index == -1:
830 return False
831 else:
832 return True
833 # end testSubString()
834
    def getDBRecords(self, context, *arg):
        ''' Return a list of 'internetcontentarticles' table records based on field and value matches
        @param arg: arg[0] is a dictionary of search field/value pairs
        '''
        if not self.mythdb:
            # Lazily connect to the MythTV database and prepare the XPath
            # helpers later used to fill item elements from records
            self.initializeMythDB()
            self.itemThumbnail = etree.XPath('.//media:thumbnail', namespaces=self.namespaces)
            self.itemContent = etree.XPath('.//media:content', namespaces=self.namespaces)
        # Encode the search text to UTF-8
        for key in list(arg[0].keys()):
            try:
                arg[0][key] = arg[0][key].encode('UTF-8')
            except:
                # Non-string search values cannot be encoded; treat as no match
                return []
        return list(self.mythdb.searchInternetContent(**arg[0]))
    # end getDBItem()
850
    def createItemElement(self, context, *arg):
        ''' Create an item element from an 'internetcontentarticles' table record dictionary
        @param arg: arg[0] is the record dictionary
        return the item element
        '''
        result = arg[0]
        # Start from the skeleton item defined in __init__ and fill in only
        # the fields present in the record
        itemElement = etree.XML(self.mnvItem)
        # Insert data into a new item element
        itemElement.find('link').text = result['url']
        if result['title']:
            itemElement.find('title').text = result['title']
        if result['subtitle']:
            etree.SubElement(itemElement, "subtitle").text = result['subtitle']
        if result['description']:
            itemElement.find('description').text = result['description']
        if result['author']:
            itemElement.find('author').text = result['author']
        if result['date']:
            itemElement.find('pubDate').text = result['date'].strftime(self.pubDateFormat)
        # NOTE(review): '32576' appears to be a sentinel for "no rating" and a
        # leading '-' marks an invalid value — confirm against the DB schema.
        if result['rating'] != '32576' and result['rating'][0] != '-':
            itemElement.find('rating').text = result['rating']
        if result['thumbnail']:
            self.itemThumbnail(itemElement)[0].attrib['url'] = result['thumbnail']
        if result['mediaURL']:
            self.itemContent(itemElement)[0].attrib['url'] = result['mediaURL']
        if result['filesize'] > 0:
            self.itemContent(itemElement)[0].attrib['length'] = str(result['filesize'])
        if result['time'] > 0:
            self.itemContent(itemElement)[0].attrib['duration'] = str(result['time'])
        if result['width'] > 0:
            self.itemContent(itemElement)[0].attrib['width'] = str(result['width'])
        if result['height'] > 0:
            self.itemContent(itemElement)[0].attrib['height'] = str(result['height'])
        if result['language']:
            self.itemContent(itemElement)[0].attrib['lang'] = result['language']
        # Optional mythtv-namespace children for season/episode/custom HTML
        if result['season'] > 0:
            etree.SubElement(itemElement, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}season").text = str(result['season'])
        if result['episode'] > 0:
            etree.SubElement(itemElement, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}episode").text = str(result['episode'])
        if result['customhtml'] == 1:
            etree.SubElement(itemElement, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}customhtml").text = 'true'
        if result['countries']:
            # Space-separated country list becomes one child element each
            countries = result['countries'].split(' ')
            for country in countries:
                etree.SubElement(itemElement, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}country").text = country
        return itemElement
    # end createItemElement()
897
899 ''' Import the MythTV database bindings
900 return nothing
901 '''
902 try:
903 from MythTV import MythDB, MythLog, MythError
904 try:
905 '''Create an instance of each: MythDB
906 '''
907 MythLog._setlevel('none') # Some non option -M cannot have any logging on stdout
908 self.mythdb = MythDB()
909 except MythError as e:
910 sys.stderr.write('\n! Error - %s\n' % e.args[0])
911 filename = os.path.expanduser("~")+'/.mythtv/config.xml'
912 if not os.path.isfile(filename):
913 sys.stderr.write('\n! Error - A correctly configured (%s) file must exist\n' % filename)
914 else:
915 sys.stderr.write('\n! Error - Check that (%s) is correctly configured\n' % filename)
916 sys.exit(1)
917 except Exception as e:
918 sys.stderr.write("\n! Error - Creating an instance caused an error for one of: MythDB. error(%s)\n" % e)
919 sys.exit(1)
920 except Exception as e:
921 sys.stderr.write("\n! Error - MythTV python bindings could not be imported. error(%s)\n" % e)
922 sys.exit(1)
923 # end initializeMythDB()
924
925
926
930
class getURL(Thread):
    ''' Threaded download of a URL and filter out the desired data for XML and (X)HTML
    return the filter results
    '''
    def __init__(self, urlKey, debug):
        Thread.__init__(self)
        self.urlKey = urlKey    # key into the shared urlDictionary for this thread's URL
        self.debug = debug

    def run(self):
        ''' Download self.urlKey's URL, apply its XPath filters or XSLT stylesheets,
        and append the results to the shared urlDictionary entry.
        '''
        # All per-URL state lives in the shared urlDictionary entry for this key.
        # NOTE(review): urlDictionary is not set in __init__; it is presumably
        # attached by the dispatching code (getUrlData) before start() is called.
        url = self.urlDictionary[self.urlKey]
        if self.debug:
            print("getURL href(%s)" % (url['href'], ))
            print()

        # Input the data from a url
        try:
            url['tree'] = etree.parse(url['href'], url['parser'])
        except Exception as errormsg:
            sys.stderr.write("! Error: The URL (%s) cause the exception error (%s)\n" % (url['href'], errormsg))
            return

        if self.debug:
            print("Raw unfiltered URL input:")
            # etree.tostring() returns bytes in Python 3; decode before writing
            # to the text-mode stdout stream (writing bytes raises TypeError).
            sys.stdout.write(etree.tostring(url['tree'], encoding='UTF-8', pretty_print=True).decode('UTF-8'))
            print()

        if len(url['filter']):
            for index in range(len(url['filter'])):
                # Filter out the desired data
                try:
                    url['tmp'] = url['tree'].xpath(url['filter'][index], namespaces=url['namespaces'])
                except AssertionError as e:
                    sys.stderr.write("No filter results for Name(%s)\n" % self.urlKey)
                    sys.stderr.write("No filter results for url(%s)\n" % url['href'])
                    sys.stderr.write("! Error:(%s)\n" % e)
                    # Give up after the last filter fails
                    # (was `len(filter) == index-1`, which could never be true)
                    if index == len(url['filter']) - 1:
                        return
                    continue
                url['result'].append(url['tmp'])
        elif len(url['xslt']):
            for index in range(len(url['xslt'])):
                # Process the results through a XSLT stylesheet out the desired data
                try:
                    if 'parameter' in url:
                        # 'paraMeter' is the stylesheet's expected parameter name
                        url['tmp'] = url['xslt'][index](url['tree'], paraMeter= etree.XSLT.strparam(url['parameter']))
                    else:
                        url['tmp'] = url['xslt'][index](url['tree'])
                except Exception as e:
                    sys.stderr.write("! XSLT Error:(%s) Key(%s)\n" % (e, self.urlKey))
                    # Give up after the last stylesheet fails
                    # (was testing the 'filter' list with `len == index-1`)
                    if index == len(url['xslt']) - 1:
                        return
                    continue
                # Was any data found?
                if url['tmp'].getroot() is None:
                    sys.stderr.write("No Xslt results for Name(%s)\n" % self.urlKey)
                    sys.stderr.write("No Xslt results for url(%s)\n" % url['href'])
                    if index == len(url['xslt']) - 1:
                        return
                    continue
                url['result'].append(url['tmp'])
        else:
            # Just pass back the raw data
            url['result'] = [url['tree']]

        # Check whether there are more pages available
        if url['pageFilter']:
            if len(url['tree'].xpath(url['pageFilter'], namespaces=url['namespaces'])):
                url['morePages'] = 'true'
        return
    # end run()
# end class getURL()
1006
1007
Definition: mythdb.h:15
Start - Utility functions.
Definition: common_api.py:101
def stringReplace(self, context, *inputArgs)
Definition: common_api.py:749
def getLanguage(self, context, args)
Definition: common_api.py:795
def getHtmlData(self, context, *args)
Definition: common_api.py:687
def callCommandLine(self, command, stderr=False)
Definition: common_api.py:270
def getDBRecords(self, context, *arg)
Definition: common_api.py:835
def removePunc(self, context, data)
Definition: common_api.py:777
def __init__(self, logger=False, debug=False)
Definition: common_api.py:107
def baseDir(self, context, dummy)
Definition: common_api.py:724
def getSeasonEpisode(self, context, text)
Definition: common_api.py:653
def stringEscape(self, context, *args)
Definition: common_api.py:765
def checkIfDBItem(self, context, arg)
Definition: common_api.py:802
def createItemElement(self, context, *arg)
Definition: common_api.py:851
def htmlToString(self, context, html)
Definition: common_api.py:786
def mnvChannelElement(self, channelDetails)
Definition: common_api.py:406
def initLogger(self, path=sys.stderr, log_name='MNV_Grabber')
Definition: common_api.py:224
def testSubString(self, context, *arg)
Definition: common_api.py:820
def pubDate(self, context, *inputArgs)
Definition: common_api.py:612
def stringUpper(self, context, data)
Definition: common_api.py:740
def getUrlData(self, inputUrls, pageFilter=None)
Definition: common_api.py:444
def linkWebPage(self, context, sourceLink)
Definition: common_api.py:711
def getItemElement(self, context, arg)
Definition: common_api.py:814
def stringLower(self, context, data)
Definition: common_api.py:731
def buildFunctionDict(self)
Start - Utility functions specifically used to modify MNV item data.
Definition: common_api.py:541
def convertDuration(self, context, duration)
Definition: common_api.py:673
def __init__(self, outstream, encoding=None)
Definition: common_api.py:66
def __init__(self, urlKey, debug)
Definition: common_api.py:935
static pid_list_t::iterator find(const PIDInfoMap &map, pid_list_t &list, pid_list_t::iterator begin, pid_list_t::iterator end, bool find_open)
MBASE_PUBLIC long long copy(QFile &dst, QFile &src, uint block_size=0)
Copies src file to dst file.
def error(message)
Definition: smolt.py:409
static void print(const QList< uint > &raw_minimas, const QList< uint > &raw_maximas, const QList< float > &minimas, const QList< float > &maximas)