# Module metadata used by the MNV grabber framework when reporting
# grabber/library identity (reconstructed from a line-mangled source).
__title__ = "common_api - Common class libraries for all MythNetvision Mashup processing"
__author__ = "R.D. Vaughan"
16 This python script is intended to perform a variety of utility functions for the processing of
17 MythNetvision Grabber scripts that run as a Web application and global functions used by many
55 import os, struct, sys, re, datetime, time, subprocess, string
56 import urllib.request, urllib.parse, urllib.error
59 from threading
import Thread
61 from .common_exceptions
import (WebCgiUrlError, WebCgiHttpError, WebCgiRssError, WebCgiVideoNotFound, WebCgiXmlError, )
65 """Wraps a stream with an encoder"""
74 """Wraps the output stream, encoding Unicode strings with the specified encoding"""
75 if isinstance(obj, str):
77 self.
out.buffer.write(obj)
80 """Delegate everything but write to the stream"""
81 return getattr(self.
out, attr)
83 if isinstance(sys.stdout, io.TextIOWrapper):
89 from io
import StringIO
90 from lxml
import etree
91 except Exception
as e:
92 sys.stderr.write(
'\n! Error - Importing the "lxml" python library failed on error(%s)\n' % e)
102 """A collection of common functions used by many grabbers
110 self.
baseProcessingDir = os.path.dirname( os.path.realpath( __file__ )).replace(
'/nv_python_libs/common',
'')
112 'xsi':
"http://www.w3.org/2001/XMLSchema-instance",
113 'media':
"http://search.yahoo.com/mrss/",
114 'xhtml':
"http://www.w3.org/1999/xhtml",
115 'atm':
"http://www.w3.org/2005/Atom",
116 'mythtv':
"http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format",
117 'itunes':
"http://www.itunes.com/dtds/podcast-1.0.dtd",
120 'xml': etree.XMLParser(remove_blank_text=
True),
121 'html': etree.HTMLParser(remove_blank_text=
True),
122 'xhtml': etree.HTMLParser(remove_blank_text=
True),
127 xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd"
128 xmlns:content="http://purl.org/rss/1.0/modules/content/"
129 xmlns:cnettv="http://cnettv.com/mrss/"
130 xmlns:creativeCommons="http://backend.userland.com/creativeCommonsRssModule"
131 xmlns:media="http://search.yahoo.com/mrss/"
132 xmlns:atom="http://www.w3.org/2005/Atom"
133 xmlns:amp="http://www.adobe.com/amp/1.0"
134 xmlns:dc="http://purl.org/dc/elements/1.1/"
135 xmlns:mythtv="http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format">
142 <description></description>
144 <media:group xmlns:media="http://search.yahoo.com/mrss/">
145 <media:thumbnail url=''/>
146 <media:content url='' length='' duration='' width='' height='' lang=''/>
154 re.compile(
r'''^.+?Series\\ (?P<seasno>[0-9]+).*.+?Episode\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
156 re.compile(
'''^.+?Series\\ (?P<seasno>[0-9]+)\\ \\-\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
158 re.compile(
'''^.+?Series\\ (?P<seasno>[0-9]+).*.+?Part\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
160 re.compile(
'''^.+?Series\\ (?P<seasno>[0-9]+)\\:\\ Programme\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
162 re.compile(
'''^.+?Series\\ (?P<seasno>[0-9]+).*$''', re.UNICODE),
164 re.compile(
'''^.+?Episode\\ (?P<seasno>[0-9]+).*$''', re.UNICODE),
166 re.compile(
'''^.+?[Ss](?P<seasno>[0-9]+).*.+?[Ee](?P<epno>[0-9]+).*$''', re.UNICODE),
168 re.compile(
'''^.+?season\\ (?P<seasno>[0-9]+).*.+?episode\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
170 re.compile(
'''(?P<seriesname>[^_]+)\\_(?P<seasno>[0-9]+)\\_(?P<epno>[0-9]+).*$''', re.UNICODE),
172 re.compile(
'''^.+?episode(?P<epno>[0-9]+).*$''', re.UNICODE),
174 re.compile(
'''^.+?Season\\ (?P<seasno>[0-9]+).*.+?Episode\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
176 re.compile(
'''^.+?Episode\\ (?P<seasno>[0-9]+).*$''', re.UNICODE),
178 re.compile(
'''Episode\\ (?P<seasno>[0-9]+).*$''', re.UNICODE),
180 re.compile(
'''^.+?--(?P<seasno>[0-9]+)--.*$''', re.UNICODE),
191 '''Removes HTML markup from a text string.
192 @param text The HTML source.
193 @return The plain text. If the HTML source contains non-ASCII
194 entities or character references, this is a Unicode string.
202 if text[:3] ==
"&#x":
203 return chr(int(text[3:-1], 16))
205 return chr(int(text[2:-1]))
208 elif text[:1] ==
"&":
210 entity = html.entities.entitydefs.get(text[1:-1])
212 if entity[:2] ==
"&#":
214 return chr(int(entity[2:-1]))
218 return str(entity,
"iso-8859-1")
220 return self.
ampReplace(re.sub(
r"(?s)<[^>]*>|&#?\w+;", fixup, self.
textUtf8(text))).replace(
'\n',
' ')
224 def initLogger(self, path=sys.stderr, log_name='MNV_Grabber'):
225 """Setups a logger using the logging module, returns a logger object
227 logger = logging.getLogger(log_name)
228 formatter = logging.Formatter(
'%(asctime)s-%(levelname)s: %(message)s',
'%Y-%m-%dT%H:%M:%S')
230 if path == sys.stderr:
231 hdlr = logging.StreamHandler(sys.stderr)
233 hdlr = logging.FileHandler(
'%s/%s.log' % (path, log_name))
235 hdlr.setFormatter(formatter)
236 logger.addHandler(hdlr)
239 logger.setLevel(logging.DEBUG)
241 logger.setLevel(logging.INFO)
251 return str(text,
'utf8')
252 except UnicodeDecodeError:
254 except (UnicodeEncodeError, TypeError):
260 '''Replace all &, ', ", <, and > characters with the predefined XML
264 text = text.replace(
'&',
'~~~~~').replace(
'&',
'&').replace(
'~~~~~',
'&')
265 text = text.replace(
"'",
"'").replace(
'"',
'"')
266 text = text.replace(
'<',
'<').replace(
'>',
'>')
271 '''Perform the requested command line and return an array of stdout strings and
272 stderr strings if stderr=True
273 return array of stdout string array or stdout and stderr string arrays
278 p = subprocess.Popen(command, shell=
True, bufsize=4096, stdin=subprocess.PIPE,
279 stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=
True)
280 except Exception
as e:
282 self.
logger.
error(
'callCommandLine Popen Exception, error(%s)' % e)
290 data = p.stderr.readline()
294 data = str(data,
'utf8')
295 except (UnicodeDecodeError):
297 except (UnicodeEncodeError, TypeError):
299 stderrarray.append(data)
302 data = p.stdout.readline()
306 data = str(data,
'utf8')
307 except (UnicodeDecodeError):
309 except (UnicodeEncodeError, TypeError):
311 stdoutarray.append(data)
314 return [stdoutarray, stderrarray]
321 '''Get longitude and latitiude to find videos relative to your location. Up to three different
322 servers will be tried before giving up.
323 return a dictionary e.g.
324 {'Latitude': '43.6667', 'Country': 'Canada', 'Longitude': '-79.4167', 'City': 'Toronto'}
325 return an empty dictionary if there were any errors
326 Code found at: http://blog.suinova.com/2009/04/from-ip-to-geolocation-country-city.html
329 '''Find the external IP address of this computer.
331 url = urllib.request.URLopener()
333 resp = url.open(
'http://www.whatismyip.com/automation/n09230945.asp')
345 gs = urllib.request.urlopen(
'http://blogama.org/ip_query.php?ip=%s&output=xml' % ip)
349 gs = urllib.request.urlopen(
'http://www.seomoz.org/ip2location/look.php?ip=%s' % ip)
353 gs = urllib.request.urlopen(
'http://api.hostip.info/?ip=%s' % ip)
356 logging.error(
'GeoIP servers not available')
359 if txt.find(
'<Response>') > 0:
360 countrys = re.findall(
r'<CountryName>([\w ]+)<',txt)[0]
361 citys = re.findall(
r'<City>([\w ]+)<',txt)[0]
362 lats,lons = re.findall(
r'<Latitude>([\d\-\.]+)</Latitude>\s*<Longitude>([\d\-\.]+)<',txt)[0]
363 elif txt.find(
'GLatLng') > 0:
364 citys,countrys = re.findall(
r'<br />\s*([^<]+)<br />\s*([^<]+)<',txt)[0]
365 lats,lons = re.findall(
r'LatLng\(([-\d\.]+),([-\d\.]+)',txt)[0]
366 elif txt.find(
'<gml:coordinates>') > 0:
367 citys = re.findall(
r'<Hostip>\s*<gml:name>(\w+)</gml:name>',txt)[0]
368 countrys = re.findall(
r'<countryName>([\w ,\.]+)</countryName>',txt)[0]
369 lats,lons = re.findall(
r'gml:coordinates>([-\d\.]+),([-\d\.]+)<',txt)[0]
371 logging.error(
'error parsing IP result %s'%txt)
373 return {
'Country':countrys,
'City':citys,
'Latitude':lats,
'Longitude':lons}
375 logging.error(
'Error parsing IP result %s'%txt)
381 """Common name for a custom HTML display. Used to interface with MythTV plugin NetVision
383 embedFlashVarFilter = etree.XPath(
'//embed', namespaces=self.
namespaces)
384 variables = self.HTMLvideocode.split(
'?')
386 url =
'%s/nv_python_libs/configs/HTML/%s' % (baseProcessingDir, variables[0])
388 customHTML = etree.parse(url)
389 except Exception
as e:
390 raise Exception(
"! Error: The Custom HTML file (%s) cause the exception error (%s)\n" % (url, errormsg))
395 for arg
in variables[1:]:
396 (attrib, key_value) = arg.split(
'/')
397 (key, value) = key_value.split(
'=')
398 embedFlashVarFilter(customHTML)[0].attrib[attrib] = embedFlashVarFilter(customHTML)[0].attrib[attrib].replace(key, value)
400 sys.stdout.write(etree.tostring(customHTML, encoding=
'UTF-8', pretty_print=
True))
407 ''' Create a MNV Channel element populated with channel details
408 return the channel element
410 mnvChannel = etree.fromstring(
"""
412 <title>%(channel_title)s</title>
413 <link>%(channel_link)s</link>
414 <description>%(channel_description)s</description>
415 <numresults>%(channel_numresults)d</numresults>
416 <returned>%(channel_returned)d</returned>
417 <startindex>%(channel_startindex)d</startindex>
426 '''Verify that a URL actually exists. Be careful as redirects can lead to false positives. Use
427 the info details to be sure.
428 return True when it exists and info
429 return False when it does not exist and info
431 urlOpened = urllib.request.urlopen(url)
432 code = urlOpened.getcode()
433 actualURL = urlOpened.geturl()
434 info = urlOpened.info()
445 ''' Fetch url data and extract the desired results using a dynamic filter or XSLT stylesheet.
446 The URLs are requested in parallel using threading
447 return the extracted data organised into directories
453 sys.stdout.write(etree.tostring(inputUrls, encoding=
'UTF-8', pretty_print=
True))
456 for element
in inputUrls.xpath(
'.//url'):
457 key = element.find(
'name').text
458 urlDictionary[key] = {}
459 urlDictionary[key][
'type'] =
'raw'
460 urlDictionary[key][
'href'] = element.find(
'href').text
461 urlFilter = element.findall(
'filter')
463 urlDictionary[key][
'type'] =
'xpath'
464 for index
in range(len(urlFilter)):
465 urlFilter[index] = urlFilter[index].text
466 urlDictionary[key][
'filter'] = urlFilter
467 urlXSLT = element.findall(
'xslt')
469 urlDictionary[key][
'type'] =
'xslt'
470 for index
in range(len(urlXSLT)):
471 urlXSLT[index] = etree.XSLT(etree.parse(
'%s/nv_python_libs/configs/XSLT/%s.xsl' % (self.
baseProcessingDir, urlXSLT[index].text)))
472 urlDictionary[key][
'xslt'] = urlXSLT
473 urlDictionary[key][
'pageFilter'] = pageFilter
474 urlDictionary[key][
'parser'] = self.
parsers[element.find(
'parserType').text].
copy()
475 urlDictionary[key][
'namespaces'] = self.
namespaces
476 urlDictionary[key][
'result'] = []
477 urlDictionary[key][
'morePages'] =
'false'
478 urlDictionary[key][
'tmp'] =
None
479 urlDictionary[key][
'tree'] =
None
480 if element.find(
'parameter')
is not None:
481 urlDictionary[key][
'parameter'] = element.find(
'parameter').text
484 print(
"urlDictionary:")
489 getURL.urlDictionary = urlDictionary
500 for key
in list(urlDictionary.keys()):
502 thread_list.append(current)
504 for thread
in thread_list:
508 root = etree.XML(
"<xml></xml>")
509 for key
in sorted(getURL.urlDictionary.keys()):
510 if not len(getURL.urlDictionary[key][
'result']):
512 results = etree.SubElement(root,
"results")
513 etree.SubElement(results,
"name").text = key
514 etree.SubElement(results,
"url").text = urlDictionary[key][
'href']
515 etree.SubElement(results,
"type").text = urlDictionary[key][
'type']
516 etree.SubElement(results,
"pageInfo").text = getURL.urlDictionary[key][
'morePages']
517 result = etree.SubElement(results,
"result")
518 if len(getURL.urlDictionary[key][
'filter']):
519 for index
in range(len(getURL.urlDictionary[key][
'result'])):
520 for element
in getURL.urlDictionary[key][
'result'][index]:
521 result.append(element)
522 elif len(getURL.urlDictionary[key][
'xslt']):
523 for index
in range(len(getURL.urlDictionary[key][
'result'])):
524 for element
in getURL.urlDictionary[key][
'result'][index].getroot():
525 result.append(element)
527 for element
in getURL.urlDictionary[key][
'result'][0].xpath(
'/*'):
528 result.append(element)
532 sys.stdout.write(etree.tostring(root, encoding=
'UTF-8', pretty_print=
True))
542 ''' Create a dictionary of functions that manipulate items data. These functions are imported
543 from other MNV grabbers. These functions are meant to be used by the MNV WebCgi type of grabber
544 which aggregates data from a number of different sources (e.g. RSS feeds and HTML Web pages)
545 including sources from other grabbers.
546 Using a dictionary facilitates mixing XSLT functions with pure python functions to use the best
547 capabilities of both technologies when translating source information into MNV compliant item
577 ''' Dynamically add functions to the function dictionary from a specified directory
581 sys.path.append(fullPath)
584 for fPath
in os.listdir(fullPath):
585 filepath, filename = os.path.split( fPath )
586 filename, ext = os.path.splitext( filename )
587 if filename ==
'__init__':
591 fileList.append(filename)
594 for fileName
in fileList:
595 filename = {
'filename': fileName, }
599 %(filename)s.common = self
600 for xpathClass in %(filename)s.__xpathClassList__:
601 exec(u'xpathClass = %(filename)s.%%s()' %% xpathClass)
602 for func in xpathClass.functList:
603 exec("self.functionDict['%%s'] = %%s" %% (func, u'xpathClass.%%s' %% func))
604 for xsltExtension in %(filename)s.__xsltExtentionList__:
605 exec("self.functionDict['%%s'] = %%s" %% (xsltExtension, u'%(filename)s.%%s' %% xsltExtension))''' % filename )
606 except Exception
as errmsg:
607 sys.stderr.write(
'! Error: Dynamic import of (%s) XPath and XSLT extention functions\nmessage(%s)\n' % (fileName, errmsg))
613 '''Convert a date/time string in a specified format into a pubDate. The default is the
615 return the formatted pubDate string
616 return on error return the original date string
619 for arg
in inputArgs:
623 index = args[0].
find(
'+')
625 index = args[0].
find(
'-')
626 if index != -1
and index > 5:
627 args[0] = args[0][:index].strip()
628 args[0] = args[0].replace(
',',
'').replace(
'.',
'')
631 args[1] = args[1].replace(
',',
'').replace(
'.',
'')
632 if args[1].
find(
'GMT') != -1:
633 args[1] = args[1][:args[1].
find(
'GMT')].strip()
634 args[0] = args[0][:args[0].rfind(
' ')].strip()
636 pubdate = time.strptime(args[0], args[1])
638 if args[1] ==
'%a %d %b %Y %H:%M:%S':
639 pubdate = time.strptime(args[0],
'%a %d %B %Y %H:%M:%S')
640 elif args[1] ==
'%a %d %B %Y %H:%M:%S':
641 pubdate = time.strptime(args[0],
'%a %d %b %Y %H:%M:%S')
643 return time.strftime(args[2], pubdate)
648 except Exception
as err:
649 sys.stderr.write(
'! Error: pubDate variables(%s) error(%s)\n' % (args, err))
654 ''' Check is there is any season or episode number information in an item's text
655 return a string of season and/or episode numbers e.g. "2_21"
656 return a string with "None_None" values
660 match = regexPattern.match(text)
663 season_episode = match.groups()
664 if len(season_episode) > 1:
665 s_e[0] = season_episode[0]
666 s_e[1] = season_episode[1]
668 s_e[1] = season_episode[0]
669 return '%s_%s' % (s_e[0], s_e[1])
670 return '%s_%s' % (s_e[0], s_e[1])
674 ''' Take a duration and convert it to seconds
675 return a string of seconds
677 min_sec = duration.split(
':')
679 for count
in range(len(min_sec)):
680 if count != len(min_sec)-1:
681 seconds+=int(min_sec[count])*(60*(len(min_sec)-count-1))
683 seconds+=int(min_sec[count])
684 return '%s' % seconds
688 ''' Take a HTML string and convert it to an HTML element. Then apply a filter and return
690 return filter value as a string
691 return an empty sting if the filter failed to find any values.
695 xpathFilter = args[0]
699 htmlElement = etree.HTML(htmldata)
702 filteredData = htmlElement.xpath(xpathFilter)
703 if len(filteredData):
704 if xpathFilter.find(
'@') != -1:
705 return filteredData[0]
707 return filteredData[0].text
712 ''' Check if there is a special local HTML page for the link. If not then return a generic
713 download only local HTML url.
714 return a file://.... link to a local HTML web page
721 return 'file://%s/nv_python_libs/configs/HTML/%s' % (self.
baseProcessingDir,
'nodownloads.html')
725 ''' Return the base directory string
726 return the base directory
733 return a lower case string
737 return data[0].lower()
742 return a upper case string
746 return data[0].upper()
750 ''' Replace substring values in a string
751 return the resulting string from a replace operation
754 for arg
in inputArgs:
756 if not len(args)
or len(args) == 1:
759 args[0] = args[0].replace(args[1],
"")
761 args[0] = args[0].replace(args[1], args[2])
762 return args[0].strip()
766 ''' Replace substring values in a string
767 return the resulting string from a replace operation
772 return urllib.parse.quote_plus(args[0].encode(
"utf-8"))
774 return urllib.parse.quote_plus(args[0].encode(
"utf-8"), args[1])
778 ''' Remove all punctuation for a string
779 return the resulting string
783 return re.sub(
'[%s]' % re.escape(string.punctuation),
'', data)
787 ''' Remove HTML tags and LFs from a string
788 return the string without HTML tags or LFs
792 return self.
massageText(html).strip().replace(
'\n',
' ').replace(
'Â’',
"'").replace(
'“',
"'")
796 ''' Return the current selected language code
803 ''' Find an 'internetcontentarticles' table record based on fields and values
804 return True if a record was found and an item element created
805 return False if no record was found
815 ''' Return an item element that was created by a previous call to the checkIfDBItem function
821 ''' Return True or False if a substring is at the beginning or end of a string
823 if arg[0] ==
'starts':
824 return arg[1].startswith(arg[2])
825 elif arg[0] ==
'ends':
826 return arg[1].endswith(arg[2])
828 index = arg[1].
find(arg[2])
836 ''' Return a list of 'internetcontentarticles' table records based on field and value matches
843 for key
in list(arg[0].keys()):
845 arg[0][key] = arg[0][key].encode(
'UTF-8')
848 return list(self.
mythdb.searchInternetContent(**arg[0]))
852 ''' Create an item element from an 'internetcontentarticles' table record dictionary
853 return the item element
856 itemElement = etree.XML(self.
mnvItem)
858 itemElement.find(
'link').text = result[
'url']
860 itemElement.find(
'title').text = result[
'title']
861 if result[
'subtitle']:
862 etree.SubElement(itemElement,
"subtitle").text = result[
'subtitle']
863 if result[
'description']:
864 itemElement.find(
'description').text = result[
'description']
866 itemElement.find(
'author').text = result[
'author']
868 itemElement.find(
'pubDate').text = result[
'date'].strftime(self.
pubDateFormat)
869 if result[
'rating'] !=
'32576' and result[
'rating'][0] !=
'-':
870 itemElement.find(
'rating').text = result[
'rating']
871 if result[
'thumbnail']:
872 self.
itemThumbnail(itemElement)[0].attrib[
'url'] = result[
'thumbnail']
873 if result[
'mediaURL']:
874 self.
itemContent(itemElement)[0].attrib[
'url'] = result[
'mediaURL']
875 if result[
'filesize'] > 0:
876 self.
itemContent(itemElement)[0].attrib[
'length'] = str(result[
'filesize'])
877 if result[
'time'] > 0:
878 self.
itemContent(itemElement)[0].attrib[
'duration'] = str(result[
'time'])
879 if result[
'width'] > 0:
880 self.
itemContent(itemElement)[0].attrib[
'width'] = str(result[
'width'])
881 if result[
'height'] > 0:
882 self.
itemContent(itemElement)[0].attrib[
'height'] = str(result[
'height'])
883 if result[
'language']:
884 self.
itemContent(itemElement)[0].attrib[
'lang'] = result[
'language']
885 if result[
'season'] > 0:
886 etree.SubElement(itemElement,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}season").text = str(result[
'season'])
887 if result[
'episode'] > 0:
888 etree.SubElement(itemElement,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}episode").text = str(result[
'episode'])
889 if result[
'customhtml'] == 1:
890 etree.SubElement(itemElement,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}customhtml").text =
'true'
891 if result[
'countries']:
892 countries = result[
'countries'].split(
' ')
893 for country
in countries:
894 etree.SubElement(itemElement,
"{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}country").text = country
899 ''' Import the MythTV database bindings
903 from MythTV
import MythDB, MythLog, MythError
905 '''Create an instance of each: MythDB
907 MythLog._setlevel(
'none')
909 except MythError
as e:
910 sys.stderr.write(
'\n! Error - %s\n' % e.args[0])
911 filename = os.path.expanduser(
"~")+
'/.mythtv/config.xml'
912 if not os.path.isfile(filename):
913 sys.stderr.write(
'\n! Error - A correctly configured (%s) file must exist\n' % filename)
915 sys.stderr.write(
'\n! Error - Check that (%s) is correctly configured\n' % filename)
917 except Exception
as e:
918 sys.stderr.write(
"\n! Error - Creating an instance caused an error for one of: MythDB. error(%s)\n" % e)
920 except Exception
as e:
921 sys.stderr.write(
"\n! Error - MythTV python bindings could not be imported. error(%s)\n" % e)
932 ''' Threaded download of a URL and filter out the desired data for XML and (X)HTML
933 return the filter results
936 Thread.__init__(self)
942 print(
"getURL href(%s)" % (self.urlDictionary[self.
urlKey][
'href'], ))
947 self.urlDictionary[self.
urlKey][
'tree'] = etree.parse(self.urlDictionary[self.
urlKey][
'href'], self.urlDictionary[self.
urlKey][
'parser'])
948 except Exception
as errormsg:
949 sys.stderr.write(
"! Error: The URL (%s) cause the exception error (%s)\n" % (self.urlDictionary[self.
urlKey][
'href'], errormsg))
953 print(
"Raw unfiltered URL input:")
954 sys.stdout.write(etree.tostring(self.urlDictionary[self.
urlKey][
'tree'], encoding=
'UTF-8', pretty_print=
True))
957 if len(self.urlDictionary[self.
urlKey][
'filter']):
958 for index
in range(len(self.urlDictionary[self.
urlKey][
'filter'])):
961 self.urlDictionary[self.
urlKey][
'tmp'] = self.urlDictionary[self.
urlKey][
'tree'].xpath(self.urlDictionary[self.
urlKey][
'filter'][index], namespaces=self.urlDictionary[self.
urlKey][
'namespaces'])
962 except AssertionError
as e:
963 sys.stderr.write(
"No filter results for Name(%s)\n" % self.
urlKey)
964 sys.stderr.write(
"No filter results for url(%s)\n" % self.urlDictionary[self.
urlKey][
'href'])
965 sys.stderr.write(
"! Error:(%s)\n" % e)
966 if len(self.urlDictionary[self.
urlKey][
'filter']) == index-1:
970 self.urlDictionary[self.
urlKey][
'result'].append(self.urlDictionary[self.
urlKey][
'tmp'])
971 elif len(self.urlDictionary[self.
urlKey][
'xslt']):
972 for index
in range(len(self.urlDictionary[self.
urlKey][
'xslt'])):
975 if 'parameter' in self.urlDictionary[self.
urlKey]:
976 self.urlDictionary[self.
urlKey][
'tmp'] = self.urlDictionary[self.
urlKey][
'xslt'][index](self.urlDictionary[self.
urlKey][
'tree'], paraMeter= etree.XSLT.strparam(
977 self.urlDictionary[self.
urlKey][
'parameter']) )
979 self.urlDictionary[self.
urlKey][
'tmp'] = self.urlDictionary[self.
urlKey][
'xslt'][index](self.urlDictionary[self.
urlKey][
'tree'])
980 except Exception
as e:
981 sys.stderr.write(
"! XSLT Error:(%s) Key(%s)\n" % (e, self.
urlKey))
982 if len(self.urlDictionary[self.
urlKey][
'filter']) == index-1:
987 if self.urlDictionary[self.
urlKey][
'tmp'].getroot()
is None:
988 sys.stderr.write(
"No Xslt results for Name(%s)\n" % self.
urlKey)
989 sys.stderr.write(
"No Xslt results for url(%s)\n" % self.urlDictionary[self.
urlKey][
'href'])
990 if len(self.urlDictionary[self.
urlKey][
'filter']) == index-1:
994 self.urlDictionary[self.
urlKey][
'result'].append(self.urlDictionary[self.
urlKey][
'tmp'])
997 self.urlDictionary[self.
urlKey][
'result'] = [self.urlDictionary[self.
urlKey][
'tree']]
1000 if self.urlDictionary[self.
urlKey][
'pageFilter']:
1001 if len(self.urlDictionary[self.
urlKey][
'tree'].xpath(self.urlDictionary[self.
urlKey][
'pageFilter'], namespaces=self.urlDictionary[self.
urlKey][
'namespaces'])):
1002 self.urlDictionary[self.
urlKey][
'morePages'] =
'true'