__title__ = "common_api - Common class libraries for all MythNetvision Mashup processing"
__author__ = "R.D. Vaughan"

'''
This Python script performs a variety of utility functions for the processing of
MythNetvision Grabber scripts that run as a Web application, and provides global functions
used by many grabbers.
'''
import os, struct, sys, re, datetime, time, subprocess, string
import urllib.request, urllib.parse, urllib.error
import io, logging, html.entities

from threading import Thread
from .common_exceptions import (WebCgiUrlError, WebCgiHttpError, WebCgiRssError,
                                WebCgiVideoNotFound, WebCgiXmlError, )
65 """Wraps a stream with an encoder"""
74 """Wraps the output stream, encoding Unicode strings with the specified encoding"""
75 if isinstance(obj, str):
77 self.
out.buffer.write(obj)
80 """Delegate everything but write to the stream"""
81 return getattr(self.
out, attr)
83 if isinstance(sys.stdout, io.TextIOWrapper):
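# Illustrative note (not part of the original file): after the wrapping above,
# grabber code can print Unicode without worrying about the terminal encoding,
# e.g. (sample text made up):
#
#   sys.stdout.write('Grabber \u2013 r\u00e9sum\u00e9\n')   # emitted as UTF-8 bytes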
try:
    from io import StringIO
    from lxml import etree
except Exception as e:
    sys.stderr.write('\n! Error - Importing the "lxml" python library failed on error(%s)\n' % e)
    sys.exit(1)
version = '.'.join(str(digit) for digit in etree.LIBXML_VERSION)
# Compare the version as a tuple: a string comparison would mis-order
# versions such as "2.10.0" < "2.7.2"
if etree.LIBXML_VERSION < (2, 7, 2):
    sys.stderr.write('''
! Error - The installed version of the "lxml" python library "libxml" is too old.
          At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
129 """A collection of common functions used by many grabbers
137 self.
baseProcessingDir = os.path.dirname( os.path.realpath( __file__ )).replace(
'/nv_python_libs/common',
'')
139 'xsi':
"http://www.w3.org/2001/XMLSchema-instance",
140 'media':
"http://search.yahoo.com/mrss/",
141 'xhtml':
"http://www.w3.org/1999/xhtml",
142 'atm':
"http://www.w3.org/2005/Atom",
143 'mythtv':
"http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format",
144 'itunes':
"http://www.itunes.com/dtds/podcast-1.0.dtd",
147 'xml': etree.XMLParser(remove_blank_text=
True),
148 'html': etree.HTMLParser(remove_blank_text=
True),
149 'xhtml': etree.HTMLParser(remove_blank_text=
True),
        self.mnvRSS = """<rss version="2.0"
    xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd"
    xmlns:content="http://purl.org/rss/1.0/modules/content/"
    xmlns:cnettv="http://cnettv.com/mrss/"
    xmlns:creativeCommons="http://backend.userland.com/creativeCommonsRssModule"
    xmlns:media="http://search.yahoo.com/mrss/"
    xmlns:atom="http://www.w3.org/2005/Atom"
    xmlns:amp="http://www.adobe.com/amp/1.0"
    xmlns:dc="http://purl.org/dc/elements/1.1/"
    xmlns:mythtv="http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format">
"""
        self.mnvItem = '''<item>
    <title></title>
    <author></author>
    <pubDate></pubDate>
    <description></description>
    <link></link>
    <media:group xmlns:media="http://search.yahoo.com/mrss/">
        <media:thumbnail url=''/>
        <media:content url='' length='' duration='' width='' height='' lang=''/>
    </media:group>
    <rating></rating>
</item>
'''
        self.pubDateFormat = '%a, %d %b %Y %H:%M:%S GMT'
        # Season/episode detection patterns, tried in order; the named groups
        # "seasno" and "epno" are extracted by getSeasonEpisode() below
        self.s_e_Patterns = [
            re.compile(r'''^.+?Series\ (?P<seasno>[0-9]+).*.+?Episode\ (?P<epno>[0-9]+).*$''', re.UNICODE),
            re.compile(r'''^.+?Series\ (?P<seasno>[0-9]+)\ \-\ (?P<epno>[0-9]+).*$''', re.UNICODE),
            re.compile(r'''^.+?Series\ (?P<seasno>[0-9]+).*.+?Part\ (?P<epno>[0-9]+).*$''', re.UNICODE),
            re.compile(r'''^.+?Series\ (?P<seasno>[0-9]+)\:\ Programme\ (?P<epno>[0-9]+).*$''', re.UNICODE),
            re.compile(r'''^.+?Series\ (?P<seasno>[0-9]+).*$''', re.UNICODE),
            re.compile(r'''^.+?Episode\ (?P<seasno>[0-9]+).*$''', re.UNICODE),
            re.compile(r'''^.+?[Ss](?P<seasno>[0-9]+).*.+?[Ee](?P<epno>[0-9]+).*$''', re.UNICODE),
            re.compile(r'''^.+?season\ (?P<seasno>[0-9]+).*.+?episode\ (?P<epno>[0-9]+).*$''', re.UNICODE),
            re.compile(r'''(?P<seriesname>[^_]+)\_(?P<seasno>[0-9]+)\_(?P<epno>[0-9]+).*$''', re.UNICODE),
            re.compile(r'''^.+?episode(?P<epno>[0-9]+).*$''', re.UNICODE),
            re.compile(r'''^.+?Season\ (?P<seasno>[0-9]+).*.+?Episode\ (?P<epno>[0-9]+).*$''', re.UNICODE),
            re.compile(r'''^.+?Episode\ (?P<seasno>[0-9]+).*$''', re.UNICODE),
            re.compile(r'''Episode\ (?P<seasno>[0-9]+).*$''', re.UNICODE),
            re.compile(r'''^.+?--(?P<seasno>[0-9]+)--.*$''', re.UNICODE),
            ]
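        # A quick illustration of these patterns (hypothetical title; not part
        # of the original file):
        #
        #   m = self.s_e_Patterns[0].match('Doctor Who Series 4 - Episode 6')
        #   m.group('seasno'), m.group('epno')   # -> ('4', '6')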
    def massageText(self, text):
        '''Removes HTML markup from a text string.
        @param text The HTML source.
        @return The plain text. If the HTML source contains non-ASCII
        entities or character references, this is a Unicode string.
        '''
        def fixup(m):
            text = m.group(0)
            if text[:1] == "<":
                return ""           # ignore tags
            if text[:2] == "&#":
                # Numeric character reference
                try:
                    if text[:3] == "&#x":
                        return chr(int(text[3:-1], 16))
                    else:
                        return chr(int(text[2:-1]))
                except ValueError:
                    pass
            elif text[:1] == "&":
                # Named entity
                entity = html.entities.entitydefs.get(text[1:-1])
                if entity:
                    if entity[:2] == "&#":
                        try:
                            return chr(int(entity[2:-1]))
                        except ValueError:
                            pass
                    else:
                        # In Python 3 entitydefs values are already str
                        return entity
            return text             # leave as-is
        return self.ampReplace(re.sub(r"(?s)<[^>]*>|&#?\w+;", fixup, self.textUtf8(text))).replace('\n', ' ')
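    # Usage sketch for massageText() (not part of the original file; assumes a
    # Common instance named "common" and a made-up HTML snippet):
    #
    #   common.massageText('<p>Tom &amp; Jerry&#8217;s Christmas</p>')
    #   # -> 'Tom &amp; Jerry’s Christmas' : tags dropped, entities decoded,
    #   #    then ampReplace() re-escapes the bare '&'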
    def initLogger(self, path=sys.stderr, log_name='MNV_Grabber'):
        """Sets up a logger using the logging module; returns a logger object
        """
        logger = logging.getLogger(log_name)
        formatter = logging.Formatter('%(asctime)s-%(levelname)s: %(message)s', '%Y-%m-%dT%H:%M:%S')
        if path == sys.stderr:
            hdlr = logging.StreamHandler(sys.stderr)
        else:
            hdlr = logging.FileHandler('%s/%s.log' % (path, log_name))
        hdlr.setFormatter(formatter)
        logger.addHandler(hdlr)
        if self.debug:      # reconstructed guard: verbose runs log at DEBUG
            logger.setLevel(logging.DEBUG)
        else:
            logger.setLevel(logging.INFO)
        return logger
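    # Usage sketch for initLogger() (not part of the original file; the path
    # and name are hypothetical):
    #
    #   logger = common.initLogger(path='/tmp', log_name='sample_grabber')
    #   logger.info('Grabber started')   # -> appended to /tmp/sample_grabber.log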
    def textUtf8(self, text):
        '''Decode a byte string to unicode; pass str text through unchanged'''
        try:
            return str(text, 'utf8')
        except UnicodeDecodeError:
            return ''
        except (UnicodeEncodeError, TypeError):
            return text     # text was already a str
    def ampReplace(self, text):
        '''Replace all &, ', ", <, and > characters with the predefined XML
        entities
        '''
        text = self.textUtf8(text)
        # The "~~~~~" placeholder protects already-escaped "&amp;" sequences
        text = text.replace('&amp;', '~~~~~').replace('&', '&amp;').replace('~~~~~', '&amp;')
        text = text.replace("'", '&apos;').replace('"', '&quot;')
        text = text.replace('<', '&lt;').replace('>', '&gt;')
        return text
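    # Behaviour sketch for ampReplace() (not part of the original file): the
    # "~~~~~" placeholder keeps already-escaped text from being double-escaped.
    #
    #   common.ampReplace('Tom & Jerry &amp; "friends"')
    #   # -> 'Tom &amp; Jerry &amp; &quot;friends&quot;'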
    def callCommandLine(self, command, stderr=False):
        '''Run the requested command line and return an array of stdout strings, plus
        stderr strings when stderr=True
        return a stdout string array, or both stdout and stderr string arrays
        '''
        stderrarray = []
        stdoutarray = []
        try:
            p = subprocess.Popen(command, shell=True, bufsize=4096, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
        except Exception as e:
            if self.logger:
                self.logger.error('callCommandLine Popen Exception, error(%s)' % e)
            if stderr:
                return [[], []]
            return []

        if stderr:
            while True:
                data = p.stderr.readline()
                if not data:
                    break
                try:
                    data = str(data, 'utf8')
                except UnicodeDecodeError:
                    continue        # Skip any line with non-utf8 characters
                except (UnicodeEncodeError, TypeError):
                    pass
                stderrarray.append(data)

        while True:
            data = p.stdout.readline()
            if not data:
                break
            try:
                data = str(data, 'utf8')
            except UnicodeDecodeError:
                continue            # Skip any line with non-utf8 characters
            except (UnicodeEncodeError, TypeError):
                pass
            stdoutarray.append(data)

        if stderr:
            return [stdoutarray, stderrarray]
        return stdoutarray
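    # Usage sketch for callCommandLine() (not part of the original file; the
    # command is hypothetical):
    #
    #   stdout_lines = common.callCommandLine('ls /tmp')
    #   stdout_lines, stderr_lines = common.callCommandLine('ls /tmp', stderr=True)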
    def detectUserLocation(self):
        '''Get longitude and latitude to find videos relative to your location. Up to three different
        servers will be tried before giving up.
        return a dictionary e.g.
        {'Latitude': '43.6667', 'Country': 'Canada', 'Longitude': '-79.4167', 'City': 'Toronto'}
        return an empty dictionary if there were any errors
        Code found at: http://blog.suinova.com/2009/04/from-ip-to-geolocation-country-city.html
        '''
        def getExternalIP():
            '''Find the external IP address of this computer.
            '''
            url = urllib.request.URLopener()
            try:
                resp = url.open('http://www.whatismyip.com/automation/n09230945.asp')
                return resp.read()
            except:
                return None

        ip = getExternalIP()
        if ip is None:
            return {}
        # Try each GeoIP server in turn
        try:
            gs = urllib.request.urlopen('http://blogama.org/ip_query.php?ip=%s&output=xml' % ip)
            txt = gs.read()
        except:
            try:
                gs = urllib.request.urlopen('http://www.seomoz.org/ip2location/look.php?ip=%s' % ip)
                txt = gs.read()
            except:
                try:
                    gs = urllib.request.urlopen('http://api.hostip.info/?ip=%s' % ip)
                    txt = gs.read()
                except:
                    logging.error('GeoIP servers not available')
                    return {}
        try:
            if txt.find('<Response>') > 0:
                countrys = re.findall(r'<CountryName>([\w ]+)<', txt)[0]
                citys = re.findall(r'<City>([\w ]+)<', txt)[0]
                lats, lons = re.findall(r'<Latitude>([\d\-\.]+)</Latitude>\s*<Longitude>([\d\-\.]+)<', txt)[0]
            elif txt.find('GLatLng') > 0:
                citys, countrys = re.findall(r'<br />\s*([^<]+)<br />\s*([^<]+)<', txt)[0]
                lats, lons = re.findall(r'LatLng\(([-\d\.]+),([-\d\.]+)', txt)[0]
            elif txt.find('<gml:coordinates>') > 0:
                citys = re.findall(r'<Hostip>\s*<gml:name>(\w+)</gml:name>', txt)[0]
                countrys = re.findall(r'<countryName>([\w ,\.]+)</countryName>', txt)[0]
                lats, lons = re.findall(r'gml:coordinates>([-\d\.]+),([-\d\.]+)<', txt)[0]
            else:
                logging.error('Error parsing IP result %s' % txt)
                return {}
            return {'Country': countrys, 'City': citys, 'Latitude': lats, 'Longitude': lons}
        except:
            logging.error('Error parsing IP result %s' % txt)
            return {}
408 """Common name for a custom HTML display. Used to interface with MythTV plugin NetVision
410 embedFlashVarFilter = etree.XPath(
'//embed', namespaces=self.
namespaces)
411 variables = self.HTMLvideocode.split(
'?')
413 url =
'%s/nv_python_libs/configs/HTML/%s' % (baseProcessingDir, variables[0])
415 customHTML = etree.parse(url)
416 except Exception
as e:
417 raise Exception(
"! Error: The Custom HTML file (%s) cause the exception error (%s)\n" % (url, errormsg))
422 for arg
in variables[1:]:
423 (attrib, key_value) = arg.split(
'/')
424 (key, value) = key_value.split(
'=')
425 embedFlashVarFilter(customHTML)[0].attrib[attrib] = embedFlashVarFilter(customHTML)[0].attrib[attrib].replace(key, value)
427 sys.stdout.write(etree.tostring(customHTML, encoding=
'UTF-8', pretty_print=
True))
    def mnvChannelElement(self, channelDetails):
        ''' Create an MNV channel element populated with the channel details
        return the channel element
        '''
        mnvChannel = etree.fromstring("""
<channel>
    <title>%(channel_title)s</title>
    <link>%(channel_link)s</link>
    <description>%(channel_description)s</description>
    <numresults>%(channel_numresults)d</numresults>
    <returned>%(channel_returned)d</returned>
    <startindex>%(channel_startindex)d</startindex>
</channel>
""" % channelDetails)
        return mnvChannel
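    # Usage sketch for mnvChannelElement() (not part of the original file); the
    # dictionary keys mirror the %(...)s and %(...)d substitutions above:
    #
    #   channel = common.mnvChannelElement({
    #       'channel_title': 'Sample grabber',
    #       'channel_link': 'http://www.example.com/',
    #       'channel_description': 'Example search results',
    #       'channel_numresults': 40,
    #       'channel_returned': 20,
    #       'channel_startindex': 0,
    #       })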
    def checkURL(self, url):
        '''Verify that a URL actually exists. Be careful, as redirects can lead to false
        positives: use the info details to be sure.
        return [True, info] when the URL exists
        return [False, info] when it does not
        '''
        urlOpened = urllib.request.urlopen(url)
        code = urlOpened.getcode()
        actualURL = urlOpened.geturl()
        info = urlOpened.info()
        urlOpened.close()
        if code != 200:         # Not an "OK" response
            return [False, info]
        if url != actualURL:    # A redirect occurred
            return [False, info]
        return [True, info]
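    # Usage sketch for checkURL() (not part of the original file; the URL is
    # hypothetical). Inspect "info" before trusting a True result, since a
    # redirect to an error page can still answer 200:
    #
    #   exists, info = common.checkURL('http://www.example.com/feed.rss')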
    def getUrlData(self, inputUrls, pageFilter=None):
        ''' Fetch URL data and extract the desired results using a dynamic filter or XSLT stylesheet.
        The URLs are requested in parallel using threading
        return the extracted data organized into an element tree of per-URL results
        '''
        urlDictionary = {}

        if self.debug:
            print("inputUrls:")
            sys.stdout.write(etree.tostring(inputUrls, encoding='UTF-8', pretty_print=True))

        for element in inputUrls.xpath('.//url'):
            key = element.find('name').text
            urlDictionary[key] = {}
            urlDictionary[key]['type'] = 'raw'
            urlDictionary[key]['href'] = element.find('href').text
            urlFilter = element.findall('filter')
            if len(urlFilter):
                urlDictionary[key]['type'] = 'xpath'
            for index in range(len(urlFilter)):
                urlFilter[index] = urlFilter[index].text
            urlDictionary[key]['filter'] = urlFilter
            urlXSLT = element.findall('xslt')
            if len(urlXSLT):
                urlDictionary[key]['type'] = 'xslt'
            for index in range(len(urlXSLT)):
                urlXSLT[index] = etree.XSLT(etree.parse('%s/nv_python_libs/configs/XSLT/%s.xsl' % (self.baseProcessingDir, urlXSLT[index].text)))
            urlDictionary[key]['xslt'] = urlXSLT
            urlDictionary[key]['pageFilter'] = pageFilter
            urlDictionary[key]['parser'] = self.parsers[element.find('parserType').text].copy()
            urlDictionary[key]['namespaces'] = self.namespaces
            urlDictionary[key]['result'] = []
            urlDictionary[key]['morePages'] = 'false'
            urlDictionary[key]['tmp'] = None
            urlDictionary[key]['tree'] = None
            if element.find('parameter') is not None:
                urlDictionary[key]['parameter'] = element.find('parameter').text

        if self.debug:
            print("urlDictionary:")
            print(urlDictionary)
        thread_list = []
        getURL.urlDictionary = urlDictionary

        # Start one download thread per URL, then wait for them all to finish
        for key in list(urlDictionary.keys()):
            current = getURL(key, self.debug)
            thread_list.append(current)
            current.start()
        for thread in thread_list:
            thread.join()

        # Assemble the results into an element tree
        root = etree.XML("<xml></xml>")
        for key in sorted(getURL.urlDictionary.keys()):
            if not len(getURL.urlDictionary[key]['result']):
                continue
            results = etree.SubElement(root, "results")
            etree.SubElement(results, "name").text = key
            etree.SubElement(results, "url").text = urlDictionary[key]['href']
            etree.SubElement(results, "type").text = urlDictionary[key]['type']
            etree.SubElement(results, "pageInfo").text = getURL.urlDictionary[key]['morePages']
            result = etree.SubElement(results, "result")
            if len(getURL.urlDictionary[key]['filter']):
                for index in range(len(getURL.urlDictionary[key]['result'])):
                    for element in getURL.urlDictionary[key]['result'][index]:
                        result.append(element)
            elif len(getURL.urlDictionary[key]['xslt']):
                for index in range(len(getURL.urlDictionary[key]['result'])):
                    for element in getURL.urlDictionary[key]['result'][index].getroot():
                        result.append(element)
            else:
                for element in getURL.urlDictionary[key]['result'][0].xpath('/*'):
                    result.append(element)

        if self.debug:
            print("root:")
            sys.stdout.write(etree.tostring(root, encoding='UTF-8', pretty_print=True))

        return root
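    # Usage sketch for getUrlData() (not part of the original file; all values
    # are made up). The input is an element tree of <url> entries naming the
    # href, the parser type and any XPath filters:
    #
    #   inputUrls = etree.XML('''
    #   <urls>
    #     <url>
    #       <name>feed1</name>
    #       <href>http://www.example.com/rss</href>
    #       <parserType>xml</parserType>
    #       <filter>//item/title</filter>
    #     </url>
    #   </urls>''')
    #   root = common.getUrlData(inputUrls)   # one <results> element per named url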
    def buildFunctionDict(self):
        ''' Create a dictionary of functions that manipulate item data. These functions are imported
        from other MNV grabbers and are meant to be used by the MNV WebCgi type of grabber,
        which aggregates data from a number of different sources (e.g. RSS feeds and HTML Web pages)
        including sources from other grabbers.
        Using a dictionary facilitates mixing XSLT functions with pure Python functions, to use the
        best capabilities of both technologies when translating source information into MNV
        compliant item data.
        '''
        self.functionDict = {}  # populated with the common XPath/XSLT functions

    def addDynamicFunctions(self, dirPath):
        ''' Dynamically add functions to the function dictionary from a specified directory
        '''
        fullPath = '%s/nv_python_libs/%s' % (self.baseProcessingDir, dirPath)
        sys.path.append(fullPath)
        # Make a list of all the python modules to be imported
        fileList = []
        for fPath in os.listdir(fullPath):
            filepath, filename = os.path.split(fPath)
            filename, ext = os.path.splitext(filename)
            if filename == '__init__':
                continue
            if ext != '.py':
                continue
            fileList.append(filename)

        for fileName in fileList:
            filename = {'filename': fileName, }
            try:
                exec('''
import %(filename)s
%(filename)s.common = self
for xpathClass in %(filename)s.__xpathClassList__:
    exec(u'xpathClass = %(filename)s.%%s()' %% xpathClass)
    for func in xpathClass.functList:
        exec("self.functionDict['%%s'] = %%s" %% (func, u'xpathClass.%%s' %% func))
for xsltExtension in %(filename)s.__xsltExtentionList__:
    exec("self.functionDict['%%s'] = %%s" %% (xsltExtension, u'%(filename)s.%%s' %% xsltExtension))''' % filename)
            except Exception as errmsg:
                sys.stderr.write('! Error: Dynamic import of (%s) XPath and XSLT extension functions\nmessage(%s)\n' % (fileName, errmsg))
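    # Layout sketch of a module that addDynamicFunctions() can import (not part
    # of the original file; module, class and function names are made up). The
    # exec template above expects the module to publish these two lists:
    #
    #   # somegrabber_api.py
    #   __xpathClassList__ = ['xpathFunctions']
    #   __xsltExtentionList__ = []
    #
    #   class xpathFunctions(object):
    #       functList = ['someXpathFunction']
    #       def someXpathFunction(self, context, *args):
    #           return ''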
    def pubDate(self, context, *inputArgs):
        '''Convert a date/time string in a specified format into a pubDate. The default is the
        MNV item format
        return the formatted pubDate string
        on error, return the original date string
        '''
        args = []
        for arg in inputArgs:
            args.append(arg)
        if args[0] == '':
            return datetime.datetime.now().strftime(self.pubDateFormat)
        # Strip any trailing timezone offset (e.g. "+00:00") from the date text
        index = args[0].find('+')
        if index == -1:
            index = args[0].find('-')
        if index != -1 and index > 5:
            args[0] = args[0][:index].strip()
        args[0] = args[0].replace(',', '').replace('.', '')
        try:
            if len(args) > 1:
                args[1] = args[1].replace(',', '').replace('.', '')
                if args[1].find('GMT') != -1:
                    args[1] = args[1][:args[1].find('GMT')].strip()
                    args[0] = args[0][:args[0].rfind(' ')].strip()
                try:
                    pubdate = time.strptime(args[0], args[1])
                except ValueError:
                    # Some sites use abbreviated and full month names interchangeably
                    if args[1] == '%a %d %b %Y %H:%M:%S':
                        pubdate = time.strptime(args[0], '%a %d %B %Y %H:%M:%S')
                    elif args[1] == '%a %d %B %Y %H:%M:%S':
                        pubdate = time.strptime(args[0], '%a %d %b %Y %H:%M:%S')
                if len(args) > 2:
                    return time.strftime(args[2], pubdate)
                return time.strftime(self.pubDateFormat, pubdate)
            return datetime.datetime.now().strftime(self.pubDateFormat)
        except Exception as err:
            sys.stderr.write('! Error: pubDate variables(%s) error(%s)\n' % (args, err))
        return args[0]
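    # Usage sketch for pubDate() (not part of the original file; the date text
    # is made up). The arguments are the date string, its strptime format and
    # an optional output strftime format:
    #
    #   common.pubDate(None, 'Mon, 06 Sep 2010 12:00:00 GMT',
    #                  '%a %d %b %Y %H:%M:%S GMT', '%Y-%m-%d')
    #   # -> '2010-09-06'; on a parsing error the original string comes back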
    def getSeasonEpisode(self, context, text):
        ''' Check if there is any season or episode number information in an item's text
        return a string of season and/or episode numbers e.g. "2_21"
        return a string with "None_None" values when there are no matches
        '''
        s_e = [None, None]
        for regexPattern in self.s_e_Patterns:
            match = regexPattern.match(text)
            if not match:
                continue
            season_episode = match.groups()
            if len(season_episode) > 1:
                s_e[0] = season_episode[0]
                s_e[1] = season_episode[1]
            else:
                s_e[1] = season_episode[0]
            return '%s_%s' % (s_e[0], s_e[1])
        return '%s_%s' % (s_e[0], s_e[1])
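    # Usage sketch for getSeasonEpisode() (not part of the original file; the
    # titles are made up). The patterns are tried in order:
    #
    #   common.getSeasonEpisode(None, 'Doctor Who Series 4 - Episode 6')  # -> '4_6'
    #   common.getSeasonEpisode(None, 'No numbering here')                # -> 'None_None'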
    def convertDuration(self, context, duration):
        ''' Take a duration ("[[HH:]MM:]SS") and convert it to seconds
        return a string of seconds
        '''
        min_sec = duration.split(':')
        seconds = 0
        for count in range(len(min_sec)):
            if count != len(min_sec) - 1:
                # Weight each leading field by its power of 60 so that hours
                # convert correctly as well as minutes
                seconds += int(min_sec[count]) * (60 ** (len(min_sec) - count - 1))
            else:
                seconds += int(min_sec[count])
        return '%s' % seconds
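    # Usage sketch for convertDuration() (not part of the original file):
    # "MM:SS" and "HH:MM:SS" durations both reduce to seconds:
    #
    #   common.convertDuration(None, '2:30')      # -> '150'
    #   common.convertDuration(None, '1:02:03')   # -> '3723'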
    def getHtmlData(self, context, *args):
        ''' Take an HTML string, convert it to an HTML element, then apply an XPath filter and
        return that value.
        return the filter value as a string
        return an empty string if the filter failed to find any values
        '''
        xpathFilter = None
        if len(args) > 1:
            xpathFilter = args[0]
            htmldata = args[1]
        else:
            htmldata = args[0]
        htmlElement = etree.HTML(htmldata)
        if not xpathFilter:
            return htmlElement
        filteredData = htmlElement.xpath(xpathFilter)
        if len(filteredData):
            if xpathFilter.find('@') != -1:
                return filteredData[0]      # An attribute value was requested
            else:
                return filteredData[0].text
        return ''
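    # Usage sketch for getHtmlData() (not part of the original file; the markup
    # is made up). A filter ending in an attribute returns the attribute value,
    # otherwise the element text is returned:
    #
    #   common.getHtmlData(None, '//a/@href', '<p><a href="http://example.com">x</a></p>')
    #   # -> 'http://example.com'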
    def linkWebPage(self, context, sourceLink):
        ''' Check if there is a special local HTML page for the link. If not, return a generic
        download-only local HTML URL.
        return a file://... link to a local HTML web page
        '''
        # Currently there are no link-specific Web pages
        return 'file://%s/nv_python_libs/configs/HTML/%s' % (self.baseProcessingDir, 'nodownloads.html')
    def baseDir(self, context, dummy):
        ''' Return the base directory string
        '''
        return self.baseProcessingDir

    def stringLower(self, context, data):
        ''' return a lower case string
        '''
        return data[0].lower() if len(data) else ''

    def stringUpper(self, context, data):
        ''' return an upper case string
        '''
        return data[0].upper() if len(data) else ''
    def stringReplace(self, context, *inputArgs):
        ''' Replace substring values in a string
        return the resulting string from a replace operation
        '''
        args = []
        for arg in inputArgs:
            args.append(arg)
        if not len(args) or len(args) == 1:
            return args[0] if len(args) else ''     # Nothing to replace
        if len(args) == 2:
            args[0] = args[0].replace(args[1], "")
        else:
            args[0] = args[0].replace(args[1], args[2])
        return args[0].strip()
    def stringEscape(self, context, *args):
        ''' Escape a string for use in a URL via quote_plus
        return the resulting escaped string
        '''
        if not len(args):
            return ''
        if len(args) == 1:
            return urllib.parse.quote_plus(args[0].encode("utf-8"))
        return urllib.parse.quote_plus(args[0].encode("utf-8"), args[1])
    def removePunc(self, context, data):
        ''' Remove all punctuation from a string
        return the resulting string
        '''
        if not len(data):
            return ''
        return re.sub('[%s]' % re.escape(string.punctuation), '', data)
    def htmlToString(self, context, html):
        ''' Remove HTML tags and LFs from a string
        return the string without HTML tags or LFs
        '''
        if not len(html):
            return ''
        # massageText() strips the markup; the trailing replaces clean up
        # common mojibake quote characters
        return self.massageText(html).strip().replace('\n', ' ').replace('Â’', "'").replace('“', "'")
    def getLanguage(self, context, args):
        ''' Return the currently selected language code
        '''
        return self.language

    def checkIfDBItem(self, context, arg):
        ''' Find an 'internetcontentarticles' table record based on fields and values
        return True if a record was found and an item element was created
        return False if no record was found
        '''
        results = self.getDBRecords('dummy', arg)
        if len(results):
            self.itemElement = self.createItemElement('dummy', results[0])
            return True
        return False
    def getItemElement(self, context, arg):
        ''' Return the item element that was created by a previous call to the checkIfDBItem function
        '''
        return self.itemElement
    def testSubString(self, context, *arg):
        ''' Return True or False if a substring is at the beginning or end of a string
        '''
        if arg[0] == 'starts':
            return arg[1].startswith(arg[2])
        elif arg[0] == 'ends':
            return arg[1].endswith(arg[2])
        else:
            index = arg[1].find(arg[2])
            if index == -1:
                return False
            return True
    def getDBRecords(self, context, *arg):
        ''' Return a list of 'internetcontentarticles' table records based on field and value matches
        '''
        if not self.mythdb:
            self.initializeMythDB()
        # Encode the search text to UTF-8 for the DB search
        for key in list(arg[0].keys()):
            arg[0][key] = arg[0][key].encode('UTF-8')
        return list(self.mythdb.searchInternetContent(**arg[0]))
    def createItemElement(self, context, *arg):
        ''' Create an item element from an 'internetcontentarticles' table record dictionary
        return the item element
        '''
        result = arg[0]
        itemElement = etree.XML(self.mnvItem)
        # Mandatory elements
        itemElement.find('link').text = result['url']
        itemElement.find('title').text = result['title']
        # Optional elements and attributes
        if result['subtitle']:
            etree.SubElement(itemElement, "subtitle").text = result['subtitle']
        if result['description']:
            itemElement.find('description').text = result['description']
        if result['author']:
            itemElement.find('author').text = result['author']
        if result['date']:
            itemElement.find('pubDate').text = result['date'].strftime(self.pubDateFormat)
        if result['rating'] != '32576' and result['rating'][0] != '-':
            itemElement.find('rating').text = result['rating']
        if result['thumbnail']:
            self.itemThumbnail(itemElement)[0].attrib['url'] = result['thumbnail']
        if result['mediaURL']:
            self.itemContent(itemElement)[0].attrib['url'] = result['mediaURL']
        if result['filesize'] > 0:
            self.itemContent(itemElement)[0].attrib['length'] = str(result['filesize'])
        if result['time'] > 0:
            self.itemContent(itemElement)[0].attrib['duration'] = str(result['time'])
        if result['width'] > 0:
            self.itemContent(itemElement)[0].attrib['width'] = str(result['width'])
        if result['height'] > 0:
            self.itemContent(itemElement)[0].attrib['height'] = str(result['height'])
        if result['language']:
            self.itemContent(itemElement)[0].attrib['lang'] = result['language']
        if result['season'] > 0:
            etree.SubElement(itemElement, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}season").text = str(result['season'])
        if result['episode'] > 0:
            etree.SubElement(itemElement, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}episode").text = str(result['episode'])
        if result['customhtml'] == 1:
            etree.SubElement(itemElement, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}customhtml").text = 'true'
        if result['countries']:
            for country in result['countries'].split(' '):
                etree.SubElement(itemElement, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}country").text = country
        return itemElement
    def initializeMythDB(self):
        ''' Import the MythTV database bindings and create an instance of MythDB
        '''
        try:
            from MythTV import MythDB, MythLog, MythError
            try:
                # Suppress MythTV logging that would corrupt the XML output
                MythLog._setlevel('none')
                self.mythdb = MythDB()
            except MythError as e:
                sys.stderr.write('\n! Error - %s\n' % e.args[0])
                filename = os.path.expanduser("~") + '/.mythtv/config.xml'
                if not os.path.isfile(filename):
                    sys.stderr.write('\n! Error - A correctly configured (%s) file must exist\n' % filename)
                else:
                    sys.stderr.write('\n! Error - Check that (%s) is correctly configured\n' % filename)
                sys.exit(1)
            except Exception as e:
                sys.stderr.write("\n! Error - Creating an instance caused an error for one of: MythDB. error(%s)\n" % e)
                sys.exit(1)
        except Exception as e:
            sys.stderr.write("\n! Error - MythTV python bindings could not be imported. error(%s)\n" % e)
            sys.exit(1)
class getURL(Thread):
    ''' Threaded download of a URL; filters out the desired data for XML and (X)HTML
    return the filter results
    '''
    def __init__(self, urlKey, debug):
        Thread.__init__(self)
        self.urlKey = urlKey
        self.debug = debug

    def run(self):
        # Alias for this thread's working dictionary (the shared class
        # attribute is set by Common.getUrlData() before the threads start)
        urlData = self.urlDictionary[self.urlKey]

        if self.debug:
            print("getURL href(%s)" % (urlData['href'], ))

        # Read the data from the URL
        try:
            urlData['tree'] = etree.parse(urlData['href'], urlData['parser'])
        except Exception as errormsg:
            sys.stderr.write("! Error: The URL (%s) caused the exception error (%s)\n" % (urlData['href'], errormsg))
            return

        if self.debug:
            print("Raw unfiltered URL input:")
            sys.stdout.write(etree.tostring(urlData['tree'], encoding='UTF-8', pretty_print=True))

        if len(urlData['filter']):
            # Filter out the desired data using XPath expressions
            for index in range(len(urlData['filter'])):
                try:
                    urlData['tmp'] = urlData['tree'].xpath(urlData['filter'][index], namespaces=urlData['namespaces'])
                except AssertionError as e:
                    sys.stderr.write("No filter results for Name(%s)\n" % self.urlKey)
                    sys.stderr.write("No filter results for url(%s)\n" % urlData['href'])
                    sys.stderr.write("! Error:(%s)\n" % e)
                    if len(urlData['filter']) == index + 1:     # give up only after the last filter
                        return
                    continue
                urlData['result'].append(urlData['tmp'])
        elif len(urlData['xslt']):
            # Process the data through one or more XSLT stylesheets
            for index in range(len(urlData['xslt'])):
                try:
                    if 'parameter' in urlData:
                        urlData['tmp'] = urlData['xslt'][index](urlData['tree'], paraMeter=etree.XSLT.strparam(urlData['parameter']))
                    else:
                        urlData['tmp'] = urlData['xslt'][index](urlData['tree'])
                except Exception as e:
                    sys.stderr.write("! XSLT Error:(%s) Key(%s)\n" % (e, self.urlKey))
                    if len(urlData['xslt']) == index + 1:       # give up only after the last stylesheet
                        return
                    continue
                # Skip any empty XSLT results
                if urlData['tmp'].getroot() is None:
                    sys.stderr.write("No Xslt results for Name(%s)\n" % self.urlKey)
                    sys.stderr.write("No Xslt results for url(%s)\n" % urlData['href'])
                    if len(urlData['xslt']) == index + 1:
                        return
                    continue
                urlData['result'].append(urlData['tmp'])
        else:
            # No filter or stylesheet: pass back the raw element tree
            urlData['result'] = [urlData['tree']]

        # Check whether the page filter indicates that more pages are available
        if urlData['pageFilter']:
            if len(urlData['tree'].xpath(urlData['pageFilter'], namespaces=urlData['namespaces'])):
                urlData['morePages'] = 'true'