MythTV  master
giantbomb_api.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # -*- coding: UTF-8 -*-
3 # ----------------------
4 # Name: giantbomb_api.py Simple-to-use Python interface to the GiantBomb's API (api.giantbomb.com)
5 # Python Script
6 # Author: R.D. Vaughan
7 # Purpose: This python script is intended to perform a variety of utility functions to search and
8 # access text metadata and image URLs from GiantBomb. These routines are based on the
9 # GiantBomb api. Specifications for this api are published at:
10 # http://api.giantbomb.com/documentation/
11 #
12 # License:Creative Commons GNU GPL v2
13 # (http://creativecommons.org/licenses/GPL/2.0/)
14 #-------------------------------------
15 __title__ ="giantbomb_api - Simple-to-use Python interface to The GiantBomb's API (api.giantbomb.com)";
16 __author__="R.D. Vaughan"
17 __purpose__='''
18 This python script is intended to perform a variety of utility functions to search and access text
19 metadata and image URLs from GiantBomb. These routines are based on the GiantBomb api. Specifications
20 for this api are published at http://api.giantbomb.com/documentation/
21 '''
22 
23 __version__="v0.1.0"
24 # 0.1.0 Initial development
25 
26 import os, struct, sys, datetime, time, re
27 import urllib
28 from copy import deepcopy
29 
30 from giantbomb_exceptions import (GiantBombBaseError, GiantBombHttpError, GiantBombXmlError, GiantBombGameNotFound,)
31 
32 
class OutStreamEncoder(object):
    """File-like wrapper that encodes Unicode text before handing it to a stream.

    Only write() is intercepted; every other attribute lookup is forwarded to
    the wrapped stream.
    """
    def __init__(self, outstream, encoding=None):
        """outstream: the stream object to wrap
        encoding:  name of the codec applied to Unicode strings; when empty or
                   None the file system encoding reported by "sys" is used
        """
        self.out = outstream
        self.encoding = encoding if encoding else sys.getfilesystemencoding()

    def write(self, obj):
        """Write obj to the wrapped stream.
        Unicode strings are encoded with self.encoding first, anything else is
        passed through untouched. An IOError raised by the underlying write is
        ignored.
        """
        payload = obj.encode(self.encoding) if isinstance(obj, unicode) else obj
        try:
            self.out.write(payload)
        except IOError:
            pass

    def __getattr__(self, attr):
        """Forward any attribute not found on the wrapper to the wrapped stream"""
        return getattr(self.out, attr)
# Route everything written to stdout/stderr through the UTF-8 encoding wrapper
sys.stdout = OutStreamEncoder(sys.stdout, 'utf8')
sys.stderr = OutStreamEncoder(sys.stderr, 'utf8')


# Both libraries are mandatory: report the failure on stderr and exit with status 1
try:
    from StringIO import StringIO
    from lxml import etree
except Exception as e:
    sys.stderr.write(u'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
    sys.exit(1)
68 
# Check that the lxml library is current enough
# From the lxml documents it states: (http://codespeak.net/lxml/installation.html)
# "If you want to use XPath, do not use libxml2 2.6.27. We recommend libxml2 2.7.2 or later"
# Testing was performed with the Ubuntu 9.10 "python-lxml" version "2.1.5-1ubuntu2" repository package
#
# "version" is the dotted display string used in the error message. The check itself
# compares the numeric components: comparing the dotted strings would order them
# character by character and wrongly treat e.g. "2.10.0" as older than "2.7.2".
version = '.'.join([str(digit) for digit in etree.LIBXML_VERSION])
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write(u'''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
 At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
83 
84 
class gamedbQueries(object):
    '''Methods that query api.giantbomb.com for metadata and output the results to stdout.
    Any errors are output to stderr.

    Several methods (htmlToString, getHtmlData, pubDate, futureReleaseDate, findImages,
    getImages, supportedJobs, translateName) are registered by gameSearch()/gameData() as
    XPath extension functions for the XSLT stylesheets. Their "context" parameter is never
    used by this class.
    '''
    def __init__(self,
                 apikey,
                 debug = False,
                 ):
        """apikey (str/unicode):
            Specify the api.giantbomb.com API key. Applications need their own key.
            See http://api.giantbomb.com to get your own API key

        debug (True/False):
            shows verbose debugging information
        """
        self.config = {}

        self.config['apikey'] = apikey
        self.config['debug'] = debug

        # The doubled "%%s" survives this substitution as "%s" and is filled in later with
        # the search text (searchURL) or the game id (dataURL)
        self.config['searchURL'] = u'http://api.giantbomb.com/search/?api_key=%s&offset=0&query=%%s&resources=game&format=xml' % self.config['apikey']
        self.config['dataURL'] = u'http://api.giantbomb.com/game/%%s/?api_key=%s&format=xml' % self.config['apikey']

        self.error_messages = {'GiantBombHttpError': u"! Error: A connection error to api.giantbomb.com was raised (%s)\n", 'GiantBombXmlError': u"! Error: Invalid XML was received from api.giantbomb.com (%s)\n", 'GiantBombBaseError': u"! Error: An error was raised (%s)\n", }

        # The XSLT stylesheets are loaded from an "XSLT" directory beside this file
        self.baseProcessingDir = os.path.dirname( os.path.realpath( __file__ ))

        self.pubDateFormat = u'%a, %d %b %Y %H:%M:%S GMT'
        self.xmlParser = etree.XMLParser(remove_blank_text=True)

        self.supportedJobList = ["actor", "author", "producer", "executive producer", "director", "cinematographer", "composer", "editor", "casting", "voice actor", "music", "writer", "technical director", "design director", ]
        self.tagTranslations = {
            'actor': 'Actor',
            'author': 'Author',
            'producer': 'Producer',
            'executive producer': 'Executive Producer',
            'director': 'Director',
            'cinematographer': 'Cinematographer',
            'composer': 'Composer',
            'editor': 'Editor',
            'casting': 'Casting',
            'voice actor': 'Actor',
            'music': 'Composer',
            'writer': 'Author',
            'technical director': 'Director',
            'design director': 'Director',
        }

        # Filled by findImages(); initialised here so getImages() is safe to call first
        self.imageElements = []
    # end __init__()


    def massageText(self, text):
        '''Removes HTML markup from a text string.
        @param text The HTML source.
        @return The plain text with tags dropped, entities and character references
                resolved, bare "&" characters escaped as "&amp;" and line feeds replaced
                by spaces.
        '''
        def fixup(match):
            token = match.group(0)
            if token[:1] == "<":
                return ""   # ignore tags
            if token[:2] == "&#":
                # Numeric character reference, hexadecimal ("&#x..;") or decimal ("&#..;")
                try:
                    if token[:3] == "&#x":
                        return unichr(int(token[3:-1], 16))
                    return unichr(int(token[2:-1]))
                except ValueError:
                    pass
            elif token[:1] == "&":
                # Named entity
                import htmlentitydefs
                entity = htmlentitydefs.entitydefs.get(token[1:-1])
                if entity:
                    if entity[:2] == "&#":
                        try:
                            return unichr(int(entity[2:-1]))
                        except ValueError:
                            pass
                    else:
                        return unicode(entity, "iso-8859-1")
            return token    # leave as is
        stripped = re.sub(u"(?s)<[^>]*>|&#?\\w+;", fixup, self.textUtf8(text))
        return self.ampReplace(stripped).replace(u'\n', u' ')
    # end massageText()


    def textUtf8(self, text):
        '''Decode a UTF-8 byte string into a Unicode string.
        return None when the input is None
        return an empty Unicode string when the bytes are not valid UTF-8
        return the input unchanged when it cannot be decoded at all (e.g. it already is Unicode)
        '''
        if text is None:
            return text
        try:
            return unicode(text, 'utf8')
        except UnicodeDecodeError:
            return u''
        except (UnicodeEncodeError, TypeError):
            return text
    # end textUtf8()


    def ampReplace(self, text):
        '''Replace all "&" characters with "&amp;"
        An "&" that already starts "&amp;" is left alone so text is never escaped twice.
        '''
        text = self.textUtf8(text)
        # The negative look-ahead skips existing "&amp;" sequences without needing a
        # placeholder string that could collide with real text
        return re.sub(u'&(?!amp;)', u'&amp;', text)
    # end ampReplace()


    def htmlToString(self, context, html):
        ''' Remove HTML tags and LFs from a string
        return the string without HTML tags or LFs
        return an empty string when there is no input
        '''
        if not html:
            return u""
        return self.massageText(html).strip().replace(u'\n', u' ').replace(u'’', u"&apos;").replace(u'“', u"&apos;")
    # end htmlToString()

    def getHtmlData(self, context, *args):
        ''' Take a HTML string and convert it to an HTML element. Then apply a filter and return
        the results.
        Called with (htmldata) or with (xpathFilter, htmldata).
        return an empty array when there is no HTML data
        return the parsed HTML element when no filter was given
        return the first match of the filter: the value itself when the filter contains an "@",
               otherwise the text of the matched element
        return an empty string if the filter failed to find any values.
        '''
        if len(args) > 1:
            xpathFilter, htmldata = args[0], args[1]
        else:
            xpathFilter, htmldata = None, args[0]
        if not htmldata:
            return []
        htmlElement = etree.HTML(htmldata)
        if not xpathFilter:
            return htmlElement
        filteredData = htmlElement.xpath(xpathFilter)
        if not len(filteredData):
            return u''
        if '@' in xpathFilter:
            return filteredData[0]
        return filteredData[0].text
    # end getHtmlData()

    def pubDate(self, context, *inputArgs):
        '''Convert a date/time string in a specified format into a pubDate. The default is the
        MNV item format
        Called with (date), (date, inputFormat) or (date, inputFormat, outputFormat).
        return the formatted pubDate string
        return the current date and time when the date is empty or no input format is given
        return on error return the original date string
        '''
        args = list(inputArgs)
        if not args or args[0] == u'':
            return datetime.datetime.now().strftime(self.pubDateFormat)
        # Drop a trailing "+..." or "-..." suffix, but only when the sign is found past
        # position 5 so that a leading "YYYY-" date separator is not mistaken for it
        index = args[0].find('+')
        if index == -1:
            index = args[0].find('-')
        if index > 5:
            args[0] = args[0][:index].strip()
        args[0] = args[0].replace(',', u'').replace('.', u'')
        try:
            if len(args) < 2:
                return datetime.datetime.now().strftime(self.pubDateFormat)
            args[1] = args[1].replace(',', u'').replace('.', u'')
            if args[1].find('GMT') != -1:
                # Remove "GMT" from the format and the last word from the date
                args[1] = args[1][:args[1].find('GMT')].strip()
                args[0] = args[0][:args[0].rfind(' ')].strip()
            try:
                pubdate = time.strptime(args[0], args[1])
            except ValueError:
                # Retry with the abbreviated and full month name directives swapped
                if args[1] == '%a %d %b %Y %H:%M:%S':
                    pubdate = time.strptime(args[0], '%a %d %B %Y %H:%M:%S')
                elif args[1] == '%a %d %B %Y %H:%M:%S':
                    pubdate = time.strptime(args[0], '%a %d %b %Y %H:%M:%S')
                else:
                    # No alternative to try: report the real parsing error below
                    raise
            if len(args) > 2:
                return time.strftime(args[2], pubdate)
            return time.strftime(self.pubDateFormat, pubdate)
        except Exception as err:
            sys.stderr.write(u'! Error: pubDate variables(%s) error(%s)\n' % (args, err))
        return args[0]
    # end pubDate()

    def futureReleaseDate(self, context, gameElement):
        '''Convert the "expected" release date into a "YYYY-MM-DD" date string.
        The day is always the 1st. The month is taken from "expected_release_month" when
        present, otherwise it is the last month of "expected_release_quarter", otherwise
        December.
        return the formatted date string
        return If there is not enough information to make a date then return an empty string
        '''
        try:
            year = gameElement.find('expected_release_year').text
            quarter = gameElement.find('expected_release_quarter').text
            month = gameElement.find('expected_release_month').text
        except (AttributeError, TypeError):
            # One of the expected child elements is missing
            return u''
        if not year:
            return u''
        try:
            if not month:
                if quarter:
                    month = str(int(quarter) * 3)
                else:
                    month = '12'
            pubdate = time.strptime((u'%s-%s-01' % (year, month)), '%Y-%m-%d')
        except ValueError:
            # Non-numeric or out of range values cannot make a date
            return u''

        return time.strftime('%Y-%m-%d', pubdate)
    # end futureReleaseDate()

    def findImages(self, context, *args):
        '''Parse the "image" and "description" elements for images and put in a persistent array
        args[0] elements searched for "super_url" values, stored as "coverart" images
        args[1] elements whose first entry holds HTML text; "thumb" "screen" image links found
                in it are stored as "screenshot" images
        args[2] (optional) elements searched for "super_url" values, stored as "screenshot" images
        return True when there are images available
        return False if there are no images
        '''
        def makeImageElement(typeImage, url, thumb):
            ''' Create a single Image element
            return the image element
            '''
            imageElement = etree.XML(u"<image></image>")
            imageElement.attrib['type'] = typeImage
            imageElement.attrib['url'] = url
            imageElement.attrib['thumb'] = thumb
            return imageElement
        # end makeImageElement()

        superImageFilter = etree.XPath('.//super_url/text()')
        self.imageElements = []
        for imageElement in args[0]:
            for image in superImageFilter(imageElement):
                self.imageElements.append(makeImageElement('coverart', image, image.replace(u'super', u'thumb')))

        # The description may be absent, in which case there is nothing to scan
        if len(args) > 1 and len(args[1]):
            htmlElement = self.getHtmlData('dummy', etree.tostring(args[1][0], method="text", encoding=unicode).strip())
            if htmlElement is not None and len(htmlElement):
                for image in htmlElement[0].xpath('.//a/img/@src'):
                    # Only thumbnail sized screen shots are of interest
                    if image.find('screen') == -1 or image.find('thumb') == -1:
                        continue
                    self.imageElements.append(makeImageElement('screenshot', image.replace(u'thumb', u'super'), image))

        if len(args) > 2:
            for imageElement in args[2]:
                for image in superImageFilter(imageElement):
                    self.imageElements.append(makeImageElement('screenshot', image, image.replace(u'super', u'thumb')))

        return bool(self.imageElements)
    # end findImages()

    def getImages(self, context, arg):
        '''Return an array of image elements that was created by a previous "findImages" function call
        return the array of image elements
        '''
        return self.imageElements
    # end getImages()

    def supportedJobs(self, context, *inputArgs):
        '''Validate that the job category is supported by the
        Universal Metadata Format item format
        The first argument is a single job name or a list of job names; matching ignores case.
        return True if at least one job is supported
        return False if not supported
        '''
        jobs = inputArgs[0]
        if not isinstance(jobs, list):
            jobs = [jobs]
        for job in jobs:
            if job.lower() in self.supportedJobList:
                return True
        return False
    # end supportedJobs()

    def translateName(self, context, *inputArgs):
        '''Translate a tag name into the Universal Metadata Format item equivalent
        return the translated tag equivalent
        return the lower cased input name when there is no translation for it
        '''
        name = inputArgs[0].lower()
        return self.tagTranslations.get(name, name)
    # end translateName()

    def buildFuncDict(self):
        """ Build a dictionary of the XPath extension functions for the XSLT stylesheets
        The dictionary is stored in self.FuncDict
        Returns nothing
        """
        self.FuncDict = {
            'htmlToString': self.htmlToString,
            'getHtmlData': self.getHtmlData,
            'pubDate': self.pubDate,
            'futureReleaseDate': self.futureReleaseDate,
            'findImages': self.findImages,
            'getImages': self.getImages,
            'supportedJobs': self.supportedJobs,
            'translateName': self.translateName,
        }
        return
    # end buildFuncDict()

    def _transformAndExit(self, url, stylesheetPath):
        """Shared implementation of gameSearch() and gameData().
        Parse the XML found at "url", transform it with the XSLT stylesheet at
        "stylesheetPath" and write the result to stdout when it contains at least one
        "item" element.
        Never returns: exits with status 0, or with status 1 when the XML could not be
        read or parsed.
        """
        if self.config['debug']:
            print("URL(%s)" % url)
            print("")

        try:
            sourceTree = etree.parse(url, parser=self.xmlParser)
        except Exception as errmsg:
            sys.stderr.write(u"! Error: Invalid XML was received from www.giantbomb.com (%s)\n" % errmsg)
            sys.exit(1)

        transform = etree.XSLT(etree.parse(stylesheetPath))
        # Make this class's helper methods callable from the stylesheet
        gamebombXpath = etree.FunctionNamespace('http://www.mythtv.org/wiki/MythTV_Universal_Metadata_Format')
        gamebombXpath.prefix = 'gamebombXpath'
        self.buildFuncDict()
        for key in self.FuncDict:
            gamebombXpath[key] = self.FuncDict[key]

        items = transform(sourceTree)

        if items.getroot() is not None and len(items.xpath('//item')):
            sys.stdout.write(etree.tostring(items, encoding='UTF-8', method="xml", xml_declaration=True, pretty_print=True, ))
        sys.exit(0)
    # end _transformAndExit()

    def gameSearch(self, gameTitle):
        """Display a Game query in XML format:
        http://www.mythtv.org/wiki/MythTV_Universal_Metadata_Format
        gameTitle (unicode): the text to search for
        Returns nothing, the process exits once the results have been written
        (status 0, or 1 when the XML could not be read or parsed)
        """
        url = self.config['searchURL'] % urllib.quote_plus(gameTitle.encode("utf-8"))
        self._transformAndExit(url, u'%s/XSLT/giantbombQuery.xsl' % self.baseProcessingDir)
    # end gameSearch()

    def gameData(self, gameId):
        """Display a Game details in XML format:
        http://www.mythtv.org/wiki/MythTV_Universal_Metadata_Format
        gameId: the GiantBomb game number, substituted into the data URL
        Returns nothing, the process exits once the results have been written
        (status 0, or 1 when the XML could not be read or parsed)
        """
        url = self.config['dataURL'] % gameId
        self._transformAndExit(url, u'%s/XSLT/giantbombGame.xsl' % self.baseProcessingDir)
    # end gameData()

# end Class gamedbQueries()
456 
def main():
    """Simple example of using giantbomb_api - it just
    searches for any Game with the word "Grand" in its title and returns a list of matches
    in Universal XML format. Also gets game details using a GameBomb#.
    """
    # api.giantbomb.com api key provided for MythTV
    apikey = "b5883a902a8ed88b15ce21d07787c94fd6ad9f33"
    gamebomb = gamedbQueries(apikey)
    # Output the matching game titles.
    # gameSearch() finishes by calling sys.exit(); a successful exit is absorbed here so
    # that the details example below also runs, a failure exit is passed on unchanged.
    try:
        gamebomb.gameSearch(u'Grand')
    except SystemExit as exitRequest:
        if exitRequest.code:
            raise
    print("")
    # Output the game details for GiantBomb number '19995'
    gamebomb.gameData(u'19995')
# end main()

if __name__ == '__main__':
    main()
def translateName(self, context, *inputArgs)
static pid_list_t::iterator find(const PIDInfoMap &map, pid_list_t &list, pid_list_t::iterator begin, pid_list_t::iterator end, bool find_open)
def getHtmlData(self, context, *args)
def supportedJobs(self, context, *inputArgs)
def findImages(self, context, *args)
def pubDate(self, context, *inputArgs)
def htmlToString(self, context, html)
def __init__(self, outstream, encoding=None)
def getImages(self, context, arg)
def __init__(self, apikey, debug=False)
def futureReleaseDate(self, context, gameElement)