MythTV master
youtube_api.py
Go to the documentation of this file.
1# -*- coding: UTF-8 -*-
2
3# ----------------------
4# Name: youtube_api - Simple-to-use Python interface to the youtube API (http://www.youtube.com/)
5# Python Script
6# Author: R.D. Vaughan
7# Purpose: This python script is intended to perform a variety of utility functions to search and access text
8# metadata, video and image URLs from youtube. These routines are based on the api. Specifications
9# for this api are published at http://developer.youtubenservices.com/docs
10#
11# License:Creative Commons GNU GPL v2
12# (http://creativecommons.org/licenses/GPL/2.0/)
13#-------------------------------------
14__title__ ="youtube_api - Simple-to-use Python interface to the youtube API (http://developer.youtubenservices.com/docs)"
15__author__="R.D. Vaughan"
16__purpose__='''
17This python script is intended to perform a variety of utility functions to search and access text
18meta data, video and image URLs from youtube. These routines are based on the api. Specifications
19for this api are published at http://developer.youtubenservices.com/docs
20'''
21
22__version__="v0.3.0"
23# 0.1.0 Initial development
24# 0.1.1 Added Tree view display option
25# 0.1.2 Modified Tree view internals to be consistent in approach and structure.
26# 0.1.3 Added images for directories
27# 0.1.4 Documentation review
28# 0.2.0 Public release
29# 0.2.1 New python bindings conversion
30# Better exception error reporting
31# Better handling of invalid unicode data from source
32# 0.2.2 Completed exception error reporting improvements
33# Removed the use of the feedparser library
34# 0.2.3 Fixed an exception message output code error in two places
35# 0.2.4 Removed the need for python MythTV bindings and added "%SHAREDIR%" to icon directory path
36# 0.2.5 Fixed the Foreign Film icon file name
37# 0.3.0 Adapted to the v3 API
38
39import os, struct, sys, re, time, shutil
40import urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse
41import json
42import logging
43from MythTV import MythXML
44from ..common import common_api
45
46from .youtube_exceptions import (YouTubeUrlError, YouTubeHttpError, YouTubeRssError, YouTubeVideoNotFound, YouTubeInvalidSearchType, YouTubeXmlError, YouTubeVideoDetailError, YouTubeCategoryNotFound)
47from .youtube_data import getData
48
# aniso8601 is optional: it is only used to convert ISO 8601 video
# durations into seconds.
try:
    import aniso8601
except ImportError:
    # Keep the name bound so that later uses fail with an ordinary,
    # catchable error instead of depending on an undefined global.
    aniso8601 = None
    sys.stderr.write("The module aniso8601 could not be imported, duration "
                     "parsing will be disabled\n")
55
56
class JsonHandler(object):
    """Deals with retrieval of JSON data from API

    The URL is only stored by the constructor; the request is issued each
    time getJson() is called.
    """

    def __init__(self, url):
        """url (str): address of the JSON document to fetch."""
        self.url = url

    def getJson(self):
        """Open self.url and return the decoded JSON payload.

        Raises YouTubeHttpError when opening or reading the URL fails with
        an IOError.
        """
        try:
            # The context manager closes the handle even when decoding the
            # payload fails.
            with urllib.request.urlopen(self.url) as urlhandle:
                return json.load(urlhandle)
        except IOError as errormsg:
            raise YouTubeHttpError(errormsg) from errormsg
69
70
71class Videos(object):
72 """Main interface to http://www.youtube.com/
73 This is done to support a common naming framework for all python Netvision plugins no matter their site
74 target.
75
76 Supports search methods
77 """
def __init__(self,
             apikey,
             mythtv = True,
             interactive = False,
             select_first = False,
             debug = False,
             custom_ui = None,
             language = None,
             search_all_languages = False,
             ):
    """Set up configuration, logging, user preferences and lookup tables.

    apikey (str/unicode):
        Specify the target site API key. Applications need their own key in some cases.
        NOTE: this constructor does not read the argument. self.apikey is taken from
        getData() and is replaced by the <apikey> element of the user preferences
        file when that element has text.

    mythtv (True/False):
        When True, the returned meta data has its keys and values massaged to match MythTV.
        When False, the returned meta data matches what the target site returned.
        NOTE: the value is accepted but not read by this constructor.

    interactive (True/False): (This option is not supported by all target site apis)
        When True, the built-in console UI is used to select the correct show.
        When False, the first search result is used.

    select_first (True/False): (This option is not currently implemented in any grabbers)
        Automatically selects the first series search result (rather
        than showing the user a list of more than one series).
        Is overridden by interactive = False, or specifying a custom_ui

    debug (True/False):
         shows verbose debugging information

    custom_ui (xx_ui.BaseUI subclass): (This option is not currently implemented in any grabbers)
        A callable subclass of interactive class (overrides interactive option)

    language (2 character language abbreviation): (This option is not supported by all target site apis)
        The language of the returned data. Is also the language search
        uses. Defaults to an empty string (no language specified).

    search_all_languages (True/False): (This option is not supported by all target site apis)
        By default, a Netvision grabber will only search in the language specified using
        the language option. When this is True, it will search for the
        show in any language

    This constructor does not set self.page_limit; the search and tree view
    methods read it, so it has to be assigned before they are called.
    """
    self.config = {}
    self.common = common_api.Common()
    self.mythxml = MythXML()

    self.config['debug_enabled'] = debug # show debugging messages

    self.log_name = "youtube"
    self.log = self._initLogger() # Setups the logger (self.log.debug() etc)

    self.config['custom_ui'] = custom_ui

    self.config['interactive'] = interactive # prompt for correct series?

    self.config['select_first'] = select_first

    self.config['search_all_languages'] = search_all_languages

    # Message templates keyed by exception name; the request methods and
    # searchForVideos() look them up through self.error_messages.
    self.error_messages = {
        'YouTubeUrlError': "! Error: The URL (%s) cause the exception error (%s)\n",
        'YouTubeHttpError': "! Error: An HTTP communications error with YouTube was raised (%s)\n",
        'YouTubeRssError': "! Error: Invalid RSS meta data\nwas received from YouTube error (%s). Skipping item.\n",
        'YouTubeVideoNotFound': "! Error: Video search with YouTube did not return any results (%s)\n",
        'YouTubeVideoDetailError': "! Error: Invalid Video meta data detail\nwas received from YouTube error (%s). Skipping item.\n",
    }

    # This is an example that must be customized for each target site.
    # Index 0 maps channel keys, index 1 maps item keys; translateItem()
    # reads index 1.
    self.key_translation = [
        {'channel_title': 'channel_title',
         'channel_link': 'channel_link',
         'channel_description': 'channel_description',
         'channel_numresults': 'channel_numresults',
         'channel_returned': 'channel_returned',
         'channel_startindex': 'channel_startindex'},
        {'title': 'item_title',
         'author': 'item_author',
         'published_parsed': 'item_pubdate',
         'media_description': 'item_description',
         'video': 'item_link',
         'thumbnail': 'item_thumbnail',
         'link': 'item_url',
         'duration': 'item_duration',
         'rating': 'item_rating',
         'item_width': 'item_width',
         'item_height': 'item_height',
         'language': 'item_lang'},
    ]

    # Defaulting to no language specified. The YouTube API does support specifying a language
    self.config['language'] = language if language else ''

    self.getUserPreferences("~/.mythtv/MythNetvision/userGrabberPrefs/youtube.xml")

    # Read region code from user preferences, used by tree view
    region = self.userPrefs.find("region")
    if region is not None and region.text:
        self.config['region'] = region.text
    else:
        self.config['region'] = 'us'

    # NOTE(review): presumably yields the built-in default API key - confirm
    # against youtube_data.getData.
    self.apikey = getData().update(getData().a)

    # A key stored in the user preferences file takes precedence.
    pref_apikey = self.userPrefs.find("apikey")
    if pref_apikey is not None and pref_apikey.text:
        self.apikey = pref_apikey.text

    # Category title -> icon path fragment, consumed by setTreeViewIcon()
    self.feed_icons = {
        'Film & Animation': 'directories/topics/movies',
        'Movies': 'directories/topics/movies',
        'Trailers': 'directories/topics/movies',
        'Sports': 'directories/topics/sports',
        'News & Politics': 'directories/topics/news',
        'Science & Technology': 'directories/topics/technology',
        'Education': 'directories/topics/education',
        'Howto & Style': 'directories/topics/howto',
        'Music': 'directories/topics/music',
        'Gaming': 'directories/topics/games',
        'Entertainment': 'directories/topics/entertainment',
        'Autos & Vehicles': 'directories/topics/automotive',
        'Pets & Animals': 'directories/topics/animals',
        'Travel & Events': 'directories/topics/travel',
        'People & Blogs': 'directories/topics/people',
        }

    self.treeview = False
    self.channel_icon = '%SHAREDIR%/mythnetvision/icons/youtube.png'
# end __init__()
207
def getUserPreferences(self, userPreferenceFilePath):
    '''Load the user's grabber preferences into self.userPrefs.
    If the preference file does not exist yet, the default file of the same
    name is copied into place first (creating the directory when needed).
    The file is then parsed with self.common.etree.parse() through a
    "file://" URL.
    raise Exception(url, original error) when the file cannot be parsed
    '''
    userPreferenceFilePath = os.path.expanduser(userPreferenceFilePath)

    # If the user config file does not exist then copy the default one
    if not os.path.isfile(userPreferenceFilePath):
        # Make the necessary directories if they do not already exist
        prefDir = os.path.dirname(userPreferenceFilePath)
        if prefDir and not os.path.isdir(prefDir):
            os.makedirs(prefDir)

        # 'baseProcessingDir' is not defined in this module, so referencing
        # it directly raises NameError. Honour it when something has
        # provided it as a module global, otherwise derive it from this
        # file's location.
        baseDir = globals().get('baseProcessingDir')
        if baseDir is None:
            # NOTE(review): assumes this module lives at
            # <base>/nv_python_libs/<site>/<this file> - confirm
            baseDir = os.path.dirname(os.path.dirname(os.path.dirname(
                os.path.realpath(__file__))))

        fileName = os.path.basename(userPreferenceFilePath)
        defaultConfig = '%s/nv_python_libs/configs/XML/defaultUserPrefs/%s' \
                % (baseDir, fileName)
        shutil.copy2(defaultConfig, userPreferenceFilePath)

    # Read the grabber's user preferences file
    url = 'file://%s' % userPreferenceFilePath
    if self.config['debug_enabled']:
        print(url)
        print()
    try:
        self.userPrefs = self.common.etree.parse(url)
    except Exception as e:
        raise Exception(url, e) from e
232
233
238
def detectUserLocationByIP(self):
    '''Get longitude and latitude to find videos relative to your location. Up to three different
    servers will be tried before giving up.
    return a dictionary e.g.
    {'Latitude': '43.6667', 'Country': 'Canada', 'Longitude': '-79.4167', 'City': 'Toronto'}
    return an empty dictionary if there were any errors
    Code found at: http://blog.suinova.com/2009/04/from-ip-to-geolocation-country-city.html
    '''
    def getExternalIP():
        '''Find the external IP address of this computer.
        return the address as text, or None when the lookup failed
        '''
        try:
            with urllib.request.urlopen('http://www.whatismyip.com/automation/n09230945.asp') as resp:
                # read() returns bytes; decode so the value can be used in
                # URLs and matched with text patterns.
                return resp.read().decode('utf-8', 'replace').strip()
        except Exception:
            # Best effort: any failure simply means "location unknown"
            return None
    # end getExternalIP()

    ip = getExternalIP()

    if not ip:
        return {}

    # Lookup services, tried in order until one of them answers
    geoServers = (
        'http://blogama.org/ip_query.php?ip=%s&output=xml',
        'http://www.seomoz.org/ip2location/look.php?ip=%s',
        'http://api.hostip.info/?ip=%s',
        )
    txt = None
    for server in geoServers:
        try:
            # The address came from a remote response, so quote it
            with urllib.request.urlopen(server % urllib.parse.quote(ip)) as gs:
                txt = gs.read().decode('utf-8', 'replace')
            break
        except Exception:
            continue
    if txt is None:
        logging.error('GeoIP servers not available')
        return {}

    try:
        # Each service answers in its own format; pick the parser by a
        # marker that only appears in that format.
        if '<Response>' in txt:
            countrys = re.findall(r'<CountryName>([\w ]+)<',txt)[0]
            citys = re.findall(r'<City>([\w ]+)<',txt)[0]
            lats,lons = re.findall(r'<Latitude>([\d\-\.]+)</Latitude>\s*<Longitude>([\d\-\.]+)<',txt)[0]
        elif 'GLatLng' in txt:
            citys,countrys = re.findall(r'<br />\s*([^<]+)<br />\s*([^<]+)<',txt)[0]
            lats,lons = re.findall(r'LatLng\(([-\d\.]+),([-\d\.]+)',txt)[0]
        elif '<gml:coordinates>' in txt:
            citys = re.findall(r'<Hostip>\s*<gml:name>(\w+)</gml:name>',txt)[0]
            countrys = re.findall(r'<countryName>([\w ,\.]+)</countryName>',txt)[0]
            lats,lons = re.findall(r'gml:coordinates>([-\d\.]+),([-\d\.]+)<',txt)[0]
        else:
            logging.error('error parsing IP result %s', txt)
            return {}
        return {'Country':countrys,'City':citys,'Latitude':lats,'Longitude':lons}
    except (IndexError, ValueError):
        # A pattern did not match (empty findall) or did not yield a pair
        logging.error('Error parsing IP result %s', txt)
        return {}
# end detectUserLocationByIP()
297
298
def massageDescription(self, text):
    '''Removes HTML markup from a text string.
    Tags are dropped, numeric character references and known named
    entities are replaced by the character they stand for, and anything
    unrecognised is left untouched. The result is passed through
    self.common.ampReplace().
    @param text The HTML source.
    @return The plain text.
    '''
    def fixup(m):
        token = m.group(0)
        if token[:1] == "<":
            return "" # ignore tags
        if token[:2] == "&#":
            try:
                if token[:3].lower() == "&#x":
                    return chr(int(token[3:-1], 16))
                return chr(int(token[2:-1]))
            except (ValueError, OverflowError):
                # Not a valid code point, keep the original text
                pass
        elif token[:1] == "&":
            import html.entities
            entity = html.entities.entitydefs.get(token[1:-1])
            if entity:
                if entity[:2] == "&#":
                    try:
                        return chr(int(entity[2:-1]))
                    except (ValueError, OverflowError):
                        pass
                else:
                    # entitydefs values are already text; they must not be
                    # decoded again (str(text, encoding) raises TypeError).
                    return entity
        return token # leave as is
    return self.common.ampReplace(re.sub(r"(?s)<[^>]*>|&#?\w+;", fixup, self.common.textUtf8(text)))
# end massageDescription()
331
def _initLogger(self):
    """Setups a logger using the logging module, returns a log object

    The logger is looked up by self.log_name and writes to stdout. Its
    level is DEBUG when self.config['debug_enabled'] is true, otherwise
    WARNING.
    """
    logger = logging.getLogger(self.log_name)

    # logging.getLogger() hands back the same object for the same name, so
    # only attach a handler the first time; otherwise every new instance
    # would make each message appear once more.
    if not logger.handlers:
        hdlr = logging.StreamHandler(sys.stdout)
        hdlr.setFormatter(logging.Formatter('%(asctime)s) %(levelname)s %(message)s'))
        logger.addHandler(hdlr)

    if self.config['debug_enabled']:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.WARNING)
    return logger
#end initLogger
349
def setTreeViewIcon(self, dir_icon=None):
    '''Check if there is a specific generic tree view icon. If not default to the channel icon.
    dir_icon: icon path fragment to use; when empty, the fragment registered
    in self.feed_icons for self.tree_key is used instead.
    return self.tree_dir_icon
    '''
    # Start from the channel icon so that a category without its own icon
    # never inherits the icon chosen for the previous category.
    self.tree_dir_icon = self.channel_icon
    if not dir_icon:
        dir_icon = self.feed_icons.get(self.tree_key)
        if not dir_icon:
            return self.tree_dir_icon
    self.tree_dir_icon = '%%SHAREDIR%%/mythnetvision/icons/%s.png' % (dir_icon, )
    return self.tree_dir_icon
# end setTreeViewIcon()
364
365
370
371
def searchTitle(self, title, pagenumber, pagelen):
    '''Key word video search of the YouTube web site
    title: the search text
    pagenumber: page token to request; the integer 1 means "first page"
    pagelen: maximum number of results to request
    Stores the total result count and any next/previous page tokens in
    self.channel.
    return an array of matching item dictionaries
    raise YouTubeVideoNotFound when the search produced no videos
    '''
    # Special case where the grabber has been executed without any page
    # argument
    if 1 == pagenumber:
        pagenumber = ""

    not_found = "No YouTube Video matches found for search value (%s)" % title

    result = self.getSearchResults(title, pagenumber, pagelen)
    if not result:
        raise YouTubeVideoNotFound(not_found)

    self.channel['channel_numresults'] = int(result['pageInfo']['totalResults'])
    if 'nextPageToken' in result:
        self.channel['nextpagetoken'] = result['nextPageToken']
    if 'prevPageToken' in result:
        self.channel['prevpagetoken'] = result['prevPageToken']

    ids = [entry['id']['videoId'] for entry in result['items']]
    if not ids:
        # Nothing matched: do not issue a details request with an empty
        # id list, report "not found" straight away.
        raise YouTubeVideoNotFound(not_found)

    details = self.getVideoDetails(ids)
    data = [self.parseDetails(entry) for entry in details['items']]

    if not data:
        raise YouTubeVideoNotFound(not_found)

    return data
# end searchTitle()
402
def getSearchResults(self, title, pagenumber, pagelen):
    '''Run a keyword search against the YouTube v3 search endpoint.
    title: search text, URL-quoted before use
    pagenumber: page token placed in the request as-is
    pagelen: value of the maxResults parameter
    return the decoded JSON reply
    raise YouTubeUrlError for any failure while fetching the URL
    '''
    template = ('https://www.googleapis.com/youtube/v3/search?part=snippet&'
                'type=video&q=%s&maxResults=%s&order=relevance&'
                'videoEmbeddable=true&key=%s&pageToken=%s')
    quoted_title = urllib.parse.quote_plus(title.encode("utf-8"))
    url = template % (quoted_title, pagelen, self.apikey, pagenumber)

    if self.config['debug_enabled']:
        print(url)
        print()

    try:
        reply = JsonHandler(url).getJson()
    except Exception as errormsg:
        raise YouTubeUrlError(self.error_messages['YouTubeUrlError'] % (url, errormsg))
    return reply
417
def getVideoDetails(self, ids):
    '''Fetch id, snippet and contentDetails for the given video ids.
    ids: sequence of video id strings, sent as one comma separated list
    return the decoded JSON reply
    raise YouTubeUrlError for any failure while fetching the URL
    '''
    id_list = ",".join(ids)
    base = 'https://www.googleapis.com/youtube/v3/videos?part=id,snippet,'
    url = base + 'contentDetails&key=%s&id=%s' % (self.apikey, id_list)
    try:
        reply = JsonHandler(url).getJson()
    except Exception as errormsg:
        raise YouTubeUrlError(self.error_messages['YouTubeUrlError'] % (url, errormsg))
    return reply
425
def parseDetails(self, entry):
    '''Convert one video entry of an API reply into a flat item dictionary.
    entry: dictionary with at least 'id' and 'snippet'; 'contentDetails'
    is used for the duration when present.
    return the item dictionary. When a required key is missing the
    dictionary is returned as far as it was filled in, without the text
    clean-up pass.
    '''
    item = {}
    try:
        item['id'] = entry['id']
        item['video'] = self.mythxml.getInternetContentUrl(
            "nv_python_libs/configs/HTML/youtube.html", item['id'])
        item['link'] = item['video']
        snippet = entry['snippet']
        item['title'] = snippet['title']
        item['media_description'] = snippet['description']
        item['thumbnail'] = snippet['thumbnails']['high']['url']
        item['author'] = snippet['channelTitle']
        item['published_parsed'] = snippet['publishedAt']

        try:
            duration = aniso8601.parse_duration(entry['contentDetails']['duration'])
            item['duration'] = duration.days * 24 * 3600 + duration.seconds
        except Exception:
            # Best effort: the duration is simply left out when aniso8601
            # is unavailable or the entry carries no usable duration.
            pass

        for key in list(item.keys()):
            value = item[key]
            # Make sure there are no item elements that are None
            if value is None:
                item[key] = ''
            elif key == 'published_parsed': # 2010-01-23T08:38:39.000Z
                if value:
                    # Drop optional fractional seconds, which strptime's
                    # %S cannot consume.
                    stamp = re.sub(r'\.\d+(?=Z$)', '', value.strip())
                    pub_time = time.strptime(stamp, "%Y-%m-%dT%H:%M:%SZ")
                    item[key] = time.strftime('%a, %d %b %Y %H:%M:%S GMT', pub_time)
            elif key == 'media_description' or key == 'title':
                # Strip the HTML tags
                if value:
                    item[key] = self.massageDescription(value.strip())
                    item[key] = item[key].replace('|', '-')
            elif isinstance(value, str):
                if value:
                    item[key] = self.common.ampReplace(value.replace('"\n',' ').strip())
    except KeyError:
        pass

    return item
467
def searchForVideos(self, title, pagenumber):
    """Common name for a video search. Used to interface with MythTV plugin NetVision

    Resets self.channel, runs searchTitle() with self.page_limit and
    translates every match with translateItem().
    return [[channel dictionary, list of translated items]], or None when
    nothing was found. Any error other than "not found" is reported on
    stderr and ends the process with exit status 1.
    """
    # Channel details and search results
    self.channel = dict(
        channel_title='YouTube',
        channel_link='http://www.youtube.com/',
        channel_description="Share your videos with friends, family, and the world.",
        channel_numresults=0,
        channel_returned=1,
        channel_startindex=0,
    )

    try:
        matches = self.searchTitle(title, pagenumber, self.page_limit)
    except YouTubeVideoNotFound as msg:
        sys.stderr.write("%s\n" % msg)
        return None
    except YouTubeUrlError as msg:
        sys.stderr.write('%s\n' % msg)
        sys.exit(1)
    except YouTubeHttpError as msg:
        sys.stderr.write(self.error_messages['YouTubeHttpError'] % msg)
        sys.exit(1)
    except YouTubeRssError as msg:
        sys.stderr.write(self.error_messages['YouTubeRssError'] % msg)
        sys.exit(1)
    except Exception as e:
        sys.stderr.write("! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (title, e))
        sys.exit(1)

    # Covers both "no result object" and "empty result list"
    if matches is None or len(matches) == 0:
        return None

    translated = []
    for match in matches:
        translated.append(self.translateItem(match))
    self.channel['channel_returned'] = len(translated)

    if not translated:
        return None
    return [[self.channel, translated]]
# end searchForVideos()
515
def translateItem(self, item):
    '''Rename the keys of an item using the item table of self.key_translation.
    Every key of the table appears in the result; keys missing from item
    get an empty string.
    return the translated dictionary
    '''
    item_keys = self.key_translation[1]
    return {target: (item[source] if source in item else '')
            for source, target in item_keys.items()}
524
def displayTreeView(self):
    '''Gather the Youtube categories/feeds/...etc then get a max page of videos meta data in each of them
    return array of directories and their video metadata
    raise YouTubeCategoryNotFound when no category list could be obtained
    '''
    # Channel details and search results
    self.channel = {
        'channel_title': 'YouTube',
        'channel_link': 'http://www.youtube.com/',
        'channel_description': "Share your videos with friends, family, and the world.",
        'channel_numresults': 0,
        'channel_returned': 1,
        'channel_startindex': 0}

    etree = self.getVideoCategories()
    # A reply without an 'items' list is as useless as no reply at all
    if etree is None or 'items' not in etree:
        raise YouTubeCategoryNotFound("No YouTube Categories found for Tree view")

    # Category title -> category id
    feed_names = {}
    for category in etree['items']:
        snippet = category['snippet']
        feed_names[snippet['title']] = self.common.ampReplace(category['id'])

    # Get videos within each category
    dictionaries = []

    # Process the various video feeds/categories/... etc
    for category, category_id in feed_names.items():
        # tree_key names the directory currently being filled; the
        # per-category helpers read it.
        self.tree_key = category
        dictionaries = self.getVideosForCategory(category_id, dictionaries)

    return [[self.channel, dictionaries]]
# end displayTreeView()
557
def getVideoCategories(self):
    '''Fetch the video categories available for the configured region.
    The region comes from self.config['region'].
    return the decoded JSON reply
    raise YouTubeUrlError for any failure while fetching the URL
    '''
    # Built outside the try block so the error handler below can always
    # refer to the URL.
    url = 'https://www.googleapis.com/youtube/v3/videoCategories?' + \
            'part=snippet&regionCode=%s&key=%s' % \
            (self.config['region'], self.apikey)
    try:
        return JsonHandler(url).getJson()
    except Exception as errormsg:
        raise YouTubeUrlError(self.error_messages['YouTubeUrlError'] % (url, errormsg))
566
def getVideosForCategory(self, categoryId, dictionaries):
    '''Append the most popular videos of one category to dictionaries.
    categoryId: id of the category to query
    dictionaries: list that receives the directory entry and its videos
    return the same dictionaries list
    '''
    query = 'chart=mostPopular&videoCategoryId=%s&maxResults=%s&key=%s' % (
        categoryId, self.page_limit, self.apikey)
    url = 'https://www.googleapis.com/youtube/v3/videos?part=snippet&' + query

    # Collect into a fresh list, then move the entries over
    dictionaries.extend(self.getVideosForURL(url, []))
    return dictionaries
# end getVideosForCategory()
580
def getVideosForURL(self, url, dictionaries):
    '''Get the video metadata for url search
    Appends, for a non-empty reply, the directory entry
    [display name, icon], one translated item per video and the closing
    ['', ''] marker to dictionaries, and updates the counters in
    self.channel. Fetch errors are written to stderr and leave
    dictionaries unchanged.
    return the video dictionary of directories and their video meta data
    '''
    initial_length = len(dictionaries)

    if self.config['debug_enabled']:
        print("Category URL:")
        print(url)
        print()

    try:
        result = JsonHandler(url).getJson()
    except Exception as errormsg:
        sys.stderr.write(self.error_messages['YouTubeUrlError'] % (url, errormsg))
        return dictionaries

    if result is None:
        # self.feed is not assigned anywhere in this class; use it only if
        # something set it, otherwise name the category being processed.
        feed_name = getattr(self, 'feed', getattr(self, 'tree_key', ''))
        sys.stderr.write('1-No Videos for (%s)\n' % feed_name)
        return dictionaries

    if 'pageInfo' not in result or 'items' not in result:
        return dictionaries

    entries = result['items']
    self.channel['channel_numresults'] += int(result['pageInfo']['totalResults'])
    self.channel['channel_startindex'] = self.page_limit
    self.channel['channel_returned'] = len(entries)

    for position, entry in enumerate(entries):
        item = self.parseDetails(entry)

        if position == 0: # Add the dictionaries display name
            dictionaries.append([self.massageDescription(self.tree_key),
                                 self.setTreeViewIcon()])

        dictionaries.append(self.translateItem(item))

    if initial_length < len(dictionaries): # Need to check if there was any items for this Category
        dictionaries.append(['', '']) # Add the nested dictionary indicator
    return dictionaries
# end getVideosForURL()
623# end Videos() class
def getVideosForURL(self, url, dictionaries)
Definition: youtube_api.py:581
def searchForVideos(self, title, pagenumber)
Definition: youtube_api.py:468
def getUserPreferences(self, userPreferenceFilePath)
Definition: youtube_api.py:208
def detectUserLocationByIP(self)
Start - Utility functions.
Definition: youtube_api.py:239
def __init__(self, apikey, mythtv=True, interactive=False, select_first=False, debug=False, custom_ui=None, language=None, search_all_languages=False)
Definition: youtube_api.py:87
def getSearchResults(self, title, pagenumber, pagelen)
Definition: youtube_api.py:403
def setTreeViewIcon(self, dir_icon=None)
Definition: youtube_api.py:350
def searchTitle(self, title, pagenumber, pagelen)
End of Utility functions.
Definition: youtube_api.py:372
def getVideosForCategory(self, categoryId, dictionaries)
Definition: youtube_api.py:567
static pid_list_t::iterator find(const PIDInfoMap &map, pid_list_t &list, pid_list_t::iterator begin, pid_list_t::iterator end, bool find_open)
static void print(const QList< uint > &raw_minimas, const QList< uint > &raw_maximas, const QList< float > &minimas, const QList< float > &maximas)