MythTV master
lyricsScraper.py
Go to the documentation of this file.
1#-*- coding: UTF-8 -*-
2import sys
3import re
4import json
5import requests
6from urllib.error import HTTPError
7import urllib.parse
8from html.parser import HTMLParser
9import xbmc
10import xbmcaddon
11from lib.utils import *
12
# Scraper name: used as the prefix of log messages and stored as the
# ``source`` of every Lyrics object returned by get_lyrics.
__title__ = 'lyricwiki'
# NOTE(review): not read anywhere in this file -- presumably a ranking hint
# for whatever code loads the scrapers; confirm against the caller.
__priority__ = '200'
# Copied to ``Lyrics.lrc`` on every result produced by this scraper.
__lrc__ = False

# Placeholder sentence looked for in the extracted page text; when it is
# present get_lyrics returns None instead of a Lyrics object.
LIC_TXT = 'we are not licensed to display the full lyrics for this song at the moment'
18
19
def __init__(self, *args, **kwargs):
    """Store the scraper configuration and the LyricWiki API URL template.

    Args:
        *args: accepted for call compatibility; not used.
        **kwargs: must contain ``debug`` (kept as ``self.DEBUG`` and passed
            to ``log`` by get_lyrics) and ``settings`` (kept as
            ``self.settings`` and passed to ``Lyrics`` by get_lyrics).

    Raises:
        KeyError: if ``debug`` or ``settings`` is not supplied.
    """
    self.DEBUG = kwargs['debug']
    self.settings = kwargs['settings']
    # Two %s slots: get_lyrics fills them with the URL-quoted artist and
    # title, in that order.
    self.url = 'http://lyrics.wikia.com/api.php?func=getSong&artist=%s&song=%s&fmt=realjson'
25
def get_lyrics(self, song):
    """Fetch the lyrics of *song* from LyricWiki.

    The API URL in ``self.url`` is queried for the song's page address,
    that page is downloaded, and the content of its ``lyricbox`` element
    is converted to plain text (entities unescaped, ``<br />`` turned into
    newlines, remaining tags removed).

    Args:
        song: object exposing ``artist`` and ``title`` attributes.

    Returns:
        A ``Lyrics`` object with ``song``, ``source``, ``lrc`` and
        ``lyrics`` filled in, or ``None`` when a request fails, the API
        reply is unusable, the page is an edit stub, no lyric box is
        found, or the text contains the ``LIC_TXT`` placeholder.

    Side effects:
        Stores the page URL returned by the API in ``self.page``.
    """
    # Function-scope import: html.unescape replaces HTMLParser.unescape,
    # which was removed in Python 3.9.
    import html

    log('%s: searching lyrics for %s - %s' % (__title__, song.artist, song.title), debug=self.DEBUG)
    lyrics = Lyrics(settings=self.settings)
    lyrics.song = song
    lyrics.source = __title__
    lyrics.lrc = __lrc__

    # Step 1: ask the API where the song page lives.
    try:
        query_url = self.url % (urllib.parse.quote(song.artist), urllib.parse.quote(song.title))
        data = json.loads(requests.get(query_url, timeout=10).text)
    except (requests.exceptions.RequestException, TypeError, ValueError):
        # Network failure, unquotable artist/title, or a non-JSON reply.
        return None

    try:
        self.page = data['url']
    except (KeyError, TypeError):
        return None

    # URLs ending in 'action=edit' are not fetched; None is returned instead.
    if not isinstance(self.page, str) or self.page.endswith('action=edit'):
        return None

    # Step 2: download the song page.
    log('%s: search url: %s' % (__title__, self.page), debug=self.DEBUG)
    try:
        # requests.get does not raise on HTTP error statuses, so the body
        # is used even when the server answers 404 (lyrics are sometimes
        # delivered that way).
        page_text = requests.get(self.page, timeout=10).text
    except requests.exceptions.RequestException:
        return None

    # Step 3: extract and clean the lyric box.
    found = re.search("class='lyricbox'>(.*?)<div", page_text)
    if found is None:
        return None
    with_newlines = html.unescape(found.group(1)).replace('<br />', '\n')
    plain = re.sub('<[^<]+?>', '', with_newlines)
    if LIC_TXT in plain:
        return None
    lyrics.lyrics = plain
    return lyrics
def __init__(self, *args, **kwargs)
None log(str msg, int level=LOGDEBUG)
Definition: xbmc.py:9