MythTV master
lyricsScraper.py
Go to the documentation of this file.
1#-*- coding: UTF-8 -*-
2import sys
3import re
4import urllib.parse
5import requests
6import html
7import xbmc
8import xbmcaddon
9import json
10import difflib
11from lib.utils import *
12
# Scraper metadata.
# __title__ is used as the lyrics source name and as the prefix of log messages.
__title__ = 'genius'
# NOTE(review): presumably the scraper's ordering weight relative to other
# scrapers; nothing in this file reads it - confirm against the caller.
__priority__ = '200'
# Copied onto every Lyrics result as its `lrc` attribute.
# NOTE(review): presumably False means plain, unsynchronised text - confirm.
__lrc__ = False
16
17
def __init__(self, *args, **kwargs):
    """Store the scraper configuration and the Genius search URL template.

    Keyword arguments:
        debug: stored as ``self.DEBUG`` and forwarded to ``log`` calls.
        settings: stored as ``self.settings`` and passed to ``Lyrics``.

    Raises:
        KeyError: if ``debug`` or ``settings`` is not supplied.
    """
    self.DEBUG = kwargs['debug']
    self.settings = kwargs['settings']
    # Template with two '%s' slots (URL-quoted artist and title) separated by
    # an encoded space ('%%20' becomes '%20' after formatting).
    # NOTE(review): the access token is embedded in the source and sent over
    # plain http - consider https and an externally supplied token.
    self.url = 'http://api.genius.com/search?q=%s%%20%s&access_token=Rq_cyNZ6fUOQr4vhyES6vu1iw3e94RX85ju7S8-0jhM-gftzEvQPG7LJrrnTji11'
22 self.url = 'http://api.genius.com/search?q=%s%%20%s&access_token=Rq_cyNZ6fUOQr4vhyES6vu1iw3e94RX85ju7S8-0jhM-gftzEvQPG7LJrrnTji11'
23
def get_lyrics(self, song):
    """Search Genius for *song* and scrape the lyrics from its page.

    The first search hit is accepted only when both its artist name and
    its title are close to the requested ones (similarity ratio > 0.8,
    case-insensitive). The lyrics are then extracted from the hit's HTML
    page.

    Args:
        song: object providing ``artist`` and ``title`` strings.

    Returns:
        A ``Lyrics`` object with ``lyrics`` filled in, or ``None`` when the
        search fails, the first hit does not match, the page cannot be
        fetched, or no usable lyrics text is found.
    """
    log('%s: searching lyrics for %s - %s' % (__title__, song.artist, song.title), debug=self.DEBUG)
    lyrics = Lyrics(settings=self.settings)
    lyrics.song = song
    lyrics.source = __title__
    lyrics.lrc = __lrc__
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; rv:77.0) Gecko/20100101 Firefox/77.0'}

    def _similar(wanted, found):
        # Fuzzy, case-insensitive comparison tolerating small spelling differences.
        return difflib.SequenceMatcher(None, wanted.lower(), found.lower()).ratio() > 0.8

    # Best-effort lookup: any network failure or a non-JSON answer (which
    # json.loads reports as ValueError) means "no lyrics", not a crash.
    try:
        url = self.url % (urllib.parse.quote(song.artist), urllib.parse.quote(song.title))
        req = requests.get(url, headers=headers, timeout=10)
        data = json.loads(req.text)
    except Exception:
        return None

    # Only the first hit is considered; a missing key, an empty hit list or
    # an unexpected value type all count as "not found".
    try:
        result = data['response']['hits'][0]['result']
        name = result['primary_artist']['name']
        track = result['title']
        if not (_similar(song.artist, name) and _similar(song.title, track)):
            return None
        self.page = result['url']
    except (KeyError, IndexError, TypeError, AttributeError):
        return None
    log('%s: search url: %s' % (__title__, self.page), debug=self.DEBUG)

    try:
        req = requests.get(self.page, headers=headers, timeout=10)
        response = req.text
    except Exception:
        return None

    response = html.unescape(response)
    # The lyrics may be spread over several "Lyrics__Container" elements;
    # concatenate the contents of all of them in document order.
    matchcode = re.findall(r'class="Lyrics__Container.*?">(.*?)</div><div', response, flags=re.DOTALL)
    lyricscode = ''.join(matchcode)
    # Line breaks become newlines, every remaining tag is dropped, and
    # literal backslash-n sequences are turned into real newlines.
    text = lyricscode.replace('<br/>', '\n')
    text = re.sub(r'<[^<]+?>', '', text)
    text = text.replace('\\n', '\n').strip()
    if not text or text == '[Instrumental]' or text.startswith('Lyrics for this song have yet to be released'):
        return None
    lyrics.lyrics = text
    return lyrics
None log(str msg, int level=LOGDEBUG)
Definition: xbmc.py:9