MythTV master
lyricsScraper.py
Go to the documentation of this file.
1#-*- coding: UTF-8 -*-
2import sys
3import re
4import requests
5import html
6import xbmc
7import xbmcaddon
8from lib.utils import *
9
# Name of this scraper; get_lyrics() stores it as the lyrics source and
# prefixes its log message with it.
__title__ = 'supermusic'
# Scraper priority, kept as a string. Not read anywhere in this file --
# presumably consumed by whatever loads the scrapers (TODO confirm).
__priority__ = '250'
# Copied onto the ``lrc`` attribute of the Lyrics object built by get_lyrics().
__lrc__ = False
13
14
def __init__(self, *args, **kwargs):
    """Remember the debug flag and the settings object supplied by the caller.

    This is a method; the enclosing class statement is not part of the
    visible excerpt.

    Keyword Args:
        debug: stored as ``self.DEBUG`` and later forwarded to ``log``.
        settings: stored as ``self.settings`` and later passed to ``Lyrics``.

    Raises:
        KeyError: if ``debug`` or ``settings`` is not supplied.
    """
    self.DEBUG = kwargs['debug']
    self.settings = kwargs['settings']
19
def get_lyrics(self, song):
    """Look up the lyrics of *song* on supermusic.cz.

    The site is searched by the lower-cased song title. The first result
    row whose type is ``text`` or ``akordy a text`` (Slovak for "chords and
    text") and whose artist equals ``song.artist`` case-insensitively is
    downloaded, and its markup is stripped.

    This is a method; the enclosing class statement is not part of the
    visible excerpt.

    Args:
        song: object exposing ``artist`` and ``title`` string attributes.

    Returns:
        A ``Lyrics`` object with ``song``, ``source``, ``lrc`` and
        ``lyrics`` filled in, or ``None`` when the search request, the
        result matching or the lyrics download fails.
    """
    log('%s: searching lyrics for %s - %s' % (__title__, song.artist, song.title), debug=self.DEBUG)
    lyrics = Lyrics(settings=self.settings)
    lyrics.song = song
    lyrics.source = __title__
    lyrics.lrc = __lrc__
    artist = song.artist.lower()
    title = song.title.lower()
    # Without a timeout a stalled server would block the caller forever.
    timeout = 10  # seconds

    # Step 1: submit the title to the site's search form.
    try:
        with requests.post(
            'https://supermusic.cz/najdi.php',
            data={'hladane': title, 'typhladania': 'piesen', 'fraza': 'off'},
            timeout=timeout,
        ) as req:
            response = req.text
    except Exception as err:
        # Best effort: any failure means "no lyrics from this source", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        log('%s: search request failed: %s' % (__title__, err), debug=self.DEBUG)
        return None

    # Step 2: isolate the result list and pick the matching row.
    listing = re.search(r'Počet nájdených piesní.+<br><br>(.*)<BR>', response, re.S)
    if listing is None:
        return None
    # The href is captured together with its quotes; the trailing "\(" is a
    # literal opening parenthesis that follows the entry type.
    row_pattern = r'<a href=(?P<url>"[^"]+?") target="_parent"><b>(?P<artist>.*?)</b></a> - (?P<type>.+?) \(<a href'
    url = None
    for match in re.finditer(row_pattern, listing.group(1)):
        if match.group('type') not in ('text', 'akordy a text'):
            continue
        if match.group('artist').lower() == artist:
            url = match.group('url').strip('"')
            break
    if not url:
        return None

    # Step 3: download the song page.
    try:
        with requests.get('https://supermusic.cz/%s' % url, timeout=timeout) as req:
            response = req.text
    except Exception as err:
        log('%s: lyrics request failed: %s' % (__title__, err), debug=self.DEBUG)
        return None

    # Step 4: cut out the lyrics container and reduce it to plain text.
    block = re.search(r'class=piesen>(.*?)</font>', response, re.S)
    if block is None:
        return None
    lyr = block.group(1)
    # <sup> elements are dropped with their content (presumably chord marks).
    lyr = re.sub(r'<sup>.*?</sup>', '', lyr)
    # Line breaks become newlines before the remaining tags are removed.
    lyr = re.sub(r'<br\s*/>\s*', '\n', lyr)
    lyr = re.sub(r'<!--.*?-->', '', lyr, flags=re.DOTALL)
    lyr = re.sub(r'<[^>]*?>', '', lyr, flags=re.DOTALL)
    lyr = lyr.strip('\r\n')
    # Entities are decoded last so decoded '<' / '>' are not taken for tags.
    lyr = html.unescape(lyr)
    lyrics.lyrics = lyr
    return lyrics
None log(str msg, int level=LOGDEBUG)
Definition: xbmc.py:9