MythTV master
lyricsScraper.py
Go to the documentation of this file.
1#-*- coding: UTF-8 -*-
2'''
3Scraper for https://www.rclyricsband.com/
4'''
5
6import requests
7import re
8import html
9import difflib
10from bs4 import BeautifulSoup
11from lib.utils import *
12
# Scraper metadata: display name, priority value and whether the lyrics
# returned are in LRC format.
# NOTE(review): presumably read by the code that loads the scrapers - confirm.
__title__ = "RCLyricsBand"
__priority__ = '130'
__lrc__ = True

# Browser-like User-Agent header passed with the HTTP requests made below.
UserAgent = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"}
18
class LyricsFetcher:
    """Look up lyrics on rclyricsband.com.

    The site is queried with a POST search for "<artist> <title>"; every
    result whose artist and title are both close enough to the requested
    song is then fetched in turn until one yields lyrics text.
    """

    def __init__(self, *args, **kwargs):
        """Store the scraper configuration.

        Keyword Args:
            debug: flag forwarded to every ``log`` call.
            settings: object handed to ``Lyrics`` when a result is built.

        Raises:
            KeyError: if ``debug`` or ``settings`` is not supplied.
        """
        self.DEBUG = kwargs['debug']
        self.settings = kwargs['settings']
        self.SEARCH_URL = 'https://rclyricsband.com/'
        self.LYRIC_URL = 'https://rclyricsband.com/%s'

    @staticmethod
    def _similar(wanted, found):
        """Return True when *found* matches *wanted* closely (ratio > 0.8), ignoring case."""
        return difflib.SequenceMatcher(None, wanted.lower(), found.lower()).ratio() > 0.8

    def get_lyrics(self, song):
        """Search the site for *song* and return a populated ``Lyrics`` object.

        Args:
            song: object exposing ``artist`` and ``title`` strings.

        Returns:
            A ``Lyrics`` instance with ``lyrics`` filled in, or ``None`` when
            the search request fails, nothing matches, or no matching page
            contains lyrics.
        """
        log("%s: searching lyrics for %s - %s" % (__title__, song.artist, song.title), debug=self.DEBUG)
        lyrics = Lyrics(settings=self.settings)
        lyrics.song = song
        lyrics.source = __title__
        lyrics.lrc = __lrc__
        artist = song.artist
        title = song.title
        try:
            searchdata = {'search': '%s %s' % (artist, title)}
            search = requests.post(self.SEARCH_URL, data=searchdata, headers=UserAgent, timeout=10)
            response = search.text
        except Exception as error:
            # Best effort: a failed lookup must not break the caller, but
            # interpreter-exit signals are no longer swallowed.
            log('%s: search request failed: %s' % (__title__, error), debug=self.DEBUG)
            return None
        links = []
        soup = BeautifulSoup(response, 'html.parser')
        for link in soup.find_all('a', {'class': 'song_search'}):
            href = link.get('href')
            # Skip anchors without text or without a target; formatting a
            # missing href would produce a bogus ".../None" URL.
            if not link.string or not href:
                continue
            # Result text is split on ' - ': first part is the song,
            # last part is the artist.
            parts = link.string.split(' - ')
            foundsong = parts[0]
            foundartist = parts[-1]
            if self._similar(artist, foundartist) and self._similar(title, foundsong):
                links.append((foundartist + ' - ' + foundsong, self.LYRIC_URL % href, foundartist, foundsong))
        if not links:
            return None
        if len(links) > 1:
            # Keep the alternatives on the result only when there are several.
            lyrics.list = links
        for link in links:
            lyr = self.get_lyrics_from_list(link)
            if lyr:
                lyrics.lyrics = lyr
                return lyrics
        return None

    def get_lyrics_from_list(self, link):
        """Download one candidate page and extract its lyrics text.

        Args:
            link: sequence as built by ``get_lyrics``; only index 1 (the
                page URL) is used.

        Returns:
            The lyrics with ``<br>`` turned into newlines and HTML entities
            unescaped, or ``None`` when the request fails or the page has no
            ``lrc_text_format`` paragraph.
        """
        url = link[1]
        try:
            log('%s: search url: %s' % (__title__, url), debug=self.DEBUG)
            search = requests.get(url, headers=UserAgent, timeout=10)
            response = search.text
        except Exception as error:
            log('%s: lyrics request failed: %s' % (__title__, error), debug=self.DEBUG)
            return None
        # DOTALL lets the non-greedy group span the line breaks inside the paragraph.
        matchcode = re.search("lrc_text_format'>(.*?)</p", response, flags=re.DOTALL)
        if not matchcode:
            return None
        return html.unescape(matchcode.group(1).replace('<br>', '\n'))
def get_lyrics_from_list(self, link)
def __init__(self, *args, **kwargs)
def get_lyrics(self, song)
None log(str msg, int level=LOGDEBUG)
Definition: xbmc.py:9