3 Scraper for https://www.rclyricsband.com/
10 from bs4 import BeautifulSoup
13 __title__ = "RCLyricsBand"
17 UserAgent = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"}
21 self.DEBUG = kwargs['debug']
24 self.LYRIC_URL = 'https://rclyricsband.com/%s'
28 log("%s: searching lyrics for %s - %s" % (__title__, song.artist, song.title), debug=self.DEBUG)
29 lyrics = Lyrics(settings=self.settings)
31 lyrics.source = __title__
38 searchdata['search'] = '%s %s' % (artist, title)
39 search = requests.post(url, data=searchdata, headers=UserAgent, timeout=10)
40 response = search.text
44 soup = BeautifulSoup(response, 'html.parser')
45 for link in soup.find_all('a', {'class': 'song_search'}):
47 foundsong = link.string.split(' - ')[0]
48 foundartist = link.string.split(' - ')[-1]
49 if (difflib.SequenceMatcher(None, artist.lower(), foundartist.lower()).ratio() > 0.8) and (difflib.SequenceMatcher(None, title.lower(), foundsong.lower()).ratio() > 0.8):
50 links.append((foundartist + ' - ' + foundsong, self.LYRIC_URL % link.get('href'), foundartist, foundsong))
63 title,url,artist,song = link
65 log('%s: search url: %s' % (__title__, url), debug=self.DEBUG)
66 search = requests.get(url, headers=UserAgent, timeout=10)
67 response = search.text
70 matchcode = re.search("lrc_text_format'>(.*?)</p", response, flags=re.DOTALL)
72 lyricscode = (matchcode.group(1))
73 cleanlyrics = re.sub('<br>', '\n', lyricscode)
74 cleanlyrics = html.unescape(cleanlyrics)
def get_lyrics_from_list(self, link)
def __init__(self, *args, **kwargs)
def get_lyrics(self, song)
None log(str msg, int level=LOGDEBUG)