6 from bs4
import BeautifulSoup
9 __title__ =
'lyricscom'
13 UserAgent = {
"User-Agent":
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"}
# Search URL template stored on the instance. The single %s placeholder is
# filled with the URL-quoted artist name when the search request is issued.
# NOTE(review): the meaning of the fixed `qtype=2` query parameter is not
# visible here - presumably it selects an artist search; confirm.
19 self.
url =
'https://www.lyrics.com/serp.php?st=%s&qtype=2'
# Lyrics lookup in three HTTP steps: artist search -> artist page -> song page.
# NOTE(review): the numeric prefixes jump (26 -> 29, 30 -> 33, 41 -> 45,
# 53 -> 57), so some statements between the visible ones are not shown here.
#
# A single requests session is created and reused for all GET calls below.
22 sess = requests.Session()
# Log the search with the source name, artist and title; the debug flag comes
# from self.DEBUG.
23 log(
'%s: searching lyrics for %s - %s' % (__title__, song.artist, song.title), debug=self.
DEBUG)
# Result object, built from self.settings (Lyrics is defined outside this view).
24 lyrics = Lyrics(settings=self.
settings)
# Tag the result with this scraper's name.
26 lyrics.source = __title__
# Step 1: request the search page, substituting the URL-quoted artist name
# into self.url; 10 second timeout.
29 request = sess.get(self.
url % urllib.parse.quote_plus(song.artist), headers=UserAgent, timeout=10)
30 response = request.text
33 soup = BeautifulSoup(response,
'html.parser')
# Scan every <a> tag for one that has a .string and whose href starts with
# 'artist/'.
# NOTE(review): link.get('href') returns None for an anchor without an href
# attribute, so .startswith would raise AttributeError for such an anchor that
# still has text - confirm whether the search page can contain one.
35 for link
in soup.find_all(
'a'):
36 if link.string
and link.get(
'href').startswith(
'artist/'):
# The matched href has no leading slash ('artist/...'), hence the trailing
# slash on the base URL.
37 url =
'https://www.lyrics.com/' + link.get(
'href')
# Step 2: fetch the artist page found above.
41 req = sess.get(url, headers=UserAgent, timeout=10)
# NOTE(review): `resp` is not assigned in the visible lines - presumably it is
# the text of `req`; confirm against the full file.
45 soup = BeautifulSoup(resp,
'html.parser')
# Fuzzy title match: compare each link's text with the song title, both
# lower-cased; a SequenceMatcher ratio above 0.8 counts as a match.
47 for link
in soup.find_all(
'a'):
48 if link.string
and (difflib.SequenceMatcher(
None, link.string.lower(), song.title.lower()).ratio() > 0.8):
# No trailing slash on the base here - presumably these hrefs already start
# with '/'; confirm.
49 url =
'https://www.lyrics.com' + link.get(
'href')
# Step 3: fetch the song page.
53 req2 = sess.get(url, headers=UserAgent, timeout=10)
# Capture the content of the first <pre ...>...</pre> element; re.DOTALL lets
# '.' span newlines.
# NOTE(review): `resp2` is not assigned in the visible lines - presumably the
# text of `req2`; confirm.
57 matchcode = re.search(
'<pre.*?>(.*?)</pre>', resp2, flags=re.DOTALL)
# NOTE(review): re.search returns None when nothing matches; no guard before
# .group(1) is visible here - confirm one exists in the elided lines.
59 lyricscode = (matchcode.group(1))
# Strip any remaining HTML tags from the captured block.
60 lyr = re.sub(
'<[^<]+?>',
'', lyricscode)
# Turn literal backslash-n sequences (two characters) into real newlines and
# store the text on the result object.
61 lyrics.lyrics = lyr.replace(
'\\n',
'\n')