3Scraper for https://www.lyricsify.com/
9from bs4
import BeautifulSoup
12__title__ =
"Lyricsify"
16UserAgent = {
"Host":
"www.lyricsify.com",
"User-Agent":
"Mozilla/5.0 (X11; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0",
"Accept":
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language":
"en-US,en;q=0.5",
"Accept-Encoding":
"gzip, deflate, br, zstd",
"DNT":
"1",
"Alt-Used":
"www.lyricsify.com",
"Connection":
"keep-alive",
"Upgrade-Insecure-Requests":
"1",
"Sec-Fetch-Dest":
"document",
"Sec-Fetch-Mode":
"navigate",
"Sec-Fetch-Site":
"none",
"Sec-Fetch-User":
"?1",
"Priority":
"u=1"}
24 self.
SEARCH_URL =
'https://www.lyricsify.com/lyrics/%s/%s'
28 log(
"%s: searching lyrics for %s - %s" % (__title__, song.artist, song.title), debug=self.
DEBUG)
29 lyrics = Lyrics(settings=self.
settings)
31 lyrics.source = __title__
33 artist = song.artist.replace(
"'",
'').replace(
'!',
'').replace(
'?',
'').replace(
'"',
'').replace(
'/',
'').replace(
'.',
'').replace(
'&',
'').replace(
',',
'').replace(
'(',
'').replace(
')',
'').replace(
' ',
'-')
34 title = song.title.replace(
"'",
'').replace(
'!',
'').replace(
'?',
'').replace(
'"',
'').replace(
'/',
'').replace(
'.',
'').replace(
'&',
'').replace(
',',
'').replace(
'(',
'').replace(
')',
'').replace(
' ',
'-')
35 url = self.
SEARCH_URL % (artist.lower(), title.lower())
37 log(
'%s: search url: %s' % (__title__, url), debug=self.
DEBUG)
38 search = requests.get(url, headers=UserAgent, timeout=10)
39 response = search.text
42 matchcode = re.search(
'details">(.*?)</div', response, flags=re.DOTALL)
44 lyricscode = (matchcode.group(1))
45 lyr = re.sub(
'<[^<]+?>',
'', lyricscode)
52 log(
"%s: searching lyrics for %s - %s" % (__title__, song.artist, song.title), debug=self.
DEBUG)
53 lyrics = Lyrics(settings=self.
settings)
55 lyrics.source = __title__
57 artist = song.artist.replace(
' ',
'-')
58 title = song.title.replace(
' ',
'-')
61 search = requests.get(url, headers=UserAgent, timeout=10)
62 response = search.text
66 soup = BeautifulSoup(response,
'html.parser')
67 for link
in soup.find_all(
'a'):
68 if link.string
and link.get(
'href').startswith(
'/lrc/'):
69 foundartist = link.string.split(
' - ', 1)[0]
72 foundsong = link.string.split(
' - ', 1)[1].rstrip(
'.lrc')
75 if (difflib.SequenceMatcher(
None, artist.lower(), foundartist.lower()).ratio() > 0.8)
and (difflib.SequenceMatcher(
None, title.lower(), foundsong.lower()).ratio() > 0.8):
76 links.append((foundartist +
' - ' + foundsong, self.
LYRIC_URL % link.get(
'href'), foundartist, foundsong))
89 title,url,artist,song = link
91 log(
'%s: search url: %s' % (__title__, url), debug=self.
DEBUG)
92 search = requests.get(url, headers=UserAgent, timeout=10)
93 response = search.text
96 matchcode = re.search(
'/h3>(.*?)</div', response, flags=re.DOTALL)
98 lyricscode = (matchcode.group(1))
99 cleanlyrics = re.sub(
'<[^<]+?>',
'', lyricscode)
def get_lyrics_from_list(self, link)
def __init__(self, *args, **kwargs)
def get_lyrics(self, song)
None log(str msg, int level=LOGDEBUG)