3Scraper for https://www.megalobiz.com/
7https://github.com/rtcq/syncedlyrics
12from bs4
import BeautifulSoup
15__title__ =
"Megalobiz"
24 self.
SEARCH_URL =
'https://www.megalobiz.com/search/all?qry=%s-%s&searchButton.x=0&searchButton.y=0'
28 log(
"%s: searching lyrics for %s - %s" % (__title__, song.artist, song.title), debug=self.
DEBUG)
29 lyrics = Lyrics(settings=self.
settings)
31 lyrics.source = __title__
34 url = self.
SEARCH_URL % (song.artist, song.title)
35 response = requests.get(url, timeout=10)
36 result = response.text
40 soup = BeautifulSoup(result,
'html.parser')
41 for link
in soup.find_all(
'a'):
42 if link.get(
'href')
and link.get(
'href').startswith(
'/lrc/maker/'):
43 linktext = link.text.replace(
'_',
' ').strip()
44 if song.artist.lower()
in linktext.lower()
and song.title.lower()
in linktext.lower():
45 links.append((linktext, self.
LYRIC_URL % link.get(
'href'), song.artist, song.title))
58 title,url,artist,song = link
60 log(
'%s: search url: %s' % (__title__, url), debug=self.
DEBUG)
61 response = requests.get(url, timeout=10)
62 result = response.text
65 matchcode = re.search(
'span id="lrc_[0-9]+_lyrics">(.*?)</span', result, flags=re.DOTALL)
67 lyricscode = (matchcode.group(1))
68 cleanlyrics = re.sub(
'<[^<]+?>',
'', lyricscode)
def get_lyrics(self, song)
def __init__(self, *args, **kwargs)
def get_lyrics_from_list(self, link)
None log(str msg, int level=LOGDEBUG)