MythTV master
lyricsScraper.py
Go to the documentation of this file.
#-*- coding: UTF-8 -*-
'''
Scraper for https://www.megalobiz.com/

megalobiz

https://github.com/rtcq/syncedlyrics
'''
9
10import requests
11import re
12from bs4 import BeautifulSoup
13from lib.utils import *
14
# Scraper metadata. The methods in this file use __title__ as the log prefix
# and as the source name of a result, and copy __lrc__ onto every Lyrics
# object they build.
__title__ = 'Megalobiz'
# NOTE(review): not referenced anywhere in this file; presumably read by the
# code that loads the scrapers to rank the sources -- confirm.
__priority__ = '140'
__lrc__ = True
18
19
def __init__(self, *args, **kwargs):
    """Store the caller's options and the Megalobiz URL templates.

    Positional arguments are accepted but ignored. The keyword arguments
    ``debug`` and ``settings`` are both required; leaving one out raises
    ``KeyError``.
    """
    self.DEBUG = kwargs['debug']
    self.settings = kwargs['settings']
    # Search template: filled with an (artist, title) pair.
    self.SEARCH_URL = (
        'https://www.megalobiz.com/search/all'
        '?qry=%s-%s&searchButton.x=0&searchButton.y=0'
    )
    # Lyrics page template: filled with a path taken from a search result.
    self.LYRIC_URL = 'https://www.megalobiz.com/%s'
26
def get_lyrics(self, song):
    """Search Megalobiz for ``song`` and return the first result with lyrics.

    Args:
        song: object exposing ``artist`` and ``title`` strings.

    Returns:
        A ``Lyrics`` object whose ``lyrics`` attribute holds the text of the
        first matching search result that yields lyrics. When more than one
        result matches, the full candidate list is also stored in
        ``lyrics.list``. Returns ``None`` when the search request fails, no
        result matches, or no matching result yields lyrics.
    """
    # Function-scope import so the file's import block needs no change.
    from urllib.parse import quote

    log("%s: searching lyrics for %s - %s" % (__title__, song.artist, song.title), debug=self.DEBUG)
    lyrics = Lyrics(settings=self.settings)
    lyrics.song = song
    lyrics.source = __title__
    lyrics.lrc = __lrc__
    try:
        # Percent-encode both parts: a raw '&', '#' or '+' in a name would
        # otherwise truncate or split the 'qry' parameter.
        url = self.SEARCH_URL % (quote(song.artist, safe=''), quote(song.title, safe=''))
        response = requests.get(url, timeout=10)
        result = response.text
    except Exception as exc:
        # Best effort: a failed search means "no lyrics from this source".
        # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
        log("%s: search failed: %s" % (__title__, exc), debug=self.DEBUG)
        return None

    wanted_artist = song.artist.lower()
    wanted_title = song.title.lower()
    links = []
    soup = BeautifulSoup(result, 'html.parser')
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        if not href or not href.startswith('/lrc/maker/'):
            continue
        linktext = anchor.text.replace('_', ' ').strip()
        haystack = linktext.lower()
        # Keep only results whose link text names both the artist and the title.
        if wanted_artist in haystack and wanted_title in haystack:
            # LYRIC_URL already ends in '/', so drop the href's leading slash
            # to avoid building 'https://www.megalobiz.com//lrc/maker/...'.
            links.append((linktext, self.LYRIC_URL % href.lstrip('/'), song.artist, song.title))

    if not links:
        return None
    if len(links) > 1:
        lyrics.list = links
    # Try the candidates in page order and stop at the first one with lyrics.
    for link in links:
        lyr = self.get_lyrics_from_list(link)
        if lyr:
            lyrics.lyrics = lyr
            return lyrics
    return None
56
def get_lyrics_from_list(self, link):
    """Download one search result and extract its lyrics text.

    Args:
        link: 4-tuple ``(link text, url, artist, title)``; only the url is
            used here.

    Returns:
        The content of the page's ``lrc_<digits>_lyrics`` span with all HTML
        tags removed, or ``None`` when the request fails or the page has no
        such span.
    """
    _linktext, url, _artist, _title = link
    try:
        log('%s: search url: %s' % (__title__, url), debug=self.DEBUG)
        response = requests.get(url, timeout=10)
        result = response.text
    except Exception as exc:
        # Best effort: report the failure and let the caller try the next
        # candidate. Unlike a bare except, this does not trap
        # KeyboardInterrupt/SystemExit.
        log('%s: request failed: %s' % (__title__, exc), debug=self.DEBUG)
        return None
    # DOTALL: the lyrics inside the span run over many lines.
    matchcode = re.search(r'span id="lrc_[0-9]+_lyrics">(.*?)</span', result, flags=re.DOTALL)
    if not matchcode:
        return None
    # Remove the markup inside the span and keep the text.
    # NOTE(review): HTML entities are left as-is; confirm whether the site
    # emits any (e.g. for apostrophes) that should be unescaped.
    return re.sub(r'<[^<]+?>', '', matchcode.group(1))
None log(str msg, int level=LOGDEBUG)
Definition: xbmc.py:9