3 Scraper for http://www.baidu.com
13 from utilities
import *
# Give every socket created after this point a 10-second default timeout
# instead of the interpreter default.
socket.setdefaulttimeout(10)
# Search URL template; its two %s slots are filled with the song title and
# artist (in that order) where the URL is built.
self.BASE_URL = 'http://music.baidu.com/search/lrc?key=%s-%s'
# Log which artist/title pair is being looked up, prefixed with __title__.
message = '%s: searching lyrics for %s - %s' % (__title__, song.artist, song.title)
log(message)
# Record __title__ as the source on the lyrics object.
lyrics.source = __title__
# Build the search URL. Title and artist are percent-encoded first: urlopen()
# rejects URLs that contain spaces or non-ASCII characters, and reserved
# characters such as '&' or '#' would otherwise truncate the query value.
import urllib.parse
quoted_title = urllib.parse.quote(song.title, safe='')
quoted_artist = urllib.parse.quote(song.artist, safe='')
url = self.BASE_URL % (quoted_title, quoted_artist)
# Read the result page and close the HTTP response as soon as it is consumed.
with urllib.request.urlopen(url) as response:
    data = response.read().decode('utf-8')
# Extract the title, artist and .lrc download path from the first occurrence
# of each marker in the page (DOTALL lets the lazy '.*?' span line breaks).
# NOTE(review): re.search() returns None when a marker is missing, so the
# .group(1) calls raise AttributeError in that case -- confirm the enclosing
# code handles it.
songmatch = re.search(r'song-title.*?<em>(.*?)</em>', data, flags=re.DOTALL)
track = songmatch.group(1)
artistmatch = re.search(r'artist-title.*?<em>(.*?)</em>', data, flags=re.DOTALL)
name = artistmatch.group(1)
urlmatch = re.search(r"down-lrc-btn.*?':'(.*?)'", data, flags=re.DOTALL)
found_url = urlmatch.group(1)
# Accept the hit only when both the artist and the title found on the page
# are a close, case-insensitive match (similarity ratio above 0.8) for the
# requested song.
artist_ratio = difflib.SequenceMatcher(
    None, song.artist.lower(), name.lower()).ratio()
title_ratio = difflib.SequenceMatcher(
    None, song.title.lower(), track.lower()).ratio()
if artist_ratio > 0.8 and title_ratio > 0.8:
    # Download the raw lyrics bytes; the context manager closes the HTTP
    # response instead of leaving it open until garbage collection.
    with urllib.request.urlopen(self.LRC_URL % found_url) as response:
        lyr = response.read()
# Guess the charset of the downloaded bytes. chardet reports
# {'encoding': None} when it cannot identify one (e.g. an empty payload), and
# bytes.decode() raises TypeError for a None encoding, so fall back to UTF-8.
# Undecodable bytes are dropped ('ignore') rather than raising.
enc = chardet.detect(lyr)
lyr = lyr.decode(enc['encoding'] or 'utf-8', 'ignore')