3 Scraper for https://xiami.com
15 from utilities
import *
21 UserAgent =
'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0'
23 socket.setdefaulttimeout(10)
27 self.
LIST_URL =
'https://www.xiami.com/search?key=%s'
28 self.
SONG_URL =
'https://www.xiami.com/song/playlist/id/%s/object_name/default/object_id/0'
32 log(
"%s: searching lyrics for %s - %s" % (__title__, song.artist, song.title))
35 lyrics.source = __title__
37 keyword =
"%s %s" % (song.title, song.artist)
38 url = self.
LIST_URL % (urllib.parse.quote(keyword))
40 response = self.
session.
get(url, headers={
'User-Agent': UserAgent,
'Referer':
'https://www.xiami.com/play'})
41 result = response.text
43 log(
"%s: %s::%s (%d) [%s]" % (
44 __title__, self.__class__.__name__,
45 sys.exc_info()[ 2 ].tb_frame.f_code.co_name,
46 sys.exc_info()[ 2 ].tb_lineno,
50 match = re.compile(
'<td class="chkbox">.+?value="(.+?)".+?href="//www.xiami.com/song/[^"]+" title="([^"]+)".*?href="//www.xiami.com/artist/[^"]+" title="([^"]+)"', re.DOTALL).findall(result)
55 if (difflib.SequenceMatcher(
None, song.artist.lower(), artist.lower()).ratio() > 0.8)
and (difflib.SequenceMatcher(
None, song.title.lower(), title.lower()).ratio() > 0.8):
56 links.append( ( artist +
' - ' + title, x[0], artist, title ) )
68 title,id,artist,song = link
70 response = self.
session.
get(self.
SONG_URL % (id), headers={
'User-Agent': UserAgent,
'Referer':
'https://www.xiami.com/play'})
71 result = response.text
72 data = json.loads(result)
73 if 'data' in data
and 'trackList' in data[
'data']
and data[
'data'][
'trackList']
and 'lyric' in data[
'data'][
'trackList'][0]
and data[
'data'][
'trackList'][0][
'lyric']:
74 url = data[
'data'][
'trackList'][0][
'lyric']
76 log(
"%s: %s::%s (%d) [%s]" % (
77 __title__, self.__class__.__name__,
78 sys.exc_info()[ 2 ].tb_frame.f_code.co_name,
79 sys.exc_info()[ 2 ].tb_lineno,
84 response = self.
session.
get(url, headers={
'User-Agent': UserAgent,
'Referer':
'https://www.xiami.com/play'})
85 lyrics = response.content
87 log(
"%s: %s::%s (%d) [%s]" % (
88 __title__, self.__class__.__name__,
89 sys.exc_info()[ 2 ].tb_frame.f_code.co_name,
90 sys.exc_info()[ 2 ].tb_lineno,
94 enc = chardet.detect(lyrics)
95 lyrics = lyrics.decode(enc[
'encoding'],
'ignore')