3 Scraper for http://www.viewlyrics.com
6 https://github.com/PedroHLC/ViewLyricsOpenSearcher
9 https://github.com/rikels/LyricsSearch
# Display name of this scraper. Further down it is used as the prefix of the
# search log message and assigned to lyrics.source.
19 __title__ =
'MiniLyrics'
26 Minilyrics specific functions
# Convert a string of hex digits into characters: each adjacent pair
# hexx[i], hexx[i + 1] is parsed as one base-16 number and appended via chr().
# NOTE(review): the statement that advances i is not visible here; presumably
# it steps by 2 so that the pairs do not overlap -- confirm.
32 while (i < (len(hexx) - 1)):
33 string += chr(int(hexx[i] + hexx[i + 1], 16))
# Hash the payload together with the caller-supplied extra bytes, then turn
# the hex digest into a character string with hexToStr.
41 md5.update(data + md5_extra)
42 hasheddata = MiniLyrics.hexToStr(md5.hexdigest())
# The key is the rounded quotient j / datalen, built as a one-character string.
# NOTE(review): how j is accumulated is not visible here; presumably it is the
# sum of the byte values of data, making the key their mean -- confirm.
51 magickey = chr(int(round(float(j) / float(datalen))))
# Pre-sized output list of len(data) slots; the loop below overwrites the
# first datalen of them (presumably datalen == len(data) -- confirm).
52 encddata = list(range(len(data)))
# Normalise the key to an int for the XOR below. magickey was just produced by
# chr(), so the isinstance test is False at this point.
# NOTE(review): the lines between the test and the ord() call are missing;
# presumably a no-op branch followed by an else -- confirm.
53 if isinstance(magickey, int):
56 magickey = ord(magickey)
# XOR every element of data with the key. Indexing bytes yields an int on
# Python 3, while indexing a str yields a one-character string, hence the
# two forms.
57 for i
in range(datalen):
58 if isinstance(data[i], int):
59 encddata[i] = data[i] ^ magickey
61 encddata[i] = ord(data[i]) ^ magickey
# Assemble the message: '\x02', the key character, '\x04\x00\x00\x00', the
# digest string, then the XOR-ed payload decoded as UTF-8.
63 result =
'\x02' + chr(magickey) +
'\x04\x00\x00\x00' + str(hasheddata) + bytearray(encddata).decode(
'utf-8')
# The XOR-ed bytes are not necessarily valid UTF-8; in that case let chardet
# guess an encoding.
64 except UnicodeDecodeError:
65 ecd = chardet.detect(bytearray(encddata))
# Same message layout, with the payload decoded using the detected encoding.
68 result =
'\x02' + chr(magickey) +
'\x04\x00\x00\x00' + str(hasheddata) + bytearray(encddata).decode(ecd[
'encoding'])
# Fallback: map every payload byte to the code point of the same value.
70 result =
'\x02' + chr(magickey) +
'\x04\x00\x00\x00' + str(hasheddata) +
"".join(map(chr, bytearray(encddata)))
# Same byte-to-code-point fallback as the previous assignment.
# NOTE(review): the conditions selecting between these assignments are not
# visible here -- confirm which case each one serves.
72 result =
'\x02' + chr(magickey) +
'\x04\x00\x00\x00' + str(hasheddata) +
"".join(map(chr, bytearray(encddata)))
# Make sure the key is an int before XOR-ing; ord() converts a one-character
# string.
# NOTE(review): the lines between the test and the ord() call are missing;
# presumably a no-op branch followed by an else -- confirm.
81 if isinstance(magickey, int):
84 magickey = ord(magickey)
# Decoding starts at offset 22; everything before it is skipped.
# NOTE(review): the encoder's prefix is 1 + 1 + 4 characters plus the digest
# string, which gives 22 if the digest string is 16 characters long; presumably
# the response uses the same layout -- confirm.
85 for i
in range(22, datalen):
# XOR each element with the key and append the resulting character; data[i]
# is an int for bytes input and a one-character string for str input.
86 if isinstance(data[i], int):
87 result += chr(data[i] ^ magickey)
89 result += chr(ord(data[i]) ^ magickey)
# Replacement table applied to `string` through str.replace below.
# NOTE(review): every key here is identical to its value and the first key is
# not a valid string literal, so as written the replacements change nothing.
# This looks like a table of HTML entities (&#39; &quot; &gt; &lt; &amp;) whose
# keys were decoded somewhere along the way -- confirm against the upstream file.
99 entities = {
''':
'\'',
'"':
'"',
'>':
'>',
'<':
'<',
'&':
'&'}
# Replace one table key with its value; i is presumably the loop variable over
# the keys of entities (the loop header is not visible here).
101 string = string.replace(i,entities[i])
# Log which artist/title is being searched, passing self.DEBUG as the debug flag.
105 log(
'%s: searching lyrics for %s - %s' % (__title__, song.artist, song.title), debug=self.
DEBUG)
# Result object for this search, built from the scraper's settings.
106 lyrics = Lyrics(settings=self.
settings)
# Tag the result with this scraper's display name.
108 lyrics.source = __title__
# Search endpoint.
110 search_url =
'http://search.crintsoft.com/searchlyrics.htm'
# XML query template; {artist} and {title} are filled in from the song below.
# NOTE(review): the values are interpolated into XML attributes without any
# escaping, so a quote or '&' in an artist/title would yield malformed XML --
# confirm whether that matters for this service.
111 search_query_base =
"<?xml version='1.0' encoding='utf-8' standalone='yes' ?><searchV1 client=\"ViewLyricsOpenSearcher\" artist=\"{artist}\" title=\"{title}\" OnlyMatched=\"1\" />"
112 search_useragent =
'MiniLyrics'
# Byte string handed to vl_enc as its second argument.
113 search_md5watermark = b
'Mlv1clt4.0'
# Fill in the template, encode it as UTF-8 and run it through vl_enc.
114 search_encquery = MiniLyrics.vl_enc(search_query_base.format(artist=song.artist, title=song.title).encode(
'utf-8'), search_md5watermark)
# Request headers.
# NOTE(review): Content-Length is taken from len() of the value returned by
# vl_enc; requests normally computes this header itself -- verify that the
# explicit value matches what is actually sent.
115 headers = {
"User-Agent":
"{ua}".format(ua=search_useragent),
116 "Content-Length":
"{content_length}".format(content_length=len(search_encquery)),
117 "Connection":
"Keep-Alive",
118 "Expect":
"100-continue",
119 "Content-Type":
"application/x-www-form-urlencoded"
# POST the encoded query with a 10 second timeout.
122 request = requests.post(search_url, data=search_encquery, headers=headers, timeout=10)
# NOTE(review): the body is passed on to vl_dec, which XORs it element by
# element. .text applies a text decoding first, whereas .content would give
# the raw bytes -- confirm which is intended.
123 search_result = request.text
# Decode the response with vl_dec.
126 rawdata = MiniLyrics.vl_dec(search_result)
# Turn NUL characters into '*' so the regular expressions below can use '*'
# as the field delimiter.
128 lrcdata = rawdata.replace(
'\x00',
'*')
# Find the first artist and title fields in the decoded data.
# NOTE(review): these patterns contain backslash escapes in non-raw string
# literals; raw strings (r'...') would avoid invalid-escape warnings on newer
# Python versions.
129 artistmatch = re.search(
'artist\*(.*?)\*',lrcdata)
132 titlematch = re.search(
'title\*(.*?)\*',lrcdata)
# group(1) is the captured field value.
# NOTE(review): re.search returns None when nothing matches; presumably the
# lines not shown here guard against that -- confirm.
135 artist = artistmatch.group(1)
136 title = titlematch.group(1)
# Accept the result only when both the artist and the title have a
# case-insensitive similarity ratio above 0.8 with the requested song.
138 if (difflib.SequenceMatcher(
None, song.artist.lower(), artist.lower()).ratio() > 0.8)
and (difflib.SequenceMatcher(
None, song.title.lower(), title.lower()).ratio() > 0.8):
# Collect every '.lrc' path found in the decoded data.
139 results = re.findall(
'[a-z0-9/_]*?\.lrc', lrcdata)
# One entry per link: (display label, lrc path, artist, title). item is
# presumably the loop variable over results (loop header not visible here).
141 links.append((artist +
' - ' + title, item, artist, title))
# Unpack an entry shaped like the tuples built by the search code:
# (display label, lrc path, artist, title).
# NOTE(review): the last element is the title, yet it is bound to the name
# `song` here -- confirm this does not clash with other uses of `song`.
153 title,url,artist,song = link
# Download the .lrc file from the lyrics host with a 10 second timeout.
155 f = requests.get(
'http://search.crintsoft.com/l/' + url, timeout=10)
# Guess the encoding of the downloaded data and decode it, ignoring
# undecodable bytes.
# NOTE(review): chardet.detect can report an encoding of None, in which case
# decode() would raise TypeError -- confirm whether lines not shown handle that.
159 enc = chardet.detect(lyrics)
160 lyrics = lyrics.decode(enc[
'encoding'],
'ignore')