3 Scraper for http://lyrics.alsong.co.kr/
11 import xml.dom.minidom
as xml
12 from utilities
import *
# Apply a 10-second default timeout to every socket created after this point,
# so a stalled connection cannot block indefinitely.
18 socket.setdefaulttimeout(10)
# URL of the ALSong web-service endpoint that receives the SOAP lookup request.
20 ALSONG_URL =
'http://lyrics.alsong.net/alsongwebservice/service1.asmx'
23 <?xml version='1.0' encoding='UTF-8'?>
24 <SOAP-ENV:Envelope xmlns:SOAP-ENV='http://www.w3.org/2003/05/soap-envelope' xmlns:SOAP-ENC='http://www.w3.org/2003/05/soap-encoding' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:ns2='ALSongWebServer/Service1Soap' xmlns:ns1='ALSongWebServer' xmlns:ns3='ALSongWebServer/Service1Soap12'>
26 <ns1:GetResembleLyric2>
28 <ns1:strTitle>%s</ns1:strTitle>
29 <ns1:strArtistName>%s</ns1:strArtistName>
30 <ns1:nCurPage>0</ns1:nCurPage>
32 </ns1:GetResembleLyric2>
# Log which scraper is running and which artist/title is being searched.
43 log(
'%s: searching lyrics for %s - %s' % (__title__, song.artist, song.title))
# Stamp the `lyrics` object with this scraper's name.
# (`lyrics` is created on a line not shown here - presumably the result object; confirm.)
46 lyrics.source = __title__
# Build the request: the SOAP template is filled with (title, artist), in that
# order, encoded as UTF-8 bytes and sent as the request body (a POST, since
# data is supplied).
# NOTE(review): song.title / song.artist are interpolated without XML escaping;
# a value containing '&' or '<' would produce a malformed envelope. Consider
# xml.sax.saxutils.escape() - confirm against the full template.
49 headers = {
'Content-Type':
'text/xml; charset=utf-8'}
50 request = urllib.request.Request(ALSONG_URL,
bytes(ALSONG_TMPL % (song.title,song.artist),
'utf-8'), headers)
# Send the request and read the entire response body, decoded as UTF-8.
# NOTE(review): the response object is not closed in the visible lines;
# a `with` block would release the connection deterministically.
51 response = urllib.request.urlopen(request)
52 Page = response.read().decode(
'utf-8')
# Parse the response text into a minidom document (`xml` is the alias given to
# xml.dom.minidom by the import at the top of the file).
55 tree = xml.parseString(Page)
# Text of the first <strArtistName> and <strTitle> elements in the response.
# NOTE(review): the [0] lookups raise IndexError when an element is missing or
# empty; whether that is caught depends on lines not shown here.
58 name = tree.getElementsByTagName(
'strArtistName')[0].childNodes[0].data
59 track = tree.getElementsByTagName(
'strTitle')[0].childNodes[0].data
# Accept the hit only when BOTH artist and title are similar to the request:
# case-insensitive SequenceMatcher ratio strictly above 0.8 for each.
62 if (difflib.SequenceMatcher(
None, song.artist.lower(), name.lower()).ratio() > 0.8)
and (difflib.SequenceMatcher(
None, song.title.lower(), track.lower()).ratio() > 0.8):
# Text of the first <strLyric> element, with literal '<br>' markers turned
# into newline characters.
63 lyr = tree.getElementsByTagName(
'strLyric')[0].childNodes[0].data.replace(
'<br>',
'\n')