3 Scraper for http://lyrics.alsong.co.kr/
11 import xml.dom.minidom
as xml
12 from optparse
import OptionParser
13 from common
import utilities
# Credits written into the grabber's <author> version element.
__author__ = "Paul Harrison and 'driip'"
# Summary written into the grabber's <description> version element.
__description__ = "Search http://lyrics.alsong.co.kr"
# Copied onto each Lyrics object and emitted as the 'syncronized' XML element.
# The spelling is part of the identifier and of the element name, so it stays.
__syncronized__ = True

# Default timeout, in seconds, applied to sockets created after this call.
socket.setdefaulttimeout(10)

# SOAP endpoint the search request is sent to.
# NOTE(review): this host (alsong.net) differs from the one named in
# __description__ (alsong.co.kr) -- confirm which one is current.
ALSONG_URL = 'http://lyrics.alsong.net/alsongwebservice/service1.asmx'
29 <?xml version='1.0' encoding='UTF-8'?>
30 <SOAP-ENV:Envelope xmlns:SOAP-ENV='http://www.w3.org/2003/05/soap-envelope' xmlns:SOAP-ENC='http://www.w3.org/2003/05/soap-encoding' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:ns2='ALSongWebServer/Service1Soap' xmlns:ns1='ALSongWebServer' xmlns:ns3='ALSongWebServer/Service1Soap12'>
32 <ns1:GetResembleLyric2>
34 <ns1:strTitle>%s</ns1:strTitle>
35 <ns1:strArtistName>%s</ns1:strArtistName>
36 <ns1:nCurPage>0</ns1:nCurPage>
38 </ns1:GetResembleLyric2>
# Trace the search terms; `debug` is handed to the logger as its first argument.
utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))
# Title and artist become XML element content inside the SOAP envelope, so
# markup characters ('&', '<', '>') have to be escaped; otherwise a track such
# as "Tom & Jerry" produces a malformed request.
from xml.sax.saxutils import escape

safe_title = lyrics.title
safe_artist = lyrics.artist
# Falsy values (None, '') are interpolated untouched, exactly as before.
if safe_title:
    safe_title = escape(safe_title)
if safe_artist:
    safe_artist = escape(safe_artist)

headers = {'Content-Type': 'text/xml; charset=utf-8'}
request = urllib2.Request(ALSONG_URL, ALSONG_TMPL % (safe_title, safe_artist), headers)
response = urllib2.urlopen(request)
try:
    # Raw body of the SOAP reply.
    Page = response.read()
finally:
    # Release the connection even when the read fails part-way.
    response.close()
# Parse the reply and read the text of the first artist and title elements.
tree = xml.parseString(Page)

artist_elements = tree.getElementsByTagName('strArtistName')
name = artist_elements[0].childNodes[0].data

title_elements = tree.getElementsByTagName('strTitle')
track = title_elements[0].childNodes[0].data
def _close_enough(wanted, received):
    # Case-insensitive similarity test; the ratio must exceed 0.8.
    matcher = difflib.SequenceMatcher(None, wanted.lower(), received.lower())
    return matcher.ratio() > 0.8

# Accept the result only when both artist and title resemble the request.
# The title is compared only after the artist has passed.
if _close_enough(lyrics.artist, name) and _close_enough(lyrics.title, track):
    lyric_text = tree.getElementsByTagName('strLyric')[0].childNodes[0].data
    # The service separates lines with literal <br> markers.
    lyr = lyric_text.replace('<br>', '\n')
    lyrics.lyrics = lyr.encode('utf-8')
# Fill a Lyrics object with a fixed sample track and run it through the fetcher.
lyrics = utilities.Lyrics()
for field, value in (
        ('source', __title__),
        ('syncronized', __syncronized__),
        ('artist', 'Dire Straits'),
        ('album', 'Brothers In Arms'),
        ('title', 'Money For Nothing')):
    setattr(lyrics, field, value)

found = fetcher.get_lyrics(lyrics)
# Success message for the self-test.
# NOTE(review): the condition choosing between these two messages is not
# visible in this excerpt -- presumably it tests `found`; confirm.
utilities.log(True, "Everything appears in order.")
# Failure message for the self-test.
utilities.log(True, "The lyrics for the test search failed!")
from lxml import etree

# Root <lyrics> element followed by one child per metadata field.
xml = etree.XML(u'<lyrics></lyrics>')
for tag, text in (
        ("artist", lyrics.artist),
        ("album", lyrics.album),
        ("title", lyrics.title),
        ("syncronized", 'True' if __syncronized__ else 'False'),
        ("grabber", lyrics.source)):
    etree.SubElement(xml, tag).text = text
# Split the lyric text into individual lines.
lines = lyrics.lyrics.splitlines()
# Adds a <lyric> element carrying one line of text.
# NOTE(review): `line` is not bound anywhere in this excerpt -- presumably a
# loop over `lines` encloses this statement; confirm against the full file.
etree.SubElement(xml, "lyric").text = line

# Serialise the tree as pretty-printed UTF-8 with an XML declaration and pass
# the result to the logger.
utilities.log(True, etree.tostring(xml, encoding='UTF-8', pretty_print=True,
                                   xml_declaration=True))
from lxml import etree

# Root <grabber> element followed by one child per descriptive field.
version = etree.XML(u'<grabber></grabber>')
grabber_fields = (
    ("name", __title__),
    ("author", __author__),
    ("command", 'alsong.py'),
    ("type", 'lyrics'),
    ("description", __description__),
    ("version", __version__),
    ("priority", __priority__),
    ("syncronized", 'True' if __syncronized__ else 'False'),
)
for tag, text in grabber_fields:
    etree.SubElement(version, tag).text = text

# Serialise as pretty-printed UTF-8 with an XML declaration and log the result.
serialised = etree.tostring(version, encoding='UTF-8', pretty_print=True,
                            xml_declaration=True)
utilities.log(True, serialised)
parser = OptionParser()

# One row per command-line option: (short flag, long flag, keyword settings).
# Rows are registered top to bottom, which keeps the original option order.
option_table = (
    ('-v', "--version", dict(action="store_true", default=False,
                             dest="version", help="Display version and author")),
    ('-t', "--test", dict(action="store_true", default=False,
                          dest="test", help="Perform self-test for dependencies.")),
    ('-s', "--search", dict(action="store_true", default=False,
                            dest="search", help="Search for lyrics.")),
    ('-a', "--artist", dict(metavar="ARTIST", default=None,
                            dest="artist", help="Artist of track.")),
    ('-b', "--album", dict(metavar="ALBUM", default=None,
                           dest="album", help="Album of track.")),
    ('-n', "--title", dict(metavar="TITLE", default=None,
                           dest="title", help="Title of track.")),
    ('-f', "--filename", dict(metavar="FILENAME", default=None,
                              dest="filename", help="Filename of track.")),
    ('-d', '--debug', dict(action="store_true", default=False,
                           dest="debug", help="Show debug messages")),
)
for short_flag, long_flag, settings in option_table:
    parser.add_option(short_flag, long_flag, **settings)

# Parse the process arguments; positional leftovers end up in `args`.
opts, args = parser.parse_args()
# Result container tagged with this grabber's title and synchronisation flag.
lyrics = utilities.Lyrics()
lyrics.source = __title__
lyrics.syncronized = __syncronized__
# Copy the search terms from the parsed command-line options.
# NOTE(review): these assignments may be individually guarded in the full
# file; no guards are visible in this excerpt -- confirm.
lyrics.artist = opts.artist
lyrics.album = opts.album
lyrics.title = opts.title
lyrics.filename = opts.filename
# utilities.log is called with a flag first and the message second at every
# other call site in this file; True mirrors the other user-facing messages.
# NOTE(review): the logger's signature is not visible here -- confirm.
utilities.log(True, 'ERROR: invalid arguments found')
173 if fetcher.get_lyrics(lyrics):
177 utilities.log(
True,
"No lyrics found for this track")
180 if __name__ ==
'__main__':