MythTV  master
alsong.py
Go to the documentation of this file.
1 #-*- coding: UTF-8 -*-
2 """
3 Scraper for http://lyrics.alsong.co.kr/
4 driip
5 """
6 
7 import sys
8 import socket
9 import urllib2
10 import difflib
11 import xml.dom.minidom as xml
12 from optparse import OptionParser
13 from common import utilities
14 
# Grabber metadata: emitted in the version XML and stamped onto Lyrics objects.
# NOTE: the spelling "syncronized" is used consistently throughout this file
# (attribute names and XML tags), so it must not be "corrected" here alone.
__author__ = "Paul Harrison and 'driip'"
__title__ = "Alsong"
__description__ = "Search http://lyrics.alsong.co.kr"
__version__ = "0.1"
__priority__ = "140"
__syncronized__ = True

# Module-wide debug flag; main() switches it on when --debug is given.
debug = False

# Default timeout (seconds) for sockets created after this point, so the
# web-service request cannot block indefinitely.
socket.setdefaulttimeout(10)

# Endpoint the SOAP request is posted to.
ALSONG_URL = 'http://lyrics.alsong.net/alsongwebservice/service1.asmx'

# SOAP envelope for the GetResembleLyric2 call.  The two %s slots are filled
# with the track title and the artist name, in that order.
ALSONG_TMPL = '''\
<?xml version='1.0' encoding='UTF-8'?>
<SOAP-ENV:Envelope xmlns:SOAP-ENV='http://www.w3.org/2003/05/soap-envelope' xmlns:SOAP-ENC='http://www.w3.org/2003/05/soap-encoding' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:ns2='ALSongWebServer/Service1Soap' xmlns:ns1='ALSongWebServer' xmlns:ns3='ALSongWebServer/Service1Soap12'>
<SOAP-ENV:Body>
 <ns1:GetResembleLyric2>
 <ns1:stQuery>
 <ns1:strTitle>%s</ns1:strTitle>
 <ns1:strArtistName>%s</ns1:strArtistName>
 <ns1:nCurPage>0</ns1:nCurPage>
 </ns1:stQuery>
 </ns1:GetResembleLyric2>
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
'''
42 
class LyricsFetcher:
    """Look up synchronised lyrics through the Alsong SOAP web service."""

    def __init__(self):
        # Site this scraper is associated with.  get_lyrics() posts to
        # ALSONG_URL rather than to this address.
        self.base_url = 'http://lyrics.alsong.co.kr/'

    def _first_text(self, tree, tag):
        """Return the text of the first <tag> element in *tree*, or None.

        None is returned when no such element exists, when the element has
        no children, or when its first child carries no ``data`` attribute.
        """
        try:
            return tree.getElementsByTagName(tag)[0].childNodes[0].data
        except (IndexError, AttributeError):
            return None

    def get_lyrics(self, lyrics):
        """Query the web service and store the result on *lyrics*.

        Reads ``lyrics.artist``, ``lyrics.album`` and ``lyrics.title``.  Only
        the first result in the response is considered; it is accepted when
        both its artist and its title are more than 80% similar
        (difflib ratio, case-insensitive) to the requested ones.

        Returns True and sets ``lyrics.lyrics`` (UTF-8 encoded, with
        ``<br>`` turned into newlines) on success.  Returns False when the
        request fails, the response cannot be parsed, the first result does
        not match, or it carries no lyric text.
        """
        # Local import: the module-level name ``xml`` is already bound to
        # xml.dom.minidom, and this keeps the block self-contained.
        from xml.sax.saxutils import escape

        utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))

        try:
            # Escape &, < and > so a title such as "Rock & Roll" cannot break
            # (or inject markup into) the SOAP envelope.
            payload = ALSONG_TMPL % (escape(lyrics.title), escape(lyrics.artist))
            headers = {'Content-Type': 'text/xml; charset=utf-8'}
            request = urllib2.Request(ALSONG_URL, payload, headers)
            response = urllib2.urlopen(request)
            try:
                page = response.read()
            finally:
                response.close()
            tree = xml.parseString(page)
        except Exception:
            # Best effort: any network, HTTP or malformed-response problem
            # simply means "no lyrics from this source".  KeyboardInterrupt
            # and SystemExit are deliberately not swallowed.
            return False

        name = self._first_text(tree, 'strArtistName')
        track = self._first_text(tree, 'strTitle')
        if name is None or track is None:
            return False

        def similar(wanted, found):
            matcher = difflib.SequenceMatcher(None, wanted.lower(), found.lower())
            return matcher.ratio() > 0.8

        if not (similar(lyrics.artist, name) and similar(lyrics.title, track)):
            return False

        # A matching result without lyric text is treated as "not found"
        # instead of raising IndexError.
        text = self._first_text(tree, 'strLyric')
        if text is None:
            return False

        lyrics.lyrics = text.replace('<br>', '\n').encode('utf-8')
        return True
69 
def performSelfTest():
    """Run a fixed test search and exit with a status reflecting the result.

    Searches for 'Money For Nothing' by Dire Straits.  Logs a success
    message and exits with status 0 when lyrics are found; otherwise logs a
    failure message and exits with status 1.  This function never returns.
    """
    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__
    lyrics.artist = 'Dire Straits'
    lyrics.album = 'Brothers In Arms'
    lyrics.title = 'Money For Nothing'

    fetcher = LyricsFetcher()
    found = fetcher.get_lyrics(lyrics)

    if found:
        utilities.log(True, "Everything appears in order.")
        sys.exit(0)

    utilities.log(True, "The lyrics for the test search failed!")
    sys.exit(1)
88 
def buildLyrics(lyrics):
    """Serialise *lyrics* as a <lyrics> XML document, log it and exit.

    The document holds the artist, album, title, syncronized flag and
    grabber name, followed by one <lyric> element per line of
    ``lyrics.lyrics``.  The process exits with status 0 afterwards, so this
    function never returns.
    """
    from lxml import etree

    root = etree.XML(u'<lyrics></lyrics>')

    # Header fields, in the order they appear in the output document.
    header = (
        ("artist", lyrics.artist),
        ("album", lyrics.album),
        ("title", lyrics.title),
        ("syncronized", 'True' if __syncronized__ else 'False'),
        ("grabber", lyrics.source),
    )
    for tag, value in header:
        etree.SubElement(root, tag).text = value

    # One <lyric> element per line of the lyric text.
    for text in lyrics.lyrics.splitlines():
        etree.SubElement(root, "lyric").text = text

    document = etree.tostring(root, encoding='UTF-8', pretty_print=True,
                              xml_declaration=True)
    utilities.log(True, document)
    sys.exit(0)
105 
def buildVersion():
    """Log a <grabber> XML document describing this script, then exit.

    The document carries the grabber's name, author, command, type,
    description, version, priority and syncronized flag, taken from the
    module-level metadata.  The process exits with status 0 afterwards, so
    this function never returns.
    """
    from lxml import etree

    version = etree.XML(u'<grabber></grabber>')
    etree.SubElement(version, "name").text = __title__
    etree.SubElement(version, "author").text = __author__
    etree.SubElement(version, "command").text = 'alsong.py'
    etree.SubElement(version, "type").text = 'lyrics'
    etree.SubElement(version, "description").text = __description__
    etree.SubElement(version, "version").text = __version__
    etree.SubElement(version, "priority").text = __priority__
    etree.SubElement(version, "syncronized").text = 'True' if __syncronized__ else 'False'

    utilities.log(True, etree.tostring(version, encoding='UTF-8', pretty_print=True,
                                       xml_declaration=True))
    sys.exit(0)
121 
def main():
    """Command-line entry point.

    Parses the options, then performs one action:

    * ``--version``: log the grabber description and exit.
    * ``--test``: run the self-test and exit.
    * otherwise: search for the track described by ``--artist``,
      ``--album``, ``--title`` and ``--filename``, log the lyrics XML and
      exit 0, or exit 1 when nothing is found.

    Stray positional arguments are rejected with exit status 1.
    """
    global debug

    parser = OptionParser()

    parser.add_option('-v', "--version", action="store_true", default=False,
                      dest="version", help="Display version and author")
    parser.add_option('-t', "--test", action="store_true", default=False,
                      dest="test", help="Perform self-test for dependencies.")
    parser.add_option('-s', "--search", action="store_true", default=False,
                      dest="search", help="Search for lyrics.")
    parser.add_option('-a', "--artist", metavar="ARTIST", default=None,
                      dest="artist", help="Artist of track.")
    parser.add_option('-b', "--album", metavar="ALBUM", default=None,
                      dest="album", help="Album of track.")
    parser.add_option('-n', "--title", metavar="TITLE", default=None,
                      dest="title", help="Title of track.")
    parser.add_option('-f', "--filename", metavar="FILENAME", default=None,
                      dest="filename", help="Filename of track.")
    parser.add_option('-d', '--debug', action="store_true", default=False,
                      dest="debug", help=("Show debug messages"))

    opts, args = parser.parse_args()

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__

    if opts.debug:
        debug = True

    # Both of these exit the process themselves.
    if opts.version:
        buildVersion()

    if opts.test:
        performSelfTest()

    if opts.artist:
        lyrics.artist = opts.artist
    if opts.album:
        lyrics.album = opts.album
    if opts.title:
        lyrics.title = opts.title
    if opts.filename:
        lyrics.filename = opts.filename

    if args:
        # Pass the leading flag like every other utilities.log() call in
        # this file; the original call omitted it.
        utilities.log(True, 'ERROR: invalid arguments found')
        sys.exit(1)

    fetcher = LyricsFetcher()
    if fetcher.get_lyrics(lyrics):
        # buildLyrics() exits with status 0 itself; the explicit exit below
        # is kept as a safety net.
        buildLyrics(lyrics)
        sys.exit(0)
    else:
        utilities.log(True, "No lyrics found for this track")
        sys.exit(1)


if __name__ == '__main__':
    main()
alsong.performSelfTest
def performSelfTest()
Definition: alsong.py:70
alsong.LyricsFetcher.__init__
def __init__(self)
Definition: alsong.py:44
alsong.buildVersion
def buildVersion()
Definition: alsong.py:106
alsong.main
def main()
Definition: alsong.py:122
alsong.LyricsFetcher.base_url
base_url
Definition: alsong.py:45
alsong.LyricsFetcher
Definition: alsong.py:43
alsong.buildLyrics
def buildLyrics(lyrics)
Definition: alsong.py:89
alsong.LyricsFetcher.get_lyrics
def get_lyrics(self, lyrics)
Definition: alsong.py:47