MythTV  master
lyricsScraper.py
Go to the documentation of this file.
1 #-*- coding: UTF-8 -*-
2 '''
3 Scraper for http://lyrics.alsong.co.kr/
4 driip
5 '''
6 
7 import sys
8 import socket
9 import urllib.request
10 import difflib
11 import xml.dom.minidom as xml
12 from utilities import *
13 
# Scraper metadata. get_lyrics() copies __title__ into Lyrics.source (and uses
# it as the log prefix) and __lrc__ into Lyrics.lrc.
# NOTE(review): __priority__ is not read anywhere in this file; presumably the
# code that loads the scrapers uses it to order them -- confirm.
__title__ = 'Alsong'
__priority__ = '150'
__lrc__ = True

# Default timeout, in seconds, for sockets created after this point. This is
# a process-wide setting, not one limited to this module.
socket.setdefaulttimeout(10)

# Web-service endpoint that get_lyrics() sends its SOAP request to.
ALSONG_URL = 'http://lyrics.alsong.net/alsongwebservice/service1.asmx'

# SOAP envelope for the GetResembleLyric2 call. It has two %s placeholders,
# filled in this order: song title, then artist name. nCurPage is fixed at 0.
ALSONG_TMPL = '''\
<?xml version='1.0' encoding='UTF-8'?>
<SOAP-ENV:Envelope xmlns:SOAP-ENV='http://www.w3.org/2003/05/soap-envelope' xmlns:SOAP-ENC='http://www.w3.org/2003/05/soap-encoding' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:ns2='ALSongWebServer/Service1Soap' xmlns:ns1='ALSongWebServer' xmlns:ns3='ALSongWebServer/Service1Soap12'>
<SOAP-ENV:Body>
 <ns1:GetResembleLyric2>
 <ns1:stQuery>
 <ns1:strTitle>%s</ns1:strTitle>
 <ns1:strArtistName>%s</ns1:strArtistName>
 <ns1:nCurPage>0</ns1:nCurPage>
 </ns1:stQuery>
 </ns1:GetResembleLyric2>
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
'''
36 
37 
    def __init__(self):
        """Create the fetcher and record the Alsong site URL in ``base_url``."""
        # NOTE(review): get_lyrics() posts to ALSONG_URL and never reads
        # base_url; presumably it is kept for callers or for reference -- confirm.
        self.base_url = 'http://lyrics.alsong.co.kr/'
41 
42  def get_lyrics(self, song):
43  log('%s: searching lyrics for %s - %s' % (__title__, song.artist, song.title))
44  lyrics = Lyrics()
45  lyrics.song = song
46  lyrics.source = __title__
47  lyrics.lrc = __lrc__
48  try:
49  headers = {'Content-Type':'text/xml; charset=utf-8'}
50  request = urllib.request.Request(ALSONG_URL, bytes(ALSONG_TMPL % (song.title,song.artist), 'utf-8'), headers)
51  response = urllib.request.urlopen(request)
52  Page = response.read().decode('utf-8')
53  except:
54  return
55  tree = xml.parseString(Page)
56 
57  try:
58  name = tree.getElementsByTagName('strArtistName')[0].childNodes[0].data
59  track = tree.getElementsByTagName('strTitle')[0].childNodes[0].data
60  except:
61  return
62  if (difflib.SequenceMatcher(None, song.artist.lower(), name.lower()).ratio() > 0.8) and (difflib.SequenceMatcher(None, song.title.lower(), track.lower()).ratio() > 0.8):
63  lyr = tree.getElementsByTagName('strLyric')[0].childNodes[0].data.replace('<br>','\n')
64  lyrics.lyrics = lyr
65  return lyrics
alsong.lyricsScraper.LyricsFetcher.get_lyrics
def get_lyrics(self, song)
Definition: lyricsScraper.py:42
alsong.lyricsScraper.LyricsFetcher.__init__
def __init__(self)
Definition: lyricsScraper.py:39
alsong.lyricsScraper.LyricsFetcher.base_url
base_url
Definition: lyricsScraper.py:40
alsong.lyricsScraper.LyricsFetcher
Definition: lyricsScraper.py:38
musicbrainzngs.compat.bytes
bytes
Definition: compat.py:49
xbmc.log
None log(str msg, int level=LOGDEBUG)
Definition: xbmc.py:9