MythTV master
lyricsScraper.py
Go to the documentation of this file.
1#-*- coding: UTF-8 -*-
2'''
3Scraper for http://lyrics.alsong.co.kr/
4driip
5'''
6
7import sys
8import socket
10import difflib
11import xml.dom.minidom as xml
12from utilities import *
13
# Scraper name: used in this module's log messages and stored as the
# `source` of every Lyrics object returned by LyricsFetcher.get_lyrics().
__title__ = 'Alsong'
# NOTE(review): not read anywhere in this file; presumably consumed by the
# code that loads scrapers to order them -- confirm against the caller.
__priority__ = '150'
# Copied onto each Lyrics result as its `lrc` attribute.
__lrc__ = True

# Process-wide default timeout, in seconds, for sockets created after this
# call (affects every module in the interpreter, not just this scraper).
socket.setdefaulttimeout(10)

# Web-service endpoint that get_lyrics() posts the query to.
ALSONG_URL = 'http://lyrics.alsong.net/alsongwebservice/service1.asmx'

# SOAP envelope for a GetResembleLyric2 query. The two %s placeholders are
# filled, in order, with the song title and the artist name; the result page
# index (nCurPage) is fixed at 0.
ALSONG_TMPL = '''\
<?xml version='1.0' encoding='UTF-8'?>
<SOAP-ENV:Envelope xmlns:SOAP-ENV='http://www.w3.org/2003/05/soap-envelope' xmlns:SOAP-ENC='http://www.w3.org/2003/05/soap-encoding' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:ns2='ALSongWebServer/Service1Soap' xmlns:ns1='ALSongWebServer' xmlns:ns3='ALSongWebServer/Service1Soap12'>
<SOAP-ENV:Body>
 <ns1:GetResembleLyric2>
 <ns1:stQuery>
 <ns1:strTitle>%s</ns1:strTitle>
 <ns1:strArtistName>%s</ns1:strArtistName>
 <ns1:nCurPage>0</ns1:nCurPage>
 </ns1:stQuery>
 </ns1:GetResembleLyric2>
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
'''
36
37
class LyricsFetcher:
    """Lyrics scraper backed by the ALSong SOAP web service.

    ``get_lyrics`` posts a ``GetResembleLyric2`` query built from
    ``ALSONG_TMPL`` to ``ALSONG_URL`` and accepts the first entry of the
    response only when both its artist and its title closely match the
    requested song.
    """

    # Similarity ratio that artist and title must each exceed (exclusive).
    MATCH_THRESHOLD = 0.8

    def __init__(self):
        self.base_url = 'http://lyrics.alsong.co.kr/'

    @staticmethod
    def _is_similar(wanted, found, threshold=0.8):
        """Return True when two strings match closely, ignoring case.

        The comparison uses ``difflib.SequenceMatcher`` and succeeds only
        when the ratio is strictly greater than *threshold*.
        """
        ratio = difflib.SequenceMatcher(None, wanted.lower(), found.lower()).ratio()
        return ratio > threshold

    @staticmethod
    def _parse_response(page):
        """Extract ``(artist, title, lyric)`` from a service response.

        Only the first ``strArtistName``, ``strTitle`` and ``strLyric``
        elements are considered, and only the first child node of each.
        Literal ``<br>`` markers in the lyric text become newlines.

        Returns:
            A 3-tuple of strings, or None when *page* is not well-formed
            XML or any of the three fields is missing or empty.
        """
        # Imported by full path: the module-level name ``xml`` is bound to
        # xml.dom.minidom, so ``xml.parsers`` is not reachable through it.
        from xml.parsers.expat import ExpatError

        try:
            tree = xml.parseString(page)
        except (ExpatError, ValueError):
            return None

        def first_text(tag):
            elements = tree.getElementsByTagName(tag)
            if not elements or not elements[0].childNodes:
                return None
            # A non-text first child has no ``data``; treat it as missing.
            return getattr(elements[0].childNodes[0], 'data', None)

        artist = first_text('strArtistName')
        title = first_text('strTitle')
        lyric = first_text('strLyric')
        if artist is None or title is None or lyric is None:
            return None
        return artist, title, lyric.replace('<br>', '\n')

    def get_lyrics(self, song):
        """Look up lyrics for *song* on the ALSong service.

        Args:
            song: object exposing ``artist`` and ``title`` strings.

        Returns:
            A populated ``Lyrics`` object when the first result matches the
            requested artist and title; otherwise None (request failure,
            unparsable or empty response, or no close match).
        """
        # Local imports keep this method self-contained regardless of what
        # the module header or ``utilities`` happen to export.
        import urllib.request
        from xml.sax.saxutils import escape

        log('%s: searching lyrics for %s - %s' % (__title__, song.artist, song.title))
        lyrics = Lyrics()
        lyrics.song = song
        lyrics.source = __title__
        lyrics.lrc = __lrc__

        # Escape &, < and > so that names such as "Rock & Roll" do not
        # produce a malformed SOAP envelope.
        payload = ALSONG_TMPL % (escape(str(song.title)), escape(str(song.artist)))
        headers = {'Content-Type': 'text/xml; charset=utf-8'}
        try:
            request = urllib.request.Request(ALSONG_URL, payload.encode('utf-8'), headers)
            with urllib.request.urlopen(request) as response:
                page = response.read().decode('utf-8')
        except Exception as exc:
            # Scraping is best-effort: any network, HTTP or decoding failure
            # means "no lyrics from this source", but it is logged rather
            # than silently discarded.
            log('%s: request failed: %s' % (__title__, exc))
            return None

        parsed = self._parse_response(page)
        if parsed is None:
            return None

        artist, title, lyric = parsed
        threshold = self.MATCH_THRESHOLD
        if not (self._is_similar(song.artist, artist, threshold)
                and self._is_similar(song.title, title, threshold)):
            return None

        lyrics.lyrics = lyric
        return lyrics
None log(str msg, int level=LOGDEBUG)
Definition: xbmc.py:9