MythTV master
lyricsScraper.py
Go to the documentation of this file.
1#-*- coding: UTF-8 -*-
2'''
3Scraper for http://www.baidu.com
4
5ronie
6'''
7
9import socket
10import re
11import chardet
12import difflib
13from utilities import *
14
# Process-wide default timeout (seconds) for sockets created after this
# module is imported, so a stalled HTTP request cannot hang the lookup.
socket.setdefaulttimeout(10)

# Scraper name; get_lyrics() uses it in log messages and as Lyrics.source.
__title__ = 'Baidu'
# NOTE(review): presumably read by the caller to order scrapers - confirm.
__priority__ = '130'
# Copied into Lyrics.lrc by get_lyrics().
__lrc__ = True
20
def __init__(self):
    """Set up the URL templates used by get_lyrics()."""
    # Filled with the path extracted from the search result page.
    self.LRC_URL = 'http://music.baidu.com%s'
    # Search endpoint; the two placeholders receive (title, artist).
    self.BASE_URL = 'http://music.baidu.com/search/lrc?key=%s-%s'
25
def get_lyrics(self, song):
    """Search Baidu Music for lyrics of ``song`` and download them.

    The search page is fetched with the song's title and artist, the first
    hit's title, artist and download path are extracted with regular
    expressions, and the lyrics file is downloaded only when both the
    artist and the title are a close match (similarity ratio above 0.8).

    Args:
        song: object exposing ``artist`` and ``title`` string attributes.

    Returns:
        A ``Lyrics`` object with ``song``, ``source``, ``lrc`` and
        ``lyrics`` filled in, or ``None`` when nothing suitable was found
        or the lookup failed.
    """
    # Function-scope imports keep the method self-contained regardless of
    # what the module header imports.
    import urllib.request
    from urllib.parse import quote

    log('%s: searching lyrics for %s - %s' % (__title__, song.artist, song.title))
    lyrics = Lyrics()
    lyrics.song = song
    lyrics.source = __title__
    lyrics.lrc = __lrc__
    try:
        # Percent-encode the query parts: spaces and non-ASCII characters
        # are not valid in a URL and make urlopen() fail.
        url = self.BASE_URL % (quote(song.title, safe=''), quote(song.artist, safe=''))
        with urllib.request.urlopen(url) as response:
            data = response.read().decode('utf-8')

        songmatch = re.search('song-title.*?<em>(.*?)</em>', data, flags=re.DOTALL)
        artistmatch = re.search('artist-title.*?<em>(.*?)</em>', data, flags=re.DOTALL)
        urlmatch = re.search("down-lrc-btn.*?':'(.*?)'", data, flags=re.DOTALL)
        if not (songmatch and artistmatch and urlmatch):
            # The page carries no usable search result.
            return None
        track = songmatch.group(1)
        name = artistmatch.group(1)
        found_url = urlmatch.group(1)

        artist_ratio = difflib.SequenceMatcher(None, song.artist.lower(), name.lower()).ratio()
        title_ratio = difflib.SequenceMatcher(None, song.title.lower(), track.lower()).ratio()
        if not (artist_ratio > 0.8 and title_ratio > 0.8):
            # The hit is for a different song; do not return wrong lyrics.
            return None

        with urllib.request.urlopen(self.LRC_URL % found_url) as response:
            lyr = response.read()
    except Exception as err:
        # Best-effort scraper: a failure here must not abort the caller,
        # but it is logged instead of being silently discarded. Unlike a
        # bare ``except``, this lets KeyboardInterrupt/SystemExit through.
        log('%s: lookup failed: %s' % (__title__, err))
        return None

    if not lyr:
        # Empty download: nothing to decode.
        return None

    # chardet reports ``None`` when it cannot guess the encoding.
    encoding = chardet.detect(lyr)['encoding'] or 'utf-8'
    try:
        text = lyr.decode(encoding, 'ignore')
    except LookupError:
        # The detected encoding name is unknown to Python's codec registry.
        text = lyr.decode('utf-8', 'ignore')
    lyrics.lyrics = text
    return lyrics
def read(device=None, features=[])
Definition: disc.py:35
None log(str msg, int level=LOGDEBUG)
Definition: xbmc.py:9