MythTV  master
lyricsScraper.py
Go to the documentation of this file.
1 #-*- coding: UTF-8 -*-
2 '''
3 Scraper for http://www.baidu.com
4 
5 ronie
6 '''
7 
8 import urllib.request
9 import socket
10 import re
11 import chardet
12 import difflib
13 from utilities import *
14 
# Scraper metadata. __title__ is the source name used in log messages and
# stored on the Lyrics object returned by get_lyrics.
15 __title__ = 'Baidu'
# NOTE(review): not referenced anywhere in this file; presumably read by the
# code that loads the scrapers to rank this source -- confirm against caller.
16 __priority__ = '130'
# Copied verbatim into Lyrics.lrc by get_lyrics.
17 __lrc__ = True
18 
# Module-level side effect: sets the default timeout (in seconds) for every
# socket created afterwards in this process, not only this scraper's requests.
19 socket.setdefaulttimeout(10)
20 
22  def __init__(self):
23  self.BASE_URL = 'http://music.baidu.com/search/lrc?key=%s-%s'
24  self.LRC_URL = 'http://music.baidu.com%s'
25 
26  def get_lyrics(self, song):
27  log('%s: searching lyrics for %s - %s' % (__title__, song.artist, song.title))
28  lyrics = Lyrics()
29  lyrics.song = song
30  lyrics.source = __title__
31  lyrics.lrc = __lrc__
32  try:
33  url = self.BASE_URL % (song.title, song.artist)
34  data = urllib.request.urlopen(url).read().decode('utf-8')
35  songmatch = re.search('song-title.*?<em>(.*?)</em>', data, flags=re.DOTALL)
36  track = songmatch.group(1)
37  artistmatch = re.search('artist-title.*?<em>(.*?)</em>', data, flags=re.DOTALL)
38  name = artistmatch.group(1)
39  urlmatch = re.search("down-lrc-btn.*?':'(.*?)'", data, flags=re.DOTALL)
40  found_url = urlmatch.group(1)
41  if (difflib.SequenceMatcher(None, song.artist.lower(), name.lower()).ratio() > 0.8) and (difflib.SequenceMatcher(None, song.title.lower(), track.lower()).ratio() > 0.8):
42  lyr = urllib.request.urlopen(self.LRC_URL % found_url).read()
43  else:
44  return
45  except:
46  return
47 
48  enc = chardet.detect(lyr)
49  lyr = lyr.decode(enc['encoding'], 'ignore')
50  lyrics.lyrics = lyr
51  return lyrics
baidu.lyricsScraper.LyricsFetcher
Definition: lyricsScraper.py:21
baidu.lyricsScraper.LyricsFetcher.LRC_URL
LRC_URL
Definition: lyricsScraper.py:24
discid.disc.read
def read(device=None, features=[])
Definition: disc.py:35
decode
static int decode(unsigned char *vbiline, int scale0, int scale1)
Definition: cc.cpp:67
baidu.lyricsScraper.LyricsFetcher.get_lyrics
def get_lyrics(self, song)
Definition: lyricsScraper.py:26
baidu.lyricsScraper.LyricsFetcher.BASE_URL
BASE_URL
Definition: lyricsScraper.py:23
baidu.lyricsScraper.LyricsFetcher.__init__
def __init__(self)
Definition: lyricsScraper.py:22
xbmc.log
None log(str msg, int level=LOGDEBUG)
Definition: xbmc.py:9