MythTV  master
baidu.py
Go to the documentation of this file.
1 #-*- coding: UTF-8 -*-
2 """
3 Scraper for http://www.baidu.com
4 
5 ronie
6 """
7 
8 import sys
9 import urllib
10 import socket
11 import re
12 import chardet
13 import difflib
14 from optparse import OptionParser
15 from common import utilities
16 
# Grabber metadata: written into the <grabber> document by buildVersion()
# and copied onto every Lyrics object this script creates.
__author__ = "Paul Harrison and 'ronie'"
__title__ = "Baidu"
__description__ = "Search http://www.baidu.com for lyrics"
__version__ = "0.1"
__priority__ = "210"
# NOTE: the spelling is kept as-is; the same spelling is used for the Lyrics
# attribute and the XML element emitted elsewhere in this file.
__syncronized__ = True

# Passed as the first argument to utilities.log(); main() turns it on for -d.
debug = False

# Default timeout (seconds) for sockets created after this point.
socket.setdefaulttimeout(10)
27 
class LyricsFetcher:
    """Look up lyrics through the music.baidu.com lyric (lrc) search."""

    def __init__(self):
        # Search page URL; filled with (title, artist).
        self.BASE_URL = 'http://music.baidu.com/search/lrc?key=%s-%s'
        # Prefix for the site-relative download path scraped from the results.
        self.LRC_URL = 'http://music.baidu.com%s'

    def _parse_result(self, data):
        """Extract the song title, artist and lyric path from a results page.

        Each field is the first occurrence of its pattern in *data*.

        Returns a ``(track, name, found_url)`` tuple, or None when any of the
        three patterns does not match.
        """
        patterns = (
            r'song-title.*?<em>(.*?)</em>',
            r'artist-title.*?<em>(.*?)</em>',
            r"down-lrc-btn.*?':'(.*?)'",
        )
        fields = []
        for pattern in patterns:
            match = re.search(pattern, data, flags=re.DOTALL)
            if match is None:
                return None
            fields.append(match.group(1))
        return tuple(fields)

    def get_lyrics(self, lyrics):
        """Search for ``lyrics.title`` / ``lyrics.artist`` and store the text.

        On success the decoded lyric text is assigned to ``lyrics.lyrics``
        and True is returned.  False is returned when a request or the page
        scrape fails, or when the scraped artist and title are not both a
        close match for the requested ones (SequenceMatcher ratio above 0.8,
        compared case-insensitively).
        """
        utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))

        try:
            url = self.BASE_URL % (lyrics.title, lyrics.artist)
            utilities.log(debug, "%s: searching url %s" % (__title__, url))
            data = urllib.urlopen(url).read()
            result = self._parse_result(data)
            if result is None:
                return False
            track, name, found_url = result
            artist_ratio = difflib.SequenceMatcher(None, lyrics.artist.lower(), name.lower()).ratio()
            title_ratio = difflib.SequenceMatcher(None, lyrics.title.lower(), track.lower()).ratio()
            if artist_ratio <= 0.8 or title_ratio <= 0.8:
                return False
            lyr = urllib.urlopen(self.LRC_URL % found_url).read()
        except Exception:
            # Best effort: any network or parsing error means "not found".
            # Exception (not a bare except) lets Ctrl-C and sys.exit through.
            return False

        # chardet reports an encoding of None when it cannot classify the
        # data; fall back to UTF-8 instead of crashing inside decode().
        enc = chardet.detect(lyr)
        lyr = lyr.decode(enc['encoding'] or 'utf-8', 'ignore')
        lyrics.lyrics = lyr
        return True
57 
def performSelfTest():
    """Run one fixed search and exit with 0 if it found lyrics, 1 if not.

    Never returns: both outcomes end in sys.exit().
    """
    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__
    lyrics.artist = 'Dire Straits'
    lyrics.album = 'Brothers In Arms'
    lyrics.title = 'Money For Nothing'

    fetcher = LyricsFetcher()
    found = fetcher.get_lyrics(lyrics)

    if found:
        utilities.log(True, "Everything appears in order.")
        sys.exit(0)

    utilities.log(True, "The lyrics for the test search failed!")
    sys.exit(1)
76 
def buildLyrics(lyrics):
    """Log *lyrics* as a ``<lyrics>`` XML document, then exit with status 0.

    The document holds the artist, album, title, syncronized flag and
    grabber name, followed by one ``<lyric>`` element per line of
    ``lyrics.lyrics``.  Never returns.
    """
    from lxml import etree
    xml = etree.XML(u'<lyrics></lyrics>')
    etree.SubElement(xml, "artist").text = lyrics.artist
    etree.SubElement(xml, "album").text = lyrics.album
    etree.SubElement(xml, "title").text = lyrics.title
    etree.SubElement(xml, "syncronized").text = 'True' if __syncronized__ else 'False'
    etree.SubElement(xml, "grabber").text = lyrics.source

    for line in lyrics.lyrics.splitlines():
        etree.SubElement(xml, "lyric").text = line

    utilities.log(True, etree.tostring(xml, encoding='UTF-8', pretty_print=True,
                                       xml_declaration=True))
    sys.exit(0)
93 
def buildVersion():
    """Log a ``<grabber>`` XML document describing this script, then exit 0.

    The values come from the module-level metadata constants.  Never
    returns.
    """
    from lxml import etree
    version = etree.XML(u'<grabber></grabber>')
    etree.SubElement(version, "name").text = __title__
    etree.SubElement(version, "author").text = __author__
    etree.SubElement(version, "command").text = 'baidu.py'
    etree.SubElement(version, "type").text = 'lyrics'
    etree.SubElement(version, "description").text = __description__
    etree.SubElement(version, "version").text = __version__
    etree.SubElement(version, "priority").text = __priority__
    etree.SubElement(version, "syncronized").text = 'True' if __syncronized__ else 'False'

    utilities.log(True, etree.tostring(version, encoding='UTF-8', pretty_print=True,
                                       xml_declaration=True))
    sys.exit(0)
109 
def main():
    """Parse the command line and run the requested grabber action.

    -v logs the grabber description and -t runs the self-test; both of
    those helpers end in sys.exit().  Otherwise the artist/album/title/
    filename options are copied onto a Lyrics object, a search is made, and
    the process exits with 0 after logging the lyrics or 1 when nothing was
    found.  Stray positional arguments are reported and exit with 1.
    """
    global debug

    parser = OptionParser()

    parser.add_option('-v', "--version", action="store_true", default=False,
                      dest="version", help="Display version and author")
    parser.add_option('-t', "--test", action="store_true", default=False,
                      dest="test", help="Perform self-test for dependencies.")
    parser.add_option('-s', "--search", action="store_true", default=False,
                      dest="search", help="Search for lyrics.")
    parser.add_option('-a', "--artist", metavar="ARTIST", default=None,
                      dest="artist", help="Artist of track.")
    parser.add_option('-b', "--album", metavar="ALBUM", default=None,
                      dest="album", help="Album of track.")
    parser.add_option('-n', "--title", metavar="TITLE", default=None,
                      dest="title", help="Title of track.")
    parser.add_option('-f', "--filename", metavar="FILENAME", default=None,
                      dest="filename", help="Filename of track.")
    parser.add_option('-d', '--debug', action="store_true", default=False,
                      dest="debug", help=("Show debug messages"))

    opts, args = parser.parse_args()

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__

    if opts.debug:
        debug = True

    if opts.version:
        buildVersion()

    if opts.test:
        performSelfTest()

    if opts.artist:
        lyrics.artist = opts.artist
    if opts.album:
        lyrics.album = opts.album
    if opts.title:
        lyrics.title = opts.title
    if opts.filename:
        lyrics.filename = opts.filename

    if args:
        # Every other utilities.log() call in this file passes a flag first;
        # the flag was missing here.
        utilities.log(True, 'ERROR: invalid arguments found')
        sys.exit(1)

    fetcher = LyricsFetcher()
    if fetcher.get_lyrics(lyrics):
        buildLyrics(lyrics)
        sys.exit(0)
    else:
        utilities.log(True, "No lyrics found for this track")
        sys.exit(1)
# Run the grabber only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
baidu.LyricsFetcher.LRC_URL
LRC_URL
Definition: baidu.py:31
discid.disc.read
def read(device=None, features=[])
Definition: disc.py:35
baidu.main
def main()
Definition: baidu.py:110
baidu.buildVersion
def buildVersion()
Definition: baidu.py:94
baidu.LyricsFetcher.BASE_URL
BASE_URL
Definition: baidu.py:30
baidu.performSelfTest
def performSelfTest()
Definition: baidu.py:58
baidu.LyricsFetcher
Definition: baidu.py:28
baidu.buildLyrics
def buildLyrics(lyrics)
Definition: baidu.py:77
baidu.LyricsFetcher.__init__
def __init__(self)
Definition: baidu.py:29
baidu.LyricsFetcher.get_lyrics
def get_lyrics(self, lyrics)
Definition: baidu.py:33