MythTV  master
lyricscom.py
Go to the documentation of this file.
1 # -*- Mode: python; coding: utf-8; tab-width: 8; indent-tabs-mode: t; -*-
2 """
3 Scraper for http://www.lyrics.com/
4 
5 ronie
6 """
7 
8 import sys
9 import re
10 try:
11  from urllib import quote_plus
12  from urllib2 import urlopen
13 except ImportError:
14  from urllib.parse import quote_plus
15  from urllib.request import urlopen
16 
17 import socket
18 import difflib
19 from optparse import OptionParser
20 from common import utilities
21 
# Grabber metadata. buildVersion() reports these values in the <grabber>
# XML document, and __title__ is also used as the lyrics source name and
# as the prefix of log messages.
__author__ = "Paul Harrison and 'ronie'"
__title__ = "Lyrics.Com"
__description__ = "Search http://www.lyrics.com for lyrics"
__priority__ = "240"
__version__ = "0.1"
__syncronized__ = False

# Passed as the first argument of utilities.log() for diagnostic
# messages; main() switches it on when -d/--debug is given.
debug = False

# Process-wide default timeout, in seconds, for newly created sockets.
socket.setdefaulttimeout(10)
32 
class LyricsFetcher:
    """Fetch song lyrics by scraping www.lyrics.com.

    The lookup is a three-step crawl:

    1. query the site search with the artist name and take the first
       link that points at an artist page,
    2. scan the artist page for a song link whose text closely matches
       the requested title,
    3. download the song page and take the text of its ``<pre>`` element.
    """

    # Minimum difflib similarity ratio (exclusive) for a link text to be
    # accepted as the requested song title.
    TITLE_MATCH_RATIO = 0.8

    def __init__(self):
        # '%s' is substituted with the URL-quoted artist name.
        self.url = 'http://www.lyrics.com/serp.php?st=%s&qtype=2'

    def get_lyrics(self, lyrics):
        """Search lyrics.com for the song described by *lyrics*.

        Reads ``lyrics.artist`` and ``lyrics.title`` (``lyrics.album`` is
        only logged) and, on success, stores the lyrics text in
        ``lyrics.lyrics``.

        Returns True when lyrics were found and stored; False when bs4 is
        unavailable, a download or decode fails, or no matching artist,
        song or ``<pre>`` element is found.
        """
        utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))

        try:
            from bs4 import BeautifulSoup
        except ImportError:
            utilities.log(True, "Failed to import BeautifulSoup. This grabber requires python-bs4")
            return False

        # Step 1: artist search. The raw bytes are handed to BeautifulSoup,
        # which does its own encoding detection.
        search_page = self._fetch(self.url % quote_plus(lyrics.artist))
        if search_page is None:
            return False
        artist_href = self._first_href(BeautifulSoup(search_page, 'html.parser'), 'artist/')
        if not artist_href:
            return False
        artist_url = 'http://www.lyrics.com/' + artist_href
        utilities.log(debug, "%s: Artist url is %s" % (__title__, artist_url))

        # Step 2: find the song on the artist page by fuzzy title match.
        artist_page = self._fetch(artist_url, 'utf-8')
        if artist_page is None:
            return False

        def title_matches(text):
            matcher = difflib.SequenceMatcher(None, text.lower(), lyrics.title.lower())
            return matcher.ratio() > self.TITLE_MATCH_RATIO

        song_href = self._first_href(BeautifulSoup(artist_page, 'html.parser'), '/lyric/', title_matches)
        if not song_href:
            return False
        # Song hrefs are site-absolute (leading '/'), unlike artist hrefs.
        song_url = 'http://www.lyrics.com' + song_href
        utilities.log(debug, "%s: Song url is %s" % (__title__, song_url))

        # Step 3: pull the lyrics out of the song page.
        song_page = self._fetch(song_url, 'utf-8')
        if song_page is None:
            return False
        text = self._extract_lyrics(song_page)
        if text is None:
            return False
        lyrics.lyrics = text
        return True

    @staticmethod
    def _fetch(url, encoding=None):
        """Download *url*; return its body, or None if anything fails.

        The body is returned as raw bytes unless *encoding* is given, in
        which case it is decoded with that encoding. The connection is
        always closed, including when reading or decoding fails.
        """
        try:
            handle = urlopen(url)
            # try/finally rather than 'with': the Python 2 urllib2 response
            # object is not a context manager.
            try:
                body = handle.read()
                return body.decode(encoding) if encoding else body
            finally:
                handle.close()
        except Exception as err:
            # Best effort: any network or decoding problem means "no
            # lyrics from this grabber", but Ctrl-C / SystemExit still
            # propagate.
            utilities.log(debug, "%s: failed to fetch %s (%s)" % (__title__, url, err))
            return None

    @staticmethod
    def _first_href(soup, prefix, accept=None):
        """Return the href of the first suitable ``<a>`` in *soup*, or ''.

        An anchor is suitable when it has a single text child, an href
        starting with *prefix* and, if *accept* is given, a text for
        which ``accept(text)`` is true.
        """
        for link in soup.find_all('a'):
            href = link.get('href')
            # Anchors without an href (e.g. named anchors) yield None here
            # and must be skipped instead of calling .startswith on None.
            if not link.string or not href:
                continue
            if href.startswith(prefix) and (accept is None or accept(link.string)):
                return href
        return ''

    @staticmethod
    def _extract_lyrics(html):
        """Return the tag-stripped text of the first ``<pre>`` in *html*.

        Returns None when *html* contains no ``<pre>`` element.
        """
        match = re.search(u'<pre.*?>(.*?)</pre>', html, flags=re.DOTALL)
        if match is None:
            return None
        text = re.sub(u'<[^<]+?>', '', match.group(1))
        # Literal backslash-n sequences in the page stand for line breaks.
        return text.replace('\\n', '\n')
92 
def performSelfTest():
    """Run a known-good search and exit with the result.

    Exits with status 1 if BeautifulSoup cannot be imported or the test
    search finds nothing. On success the lyrics are printed through
    buildLyrics(), which itself exits with status 0.
    """
    try:
        # Availability check only; the fetcher imports bs4 again itself.
        from bs4 import BeautifulSoup  # noqa: F401
    except ImportError:
        utilities.log(True, "Failed to import BeautifulSoup. This grabber requires python-bs4")
        sys.exit(1)

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__
    lyrics.artist = 'Dire Straits'
    lyrics.album = 'Brothers In Arms'
    lyrics.title = 'Money For Nothing'

    fetcher = LyricsFetcher()
    if fetcher.get_lyrics(lyrics):
        utilities.log(True, "Everything appears in order.")
        buildLyrics(lyrics)
        sys.exit(0)

    utilities.log(True, "The lyrics for the test search failed!")
    sys.exit(1)
118 
def buildLyrics(lyrics):
    """Emit *lyrics* as a ``<lyrics>`` XML document and exit.

    The document holds the artist, album, title, synchronisation flag and
    grabber name, followed by one ``<lyric>`` element per line of
    ``lyrics.lyrics``. It is written via utilities.log(), after which the
    process exits with status 0.
    """
    from lxml import etree

    root = etree.XML(u'<lyrics></lyrics>')

    header_fields = (
        ("artist", lyrics.artist),
        ("album", lyrics.album),
        ("title", lyrics.title),
        ("syncronized", 'True' if __syncronized__ else 'False'),
        ("grabber", lyrics.source),
    )
    for tag, value in header_fields:
        etree.SubElement(root, tag).text = value

    for text in lyrics.lyrics.splitlines():
        etree.SubElement(root, "lyric").text = text

    document = etree.tostring(root, encoding='UTF-8',
                              pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(document))
    sys.exit(0)
135 
def buildVersion():
    """Emit the ``<grabber>`` XML description of this script and exit.

    The document is built from the module-level metadata constants and
    written via utilities.log(); the process then exits with status 0.
    """
    from lxml import etree

    version = etree.XML(u'<grabber></grabber>')
    etree.SubElement(version, "name").text = __title__
    etree.SubElement(version, "author").text = __author__
    etree.SubElement(version, "command").text = 'lyricscom.py'
    etree.SubElement(version, "type").text = 'lyrics'
    etree.SubElement(version, "description").text = __description__
    etree.SubElement(version, "version").text = __version__
    etree.SubElement(version, "priority").text = __priority__
    etree.SubElement(version, "syncronized").text = 'True' if __syncronized__ else 'False'

    utilities.log(True, utilities.convert_etree(etree.tostring(version, encoding='UTF-8',
                                                               pretty_print=True, xml_declaration=True)))
    sys.exit(0)
151 
def main():
    """Command-line entry point of the grabber.

    Parses the options, then:

    * ``-v`` prints the grabber description (buildVersion() exits),
    * ``-t`` runs the self test (performSelfTest() exits),
    * otherwise searches for the track given by ``-a``/``-b``/``-n``/``-f``
      and exits with status 0 after printing the lyrics, or status 1 if
      none were found.
    """
    global debug

    parser = OptionParser()

    parser.add_option('-v', "--version", action="store_true", default=False,
                      dest="version", help="Display version and author")
    parser.add_option('-t', "--test", action="store_true", default=False,
                      dest="test", help="Test grabber with a know good search")
    parser.add_option('-s', "--search", action="store_true", default=False,
                      dest="search", help="Search for lyrics.")
    parser.add_option('-a', "--artist", metavar="ARTIST", default=None,
                      dest="artist", help="Artist of track.")
    parser.add_option('-b', "--album", metavar="ALBUM", default=None,
                      dest="album", help="Album of track.")
    parser.add_option('-n', "--title", metavar="TITLE", default=None,
                      dest="title", help="Title of track.")
    parser.add_option('-f', "--filename", metavar="FILENAME", default=None,
                      dest="filename", help="Filename of track.")
    parser.add_option('-d', '--debug', action="store_true", default=False,
                      dest="debug", help=("Show debug messages"))

    opts, args = parser.parse_args()

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__

    if opts.debug:
        debug = True

    if opts.version:
        buildVersion()

    if opts.test:
        performSelfTest()

    # Only options that were actually supplied override the defaults set
    # by utilities.Lyrics().
    for field in ('artist', 'album', 'title', 'filename'):
        value = getattr(opts, field)
        if value:
            setattr(lyrics, field, value)

    fetcher = LyricsFetcher()
    if fetcher.get_lyrics(lyrics):
        buildLyrics(lyrics)
        sys.exit(0)
    else:
        utilities.log(True, "No lyrics found for this track")
        sys.exit(1)
205 
# Run the command-line interface only when executed as a script, not
# when the module is imported.
if __name__ == '__main__':
    main()
lyricscom.buildLyrics
def buildLyrics(lyrics)
Definition: lyricscom.py:119
lyricscom.LyricsFetcher.get_lyrics
def get_lyrics(self, lyrics)
Definition: lyricscom.py:37
lyricscom.LyricsFetcher.__init__
def __init__(self)
Definition: lyricscom.py:34
lyricscom.LyricsFetcher.url
url
Definition: lyricscom.py:35
decode
static int decode(unsigned char *vbiline, int scale0, int scale1)
Definition: cc.cpp:70
lyricscom.buildVersion
def buildVersion()
Definition: lyricscom.py:136
lyricscom.main
def main()
Definition: lyricscom.py:152
lyricscom.LyricsFetcher
Definition: lyricscom.py:33
lyricscom.performSelfTest
def performSelfTest()
Definition: lyricscom.py:93