MythTV  master
lyricscom.py
Go to the documentation of this file.
1 # -*- Mode: python; coding: utf-8; tab-width: 8; indent-tabs-mode: t; -*-
2 """
3 Scraper for http://www.lyrics.com/
4 
5 ronie
6 """
7 
8 import sys
9 import re
10 from urllib.parse import quote_plus
11 from urllib.request import urlopen
12 
13 import socket
14 import difflib
15 from optparse import OptionParser
16 from common import utilities
17 
# Grabber metadata; these values are emitted in the version XML.
__author__ = "Paul Harrison and 'ronie'"
__title__ = "Lyrics.Com"
__description__ = "Search http://www.lyrics.com for lyrics"
__priority__ = "240"
__version__ = "0.1"
__syncronized__ = False

# Passed as the first argument of utilities.log() for diagnostic messages;
# main() switches it on when -d/--debug is given.
debug = False

# Apply a 10 second default timeout to every socket created from here on.
socket.setdefaulttimeout(10)
28 
class LyricsFetcher:
    """Scraper that looks up song lyrics on www.lyrics.com.

    The lookup is a three step crawl: search for the artist, pick the song
    link on the artist page by fuzzy title match, then pull the lyrics out
    of the ``<pre>`` element of the song page.
    """

    def __init__(self):
        # Artist search URL; %s is replaced by the URL-quoted artist name.
        self.url = 'http://www.lyrics.com/serp.php?st=%s&qtype=2'

    def get_lyrics(self, lyrics):
        """Look up the lyrics for ``lyrics.artist`` / ``lyrics.title``.

        On success the text is stored in ``lyrics.lyrics``.

        Returns:
            True when lyrics were found and stored, False otherwise
            (BeautifulSoup missing, download or decoding failure, or no
            matching artist, song or lyrics block).
        """
        utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))

        try:
            from bs4 import BeautifulSoup
        except ImportError:
            utilities.log(True, "Failed to import BeautifulSoup. This grabber requires python-bs4")
            return False

        # Step 1: artist search results -> artist page (relative 'artist/...' link).
        search_page = self._fetch(self.url % quote_plus(lyrics.artist))
        if search_page is None:
            return False
        artist_href = self._first_href(BeautifulSoup(search_page, 'html.parser'), 'artist/')
        if not artist_href:
            return False
        artist_url = 'http://www.lyrics.com/' + artist_href
        utilities.log(debug, "%s: Artist url is %s" % (__title__, artist_url))

        # Step 2: artist page -> song page, chosen by fuzzy match on the link text.
        artist_page = self._fetch(artist_url, 'utf-8')
        if artist_page is None:
            return False
        song_href = self._first_href(BeautifulSoup(artist_page, 'html.parser'), '/lyric/', lyrics.title)
        if not song_href:
            return False
        song_url = 'http://www.lyrics.com' + song_href
        utilities.log(debug, "%s: Song url is %s" % (__title__, song_url))

        # Step 3: song page -> lyrics text.
        song_page = self._fetch(song_url, 'utf-8')
        if song_page is None:
            return False
        text = self._extract_lyrics(song_page)
        if text is None:
            return False
        lyrics.lyrics = text
        return True

    @staticmethod
    def _fetch(url, encoding=None):
        """Download ``url``; return its body, or None when anything fails.

        The body is returned as bytes, or decoded to str when ``encoding``
        is given.
        """
        try:
            # The context manager closes the response even if read() raises.
            with urlopen(url) as response:
                body = response.read()
            return body.decode(encoding) if encoding else body
        except Exception as err:
            # Best effort: any network, HTTP or decoding error just means
            # "not found". Unlike a bare except, this still lets
            # KeyboardInterrupt and SystemExit propagate.
            utilities.log(debug, "%s: failed to fetch %s: %s" % (__title__, url, err))
            return None

    @staticmethod
    def _first_href(soup, prefix, title=None):
        """Return the href of the first usable ``<a>`` in ``soup``, or ''.

        A link is usable when it has a text string and an href starting
        with ``prefix``. When ``title`` is given, the lower-cased link text
        must also match the lower-cased title with a SequenceMatcher ratio
        above 0.8.
        """
        for link in soup.find_all('a'):
            href = link.get('href')
            # get() yields None for anchors without an href; skip those
            # instead of calling startswith() on None.
            if not link.string or not href or not href.startswith(prefix):
                continue
            if title is not None:
                ratio = difflib.SequenceMatcher(None, link.string.lower(), title.lower()).ratio()
                if ratio <= 0.8:
                    continue
            return href
        return ''

    @staticmethod
    def _extract_lyrics(page):
        """Return the text of the first ``<pre>`` element in ``page``.

        Markup inside the element is stripped. Returns None when the page
        contains no ``<pre>`` element.
        """
        found = re.search(r'<pre.*?>(.*?)</pre>', page, flags=re.DOTALL)
        if not found:
            return None
        text = re.sub(r'<[^<]+?>', '', found.group(1))
        # Turn literal backslash-n sequences into real line breaks.
        return text.replace('\\n', '\n')
88 
def performSelfTest():
    """Run a fixed search (Dire Straits - Money For Nothing) and exit.

    Exits with status 1 when BeautifulSoup cannot be imported or the
    search finds nothing. On success the lyrics are handed to
    buildLyrics() for output and the process exits with status 0.
    """
    try:
        # Imported only to verify that the dependency is installed.
        from bs4 import BeautifulSoup  # noqa: F401
    except ImportError:
        utilities.log(True, "Failed to import BeautifulSoup. This grabber requires python-bs4")
        sys.exit(1)

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__
    lyrics.artist = 'Dire Straits'
    lyrics.album = 'Brothers In Arms'
    lyrics.title = 'Money For Nothing'

    fetcher = LyricsFetcher()
    if fetcher.get_lyrics(lyrics):
        utilities.log(True, "Everything appears in order.")
        buildLyrics(lyrics)
        sys.exit(0)

    utilities.log(True, "The lyrics for the test search failed!")
    sys.exit(1)
114 
def buildLyrics(lyrics):
    """Log ``lyrics`` as a ``<lyrics>`` XML document, then exit with status 0.

    The document holds the artist, album, title, synchronisation flag and
    grabber name, followed by one ``<lyric>`` element per line of
    ``lyrics.lyrics``.
    """
    from lxml import etree

    root = etree.XML(u'<lyrics></lyrics>')

    header_fields = (
        ("artist", lyrics.artist),
        ("album", lyrics.album),
        ("title", lyrics.title),
        ("syncronized", 'True' if __syncronized__ else 'False'),
        ("grabber", lyrics.source),
    )
    for tag, value in header_fields:
        etree.SubElement(root, tag).text = value

    for text_line in lyrics.lyrics.splitlines():
        etree.SubElement(root, "lyric").text = text_line

    document = etree.tostring(root, encoding='UTF-8',
                              pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(document))
    sys.exit(0)
131 
def buildVersion():
    """Log the grabber metadata as a ``<grabber>`` XML document, then exit with status 0."""
    from lxml import etree

    grabber = etree.XML(u'<grabber></grabber>')

    fields = (
        ("name", __title__),
        ("author", __author__),
        ("command", 'lyricscom.py'),
        ("type", 'lyrics'),
        ("description", __description__),
        ("version", __version__),
        ("priority", __priority__),
        ("syncronized", 'True' if __syncronized__ else 'False'),
    )
    for tag, value in fields:
        etree.SubElement(grabber, tag).text = value

    document = etree.tostring(grabber, encoding='UTF-8',
                              pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(document))
    sys.exit(0)
147 
def main():
    """Parse the command line and run the requested grabber action.

    -v/--version logs the grabber metadata and -t/--test runs the self
    test; both of those helpers end the process themselves. Otherwise the
    artist, album, title and filename options are copied onto a Lyrics
    object and looked up. The process exits with status 0 when lyrics were
    found and logged, or 1 when nothing was found.
    """
    global debug

    parser = OptionParser()

    parser.add_option('-v', "--version", action="store_true", default=False,
                      dest="version", help="Display version and author")
    parser.add_option('-t', "--test", action="store_true", default=False,
                      dest="test", help="Test grabber with a know good search")
    parser.add_option('-s', "--search", action="store_true", default=False,
                      dest="search", help="Search for lyrics.")
    parser.add_option('-a', "--artist", metavar="ARTIST", default=None,
                      dest="artist", help="Artist of track.")
    parser.add_option('-b', "--album", metavar="ALBUM", default=None,
                      dest="album", help="Album of track.")
    parser.add_option('-n', "--title", metavar="TITLE", default=None,
                      dest="title", help="Title of track.")
    parser.add_option('-f', "--filename", metavar="FILENAME", default=None,
                      dest="filename", help="Filename of track.")
    parser.add_option('-d', '--debug', action="store_true", default=False,
                      dest="debug", help=("Show debug messages"))

    opts, args = parser.parse_args()

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__

    if opts.debug:
        debug = True

    if opts.version:
        buildVersion()

    if opts.test:
        # This call was lost from the listing, which left the branch with
        # no body; the self test exits the process on every path.
        performSelfTest()

    # Only options that were actually supplied overwrite the Lyrics fields.
    if opts.artist:
        lyrics.artist = opts.artist
    if opts.album:
        lyrics.album = opts.album
    if opts.title:
        lyrics.title = opts.title
    if opts.filename:
        lyrics.filename = opts.filename

    fetcher = LyricsFetcher()
    if fetcher.get_lyrics(lyrics):
        buildLyrics(lyrics)
        sys.exit(0)
    else:
        utilities.log(True, "No lyrics found for this track")
        sys.exit(1)
201 
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
lyricscom.buildLyrics
def buildLyrics(lyrics)
Definition: lyricscom.py:115
lyricscom.LyricsFetcher.get_lyrics
def get_lyrics(self, lyrics)
Definition: lyricscom.py:33
lyricscom.LyricsFetcher.__init__
def __init__(self)
Definition: lyricscom.py:30
lyricscom.LyricsFetcher.url
url
Definition: lyricscom.py:31
decode
static int decode(unsigned char *vbiline, int scale0, int scale1)
Definition: cc.cpp:67
lyricscom.buildVersion
def buildVersion()
Definition: lyricscom.py:132
lyricscom.main
def main()
Definition: lyricscom.py:148
lyricscom.LyricsFetcher
Definition: lyricscom.py:29
lyricscom.performSelfTest
def performSelfTest()
Definition: lyricscom.py:89