MythTV  master
lyricscom.py
Go to the documentation of this file.
1 # -*- Mode: python; coding: utf-8; tab-width: 8; indent-tabs-mode: t; -*-
2 """
3 Scraper for http://www.lyrics.com/
4 
5 ronie
6 """
7 
8 import re
9 import requests
10 import urllib.parse
11 import difflib
12 from bs4 import BeautifulSoup
13 
14 import sys
15 from optparse import OptionParser
16 from common import utilities
17 
# Grabber metadata. These values are emitted verbatim in the <grabber>
# XML produced for the --version option.
__author__ = "Paul Harrison and ronie"
__title__ = "Lyrics.Com"
__description__ = "Search http://www.lyrics.com for lyrics"
__priority__ = "240"
__version__ = "0.1"
# This source only yields plain text lyrics, never time-synchronised ones.
__syncronized__ = False

# Passed as the first argument to utilities.log() for diagnostic messages;
# main() sets it to True when -d/--debug is given.
debug = False
26 
class LyricsFetcher:
    """Fetch plain-text lyrics for a track by scraping lyrics.com.

    A lookup takes up to three HTTP requests: an artist search, the first
    artist page listed in the search results, and finally the first song
    page whose link text closely matches the requested title.
    """

    def __init__(self):
        # Artist search endpoint; %s receives the URL-quoted artist name.
        self.url = 'http://www.lyrics.com/serp.php?st=%s&qtype=2'

    def get_lyrics(self, lyrics):
        """Look up the lyrics for ``lyrics.artist`` / ``lyrics.title``.

        Args:
            lyrics: object whose ``artist``, ``album`` and ``title``
                attributes are read and whose ``lyrics`` attribute is set
                to the scraped text on success.

        Returns:
            True when lyrics were found and stored on ``lyrics``; False
            when a request failed, no matching artist or song link was
            found, or the song page contains no ``<pre>`` block.
        """
        utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))

        # Without both an artist and a title there is nothing meaningful
        # to search for (and None would break quoting / .lower() below).
        if not lyrics.artist or not lyrics.title:
            return False

        sess = requests.Session()

        search_page = self._fetch(sess, self.url % urllib.parse.quote_plus(lyrics.artist))
        if search_page is None:
            return False
        artist_url = self._find_artist_url(search_page)
        if not artist_url:
            return False

        artist_page = self._fetch(sess, artist_url)
        if artist_page is None:
            return False
        song_url = self._find_song_url(artist_page, lyrics.title)
        if not song_url:
            return False

        song_page = self._fetch(sess, song_url)
        if song_page is None:
            return False
        text = self._extract_lyrics(song_page)
        if text is None:
            return False

        lyrics.lyrics = text
        return True

    @staticmethod
    def _fetch(sess, url):
        """Return the response body of *url* as text, or None on a request error."""
        try:
            return sess.get(url, timeout=10).text
        except requests.RequestException as err:
            # Network problems are an expected failure mode for a scraper:
            # report them in debug mode and let the caller give up quietly.
            utilities.log(debug, "%s: request for %s failed: %s" % (__title__, url, err))
            return None

    @staticmethod
    def _find_artist_url(page):
        """Return the absolute URL of the first artist link in *page*, or ''."""
        soup = BeautifulSoup(page, 'html.parser')
        for link in soup.find_all('a'):
            # Anchors without an href yield None from .get(); skip them
            # instead of failing on None.startswith().
            href = link.get('href')
            if link.string and href and href.startswith('artist/'):
                return 'https://www.lyrics.com/' + href
        return ''

    @staticmethod
    def _find_song_url(page, title):
        """Return the URL of the first link in *page* whose text resembles *title*, or ''."""
        wanted = title.lower()
        soup = BeautifulSoup(page, 'html.parser')
        for link in soup.find_all('a'):
            text = link.string
            href = link.get('href')
            if not text or not href:
                continue
            # Fuzzy comparison tolerates small differences in the title
            # (punctuation, minor spelling variations).
            if difflib.SequenceMatcher(None, text.lower(), wanted).ratio() > 0.8:
                return 'https://www.lyrics.com' + href
        return ''

    @staticmethod
    def _extract_lyrics(page):
        """Return the tag-stripped content of the first <pre> element, or None."""
        match = re.search(r'<pre.*?>(.*?)</pre>', page, flags=re.DOTALL)
        if not match:
            return None
        stripped = re.sub(r'<[^<]+?>', '', match.group(1))
        # Turn literal backslash-n sequences into real line breaks.
        return stripped.replace('\\n', '\n')
73 
def performSelfTest():
    """Run a fixed, known-good search and exit with a status reflecting the result.

    Exits with status 1 when BeautifulSoup cannot be imported or when the
    test search finds no lyrics. On success the lyrics XML is logged via
    buildLyrics() and the process exits with status 0. This function never
    returns to its caller.
    """
    try:
        # Imported only to verify that the dependency is installed.
        from bs4 import BeautifulSoup
    except ImportError:
        utilities.log(True, "Failed to import BeautifulSoup. This grabber requires python-bs4")
        sys.exit(1)

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__
    lyrics.artist = 'Dire Straits'
    lyrics.album = 'Brothers In Arms'
    lyrics.title = 'Money For Nothing'

    fetcher = LyricsFetcher()
    found = fetcher.get_lyrics(lyrics)

    if found:
        utilities.log(True, "Everything appears in order.")
        buildLyrics(lyrics)
        sys.exit(0)

    utilities.log(True, "The lyrics for the test search failed!")
    sys.exit(1)
99 
def buildLyrics(lyrics):
    """Serialise *lyrics* as a <lyrics> XML document, log it and exit.

    The document holds the artist, album, title, the module-level
    ``__syncronized__`` flag, the grabber name (``lyrics.source``) and one
    <lyric> element per line of ``lyrics.lyrics``. The process exits with
    status 0 after the document has been logged.
    """
    from lxml import etree

    root = etree.XML(u'<lyrics></lyrics>')

    # Header elements, in the order they appear in the output.
    header = (
        ("artist", lyrics.artist),
        ("album", lyrics.album),
        ("title", lyrics.title),
        ("syncronized", 'True' if __syncronized__ else 'False'),
        ("grabber", lyrics.source),
    )
    for tag, value in header:
        etree.SubElement(root, tag).text = value

    for text in lyrics.lyrics.splitlines():
        etree.SubElement(root, "lyric").text = text

    serialized = etree.tostring(root, encoding='UTF-8',
                                pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(serialized))
    sys.exit(0)
116 
def buildVersion():
    """Log the <grabber> XML document describing this script, then exit.

    The document carries the module metadata (name, author, command,
    type, description, version, priority and the synchronised flag).
    The process exits with status 0 after the document has been logged.
    """
    from lxml import etree
    version = etree.XML(u'<grabber></grabber>')
    etree.SubElement(version, "name").text = __title__
    etree.SubElement(version, "author").text = __author__
    etree.SubElement(version, "command").text = 'lyricscom.py'
    etree.SubElement(version, "type").text = 'lyrics'
    etree.SubElement(version, "description").text = __description__
    etree.SubElement(version, "version").text = __version__
    etree.SubElement(version, "priority").text = __priority__
    etree.SubElement(version, "syncronized").text = 'True' if __syncronized__ else 'False'

    utilities.log(True, utilities.convert_etree(etree.tostring(version, encoding='UTF-8',
                                                pretty_print=True, xml_declaration=True)))
    sys.exit(0)
132 
def main():
    """Parse the command line and run the requested grabber action.

    -v logs the grabber description and -t runs the self test; both of
    those helpers terminate the process themselves. Otherwise a search is
    performed with the supplied artist/album/title/filename, and the
    process exits with status 0 when lyrics were found or 1 when not.
    The -s flag is accepted but its value is not consulted here.
    """
    global debug

    parser = OptionParser()

    parser.add_option('-v', "--version", action="store_true", default=False,
                      dest="version", help="Display version and author")
    parser.add_option('-t', "--test", action="store_true", default=False,
                      dest="test", help="Test grabber with a know good search")
    parser.add_option('-s', "--search", action="store_true", default=False,
                      dest="search", help="Search for lyrics.")
    parser.add_option('-a', "--artist", metavar="ARTIST", default=None,
                      dest="artist", help="Artist of track.")
    parser.add_option('-b', "--album", metavar="ALBUM", default=None,
                      dest="album", help="Album of track.")
    parser.add_option('-n', "--title", metavar="TITLE", default=None,
                      dest="title", help="Title of track.")
    parser.add_option('-f', "--filename", metavar="FILENAME", default=None,
                      dest="filename", help="Filename of track.")
    parser.add_option('-d', '--debug', action="store_true", default=False,
                      dest="debug", help=("Show debug messages"))

    opts, args = parser.parse_args()

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__

    if opts.debug:
        debug = True

    if opts.version:
        buildVersion()

    if opts.test:
        # Run the built-in known-good search; the helper exits the process.
        performSelfTest()

    # Copy only the fields that were actually supplied, leaving the
    # defaults of utilities.Lyrics() in place for the rest.
    if opts.artist:
        lyrics.artist = opts.artist
    if opts.album:
        lyrics.album = opts.album
    if opts.title:
        lyrics.title = opts.title
    if opts.filename:
        lyrics.filename = opts.filename

    fetcher = LyricsFetcher()
    if fetcher.get_lyrics(lyrics):
        buildLyrics(lyrics)
        sys.exit(0)
    else:
        utilities.log(True, "No lyrics found for this track")
        sys.exit(1)
186 
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
lyricscom.buildLyrics
def buildLyrics(lyrics)
Definition: lyricscom.py:100
lyricscom.LyricsFetcher.get_lyrics
def get_lyrics(self, lyrics)
Definition: lyricscom.py:31
lyricscom.LyricsFetcher.__init__
def __init__(self)
Definition: lyricscom.py:28
lyricscom.LyricsFetcher.url
url
Definition: lyricscom.py:29
lyricscom.buildVersion
def buildVersion()
Definition: lyricscom.py:117
lyricscom.main
def main()
Definition: lyricscom.py:133
lyricscom.LyricsFetcher
Definition: lyricscom.py:27
lyricscom.performSelfTest
def performSelfTest()
Definition: lyricscom.py:74