3 Scraper for http://www.lyrics.com/
12 from bs4
import BeautifulSoup
15 from optparse
import OptionParser
16 from common
import utilities
18 __author__ =
"Paul Harrison and ronie"
19 __title__ =
"Lyrics.Com"
20 __description__ =
"Search http://www.lyrics.com for lyrics"
23 __syncronized__ =
False
29 self.
url =
'http://www.lyrics.com/serp.php?st=%s&qtype=2'
32 utilities.log(debug,
"%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))
34 sess = requests.Session()
37 request = sess.get(self.
url % urllib.parse.quote_plus(lyrics.artist), timeout=10)
38 response = request.text
41 soup = BeautifulSoup(response,
'html.parser')
43 for link
in soup.find_all(
'a'):
44 if link.string
and link.get(
'href').startswith(
'artist/'):
45 url =
'https://www.lyrics.com/' + link.get(
'href')
49 req = sess.get(url, timeout=10)
53 soup = BeautifulSoup(resp,
'html.parser')
55 for link
in soup.find_all(
'a'):
56 if link.string
and (difflib.SequenceMatcher(
None, link.string.lower(), lyrics.title.lower()).ratio() > 0.8):
57 url =
'https://www.lyrics.com' + link.get(
'href')
61 req2 = sess.get(url, timeout=10)
65 matchcode = re.search(
'<pre.*?>(.*?)</pre>', resp2, flags=re.DOTALL)
67 lyricscode = (matchcode.group(1))
68 lyr = re.sub(
'<[^<]+?>',
'', lyricscode)
69 lyrics.lyrics = lyr.replace(
'\\n',
'\n')
76 from bs4
import BeautifulSoup
78 utilities.log(
True,
"Failed to import BeautifulSoup. This grabber requires python-bs4")
82 lyrics = utilities.Lyrics()
83 lyrics.source = __title__
84 lyrics.syncronized = __syncronized__
85 lyrics.artist =
'Dire Straits'
86 lyrics.album =
'Brothers In Arms'
87 lyrics.title =
'Money For Nothing'
90 found = fetcher.get_lyrics(lyrics)
93 utilities.log(
True,
"Everything appears in order.")
97 utilities.log(
True,
"The lyrics for the test search failed!")
101 from lxml
import etree
102 xml = etree.XML(
u'<lyrics></lyrics>')
103 etree.SubElement(xml,
"artist").text = lyrics.artist
104 etree.SubElement(xml,
"album").text = lyrics.album
105 etree.SubElement(xml,
"title").text = lyrics.title
106 etree.SubElement(xml,
"syncronized").text =
'True' if __syncronized__
else 'False'
107 etree.SubElement(xml,
"grabber").text = lyrics.source
109 lines = lyrics.lyrics.splitlines()
111 etree.SubElement(xml,
"lyric").text = line
113 utilities.log(
True, utilities.convert_etree(etree.tostring(xml, encoding=
'UTF-8',
114 pretty_print=
True, xml_declaration=
True)))
118 from lxml
import etree
119 version = etree.XML(
u'<grabber></grabber>')
120 etree.SubElement(version,
"name").text = __title__
121 etree.SubElement(version,
"author").text = __author__
122 etree.SubElement(version,
"command").text =
'lyricscom.py'
123 etree.SubElement(version,
"type").text =
'lyrics'
124 etree.SubElement(version,
"description").text = __description__
125 etree.SubElement(version,
"version").text = __version__
126 etree.SubElement(version,
"priority").text = __priority__
127 etree.SubElement(version,
"syncronized").text =
'True' if __syncronized__
else 'False'
129 utilities.log(
True, utilities.convert_etree(etree.tostring(version, encoding=
'UTF-8',
130 pretty_print=
True, xml_declaration=
True)))
136 parser = OptionParser()
138 parser.add_option(
'-v',
"--version", action=
"store_true", default=
False,
139 dest=
"version", help=
"Display version and author")
140 parser.add_option(
'-t',
"--test", action=
"store_true", default=
False,
141 dest=
"test", help=
"Test grabber with a know good search")
142 parser.add_option(
'-s',
"--search", action=
"store_true", default=
False,
143 dest=
"search", help=
"Search for lyrics.")
144 parser.add_option(
'-a',
"--artist", metavar=
"ARTIST", default=
None,
145 dest=
"artist", help=
"Artist of track.")
146 parser.add_option(
'-b',
"--album", metavar=
"ALBUM", default=
None,
147 dest=
"album", help=
"Album of track.")
148 parser.add_option(
'-n',
"--title", metavar=
"TITLE", default=
None,
149 dest=
"title", help=
"Title of track.")
150 parser.add_option(
'-f',
"--filename", metavar=
"FILENAME", default=
None,
151 dest=
"filename", help=
"Filename of track.")
152 parser.add_option(
'-d',
'--debug', action=
"store_true", default=
False,
153 dest=
"debug", help=(
"Show debug messages"))
155 opts, args = parser.parse_args()
157 lyrics = utilities.Lyrics()
158 lyrics.source = __title__
159 lyrics.syncronized = __syncronized__
171 lyrics.artist = opts.artist
173 lyrics.album = opts.album
175 lyrics.title = opts.title
177 lyrics.filename = opts.filename
180 if fetcher.get_lyrics(lyrics):
184 utilities.log(
True,
"No lyrics found for this track")
187 if __name__ ==
'__main__':