3 Scraper for http://www.lyrics.com/
10 from urllib.parse
import quote_plus
11 from urllib.request
import urlopen
15 from optparse
import OptionParser
16 from common
import utilities
18 __author__ =
"Paul Harrison and 'ronie'"
19 __title__ =
"Lyrics.Com"
20 __description__ =
"Search http://www.lyrics.com for lyrics"
23 __syncronized__ =
False
27 socket.setdefaulttimeout(10)
31 self.
url =
'http://www.lyrics.com/serp.php?st=%s&qtype=2'
34 utilities.log(debug,
"%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))
37 from bs4
import BeautifulSoup
39 utilities.log(
True,
"Failed to import BeautifulSoup. This grabber requires python-bs4")
43 request = urlopen(self.
url % quote_plus(lyrics.artist))
44 response = request.read()
49 soup = BeautifulSoup(response,
'html.parser')
51 for link
in soup.find_all(
'a'):
52 if link.string
and link.get(
'href').startswith(
'artist/'):
53 url =
'http://www.lyrics.com/' + link.get(
'href')
56 utilities.log(debug,
"%s: Artist url is %s" % (__title__, url))
59 resp = req.read().
decode(
'utf-8')
63 soup = BeautifulSoup(resp,
'html.parser')
65 for link
in soup.find_all(
'a'):
66 if link.string
and link.get(
'href').startswith(
'/lyric/')
and (difflib.SequenceMatcher(
None, link.string.lower(), lyrics.title.lower()).ratio() > 0.8):
67 url =
'http://www.lyrics.com' + link.get(
'href')
71 utilities.log(debug,
"%s: Song url is %s" % (__title__, url))
75 resp2 = req2.read().
decode(
'utf-8')
80 matchcode = re.search(
u'<pre.*?>(.*?)</pre>', resp2, flags=re.DOTALL)
82 lyricscode = (matchcode.group(1))
83 lyr = re.sub(
u'<[^<]+?>',
'', lyricscode)
84 lyrics.lyrics = lyr.replace(
'\\n',
'\n')
91 from bs4
import BeautifulSoup
93 utilities.log(
True,
"Failed to import BeautifulSoup. This grabber requires python-bs4")
97 lyrics = utilities.Lyrics()
98 lyrics.source = __title__
99 lyrics.syncronized = __syncronized__
100 lyrics.artist =
'Dire Straits'
101 lyrics.album =
'Brothers In Arms'
102 lyrics.title =
'Money For Nothing'
105 found = fetcher.get_lyrics(lyrics)
108 utilities.log(
True,
"Everything appears in order.")
112 utilities.log(
True,
"The lyrics for the test search failed!")
116 from lxml
import etree
117 xml = etree.XML(
u'<lyrics></lyrics>')
118 etree.SubElement(xml,
"artist").text = lyrics.artist
119 etree.SubElement(xml,
"album").text = lyrics.album
120 etree.SubElement(xml,
"title").text = lyrics.title
121 etree.SubElement(xml,
"syncronized").text =
'True' if __syncronized__
else 'False'
122 etree.SubElement(xml,
"grabber").text = lyrics.source
124 lines = lyrics.lyrics.splitlines()
126 etree.SubElement(xml,
"lyric").text = line
128 utilities.log(
True, utilities.convert_etree(etree.tostring(xml, encoding=
'UTF-8',
129 pretty_print=
True, xml_declaration=
True)))
133 from lxml
import etree
134 version = etree.XML(
u'<grabber></grabber>')
135 etree.SubElement(version,
"name").text = __title__
136 etree.SubElement(version,
"author").text = __author__
137 etree.SubElement(version,
"command").text =
'lyricscom.py'
138 etree.SubElement(version,
"type").text =
'lyrics'
139 etree.SubElement(version,
"description").text = __description__
140 etree.SubElement(version,
"version").text = __version__
141 etree.SubElement(version,
"priority").text = __priority__
142 etree.SubElement(version,
"syncronized").text =
'True' if __syncronized__
else 'False'
144 utilities.log(
True, utilities.convert_etree(etree.tostring(version, encoding=
'UTF-8',
145 pretty_print=
True, xml_declaration=
True)))
151 parser = OptionParser()
153 parser.add_option(
'-v',
"--version", action=
"store_true", default=
False,
154 dest=
"version", help=
"Display version and author")
155 parser.add_option(
'-t',
"--test", action=
"store_true", default=
False,
156 dest=
"test", help=
"Test grabber with a know good search")
157 parser.add_option(
'-s',
"--search", action=
"store_true", default=
False,
158 dest=
"search", help=
"Search for lyrics.")
159 parser.add_option(
'-a',
"--artist", metavar=
"ARTIST", default=
None,
160 dest=
"artist", help=
"Artist of track.")
161 parser.add_option(
'-b',
"--album", metavar=
"ALBUM", default=
None,
162 dest=
"album", help=
"Album of track.")
163 parser.add_option(
'-n',
"--title", metavar=
"TITLE", default=
None,
164 dest=
"title", help=
"Title of track.")
165 parser.add_option(
'-f',
"--filename", metavar=
"FILENAME", default=
None,
166 dest=
"filename", help=
"Filename of track.")
167 parser.add_option(
'-d',
'--debug', action=
"store_true", default=
False,
168 dest=
"debug", help=(
"Show debug messages"))
170 opts, args = parser.parse_args()
172 lyrics = utilities.Lyrics()
173 lyrics.source = __title__
174 lyrics.syncronized = __syncronized__
186 lyrics.artist = opts.artist
188 lyrics.album = opts.album
190 lyrics.title = opts.title
192 lyrics.filename = opts.filename
195 if fetcher.get_lyrics(lyrics):
199 utilities.log(
True,
"No lyrics found for this track")
202 if __name__ ==
'__main__':