3 Scraper for http://www.genius.com
8 from urllib.request
import urlopen, Request
9 from urllib.parse
import quote
10 from html
import parser
as html_parser
18 from hashlib
import md5
20 from optparse
import OptionParser
21 from common
import utilities
23 import json
as simplejson
25 __author__ =
"Paul Harrison and ronie'"
27 __description__ =
"Search http://www.genius.com for lyrics"
30 __syncronized__ =
False
35 socket.setdefaulttimeout(10)
39 self.
url =
'http://api.genius.com/search?q=%s%s%s&access_token=7pTrhwtmyQmccHoJX8HjXpYmyAdkbe19x5sjvwkf1UEIQTrPeXEm6LgylJi9GiPO'
42 utilities.log(debug,
"%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))
45 request =
Request(self.
url % (quote(lyrics.artist),
'%20', quote(lyrics.title)))
46 request.add_header(
'User-Agent',
'Mozilla/5.0 (Windows NT 10.0; rv:77.0) Gecko/20100101 Firefox/77.0')
47 req = urlopen(request)
48 response = req.read().
decode(
'utf-8')
53 data = simplejson.loads(response)
56 name = data[
'response'][
'hits'][0][
'result'][
'primary_artist'][
'name']
57 track = data[
'response'][
'hits'][0][
'result'][
'title']
58 if (difflib.SequenceMatcher(
None, lyrics.artist.lower(), name.lower()).ratio() > 0.8)
and (difflib.SequenceMatcher(
None, lyrics.title.lower(), track.lower()).ratio() > 0.8):
59 self.
page = data[
'response'][
'hits'][0][
'result'][
'url']
65 utilities.log(debug,
"%s: search url: %s" % (__title__, self.
page))
69 request.add_header(
'User-Agent',
'Mozilla/5.0 (Windows NT 10.0; rv:77.0) Gecko/20100101 Firefox/77.0')
70 req = urlopen(request)
77 htmlparser = html_parser.HTMLParser()
78 response = htmlparser.unescape(response.decode(
'utf-8'))
81 response = html.unescape(response.decode(
'utf-8'))
82 matchcode = re.search(
u'<div class="[lL]yrics.*?">(.*?)</div>', response, flags=re.DOTALL)
84 lyricscode = (matchcode.group(1))
85 lyr = re.sub(
'<[^<]+?>',
'', lyricscode)
86 lyrics.lyrics = lyr.replace(
'\\n',
'\n').strip()
94 lyrics = utilities.Lyrics()
95 lyrics.source = __title__
96 lyrics.syncronized = __syncronized__
97 lyrics.artist =
'Dire Straits'
98 lyrics.album =
'Brothers In Arms'
99 lyrics.title =
'Money For Nothing'
102 found = fetcher.get_lyrics(lyrics)
105 utilities.log(
True,
"Everything appears in order.")
109 utilities.log(
True,
"The lyrics for the test search failed!")
113 from lxml
import etree
114 xml = etree.XML(
u'<lyrics></lyrics>')
115 etree.SubElement(xml,
"artist").text = lyrics.artist
116 etree.SubElement(xml,
"album").text = lyrics.album
117 etree.SubElement(xml,
"title").text = lyrics.title
118 etree.SubElement(xml,
"syncronized").text =
'True' if __syncronized__
else 'False'
119 etree.SubElement(xml,
"grabber").text = lyrics.source
121 lines = lyrics.lyrics.splitlines()
123 etree.SubElement(xml,
"lyric").text = line
125 utilities.log(
True, utilities.convert_etree(etree.tostring(xml, encoding=
'UTF-8',
126 pretty_print=
True, xml_declaration=
True)))
130 from lxml
import etree
131 version = etree.XML(
u'<grabber></grabber>')
132 etree.SubElement(version,
"name").text = __title__
133 etree.SubElement(version,
"author").text = __author__
134 etree.SubElement(version,
"command").text =
'minilyrics.py'
135 etree.SubElement(version,
"type").text =
'lyrics'
136 etree.SubElement(version,
"description").text = __description__
137 etree.SubElement(version,
"version").text = __version__
138 etree.SubElement(version,
"priority").text = __priority__
139 etree.SubElement(version,
"syncronized").text =
'True' if __syncronized__
else 'False'
141 utilities.log(
True, utilities.convert_etree(etree.tostring(version, encoding=
'UTF-8',
142 pretty_print=
True, xml_declaration=
True)))
148 parser = OptionParser()
150 parser.add_option(
'-v',
"--version", action=
"store_true", default=
False,
151 dest=
"version", help=
"Display version and author")
152 parser.add_option(
'-t',
"--test", action=
"store_true", default=
False,
153 dest=
"test", help=
"Test grabber with a know good search")
154 parser.add_option(
'-s',
"--search", action=
"store_true", default=
False,
155 dest=
"search", help=
"Search for lyrics.")
156 parser.add_option(
'-a',
"--artist", metavar=
"ARTIST", default=
None,
157 dest=
"artist", help=
"Artist of track.")
158 parser.add_option(
'-b',
"--album", metavar=
"ALBUM", default=
None,
159 dest=
"album", help=
"Album of track.")
160 parser.add_option(
'-n',
"--title", metavar=
"TITLE", default=
None,
161 dest=
"title", help=
"Title of track.")
162 parser.add_option(
'-f',
"--filename", metavar=
"FILENAME", default=
None,
163 dest=
"filename", help=
"Filename of track.")
164 parser.add_option(
'-d',
'--debug', action=
"store_true", default=
False,
165 dest=
"debug", help=(
"Show debug messages"))
167 opts, args = parser.parse_args()
169 lyrics = utilities.Lyrics()
170 lyrics.source = __title__
171 lyrics.syncronized = __syncronized__
183 lyrics.artist = opts.artist
185 lyrics.album = opts.album
187 lyrics.title = opts.title
189 lyrics.filename = opts.filename
192 if fetcher.get_lyrics(lyrics):
196 utilities.log(
True,
"No lyrics found for this track")
199 if __name__ ==
'__main__':