MythTV  master
genius.py
Go to the documentation of this file.
1 #-*- coding: UTF-8 -*-
2 """
3 Scraper for http://www.genius.com
4 
5 taxigps
6 """
7 import sys
8 from urllib.request import urlopen, Request
9 from urllib.parse import quote
10 from html import parser as html_parser
11 try:
12  # may be necessary for python 3.10
13  import html
14 except:
15  pass
16 import socket
17 import re
18 from hashlib import md5
19 import difflib
20 from optparse import OptionParser
21 from common import utilities
22 
23 import json as simplejson
24 
# Grabber metadata. __title__ doubles as the lyrics source name and as the
# prefix of every debug log message emitted by this script.
__author__ = "Paul Harrison and ronie'"
__title__ = "Genius"
__description__ = "Search http://www.genius.com for lyrics"
__priority__ = "160"
__version__ = "0.1"
# Spelling is part of the interface shared with the Lyrics object; keep as-is.
__syncronized__ = False


# Debug logging switch; main() turns it on when -d/--debug is given.
debug = False

# Apply a 10 second default timeout to every socket opened by this process.
socket.setdefaulttimeout(10)
36 
class LyricsFetcher:
    """Look up song lyrics on genius.com.

    The Genius search API is queried for "<artist> <title>".  When the top
    hit is close enough to the requested artist and title, the song page is
    downloaded and the lyrics text is scraped out of its HTML.
    """

    # Browser-like User-Agent sent with both the API and the page request.
    _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:77.0) Gecko/20100101 Firefox/77.0'
    # Artist and title must each exceed this difflib similarity ratio.
    _MIN_RATIO = 0.8

    def __init__(self):
        # Search URL template, filled with (artist, separator, title).
        # NOTE(review): the API access token is hard-coded here and sent over
        # plain http -- confirm this is acceptable for the project.
        self.url = 'http://api.genius.com/search?q=%s%s%s&access_token=7pTrhwtmyQmccHoJX8HjXpYmyAdkbe19x5sjvwkf1UEIQTrPeXEm6LgylJi9GiPO'
        # URL of the matched song page; filled in by get_lyrics().
        self.page = None

    @classmethod
    def _download(cls, url):
        """Return the UTF-8 decoded body of *url*; raises on any failure."""
        request = Request(url)
        request.add_header('User-Agent', cls._USER_AGENT)
        # The context manager closes the connection even if read()/decode() fail.
        with urlopen(request) as reply:
            return reply.read().decode('utf-8')

    @classmethod
    def _similar(cls, wanted, found):
        """Return True when two names match closely, ignoring case."""
        matcher = difflib.SequenceMatcher(None, wanted.lower(), found.lower())
        return matcher.ratio() > cls._MIN_RATIO

    @staticmethod
    def _extract_lyrics(page_html):
        """Return the lyrics text found in a song page, or None if absent."""
        # html.unescape exists since Python 3.4; HTMLParser.unescape was
        # removed in Python 3.9, so call the module-level function directly.
        text = html.unescape(page_html)
        found = re.search(u'<div class="[lL]yrics.*?">(.*?)</div>', text, flags=re.DOTALL)
        if found is None:
            return None
        # Drop the remaining markup and turn literal "\n" sequences into
        # real line breaks.
        stripped = re.sub('<[^<]+?>', '', found.group(1))
        return stripped.replace('\\n', '\n').strip()

    def get_lyrics(self, lyrics):
        """Search genius.com and store the result in ``lyrics.lyrics``.

        Args:
            lyrics: object providing ``artist``, ``album`` and ``title``;
                its ``lyrics`` attribute is set on success.

        Returns:
            True when lyrics were found and stored, None when the best
            search hit does not match the requested artist/title closely
            enough, and False on any network, decoding or parsing failure.
        """
        utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))

        # Best effort: any failure while querying the API means "not found",
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            search_url = self.url % (quote(lyrics.artist), '%20', quote(lyrics.title))
            data = simplejson.loads(self._download(search_url))
        except Exception as err:
            utilities.log(debug, "%s: search request failed: %s" % (__title__, err))
            return False

        # Only the first hit is considered.
        try:
            top_hit = data['response']['hits'][0]['result']
            name = top_hit['primary_artist']['name']
            track = top_hit['title']
            if not (self._similar(lyrics.artist, name) and self._similar(lyrics.title, track)):
                return None
            self.page = top_hit['url']
        except (KeyError, IndexError, TypeError, AttributeError):
            # No hits, or the reply does not have the expected layout.
            return False

        utilities.log(debug, "%s: search url: %s" % (__title__, self.page))

        try:
            page_html = self._download(self.page)
        except Exception as err:
            utilities.log(debug, "%s: page request failed: %s" % (__title__, err))
            return False

        text = self._extract_lyrics(page_html)
        if text is None:
            return False
        lyrics.lyrics = text
        return True
90 
91 
def performSelfTest():
    """Run a known-good search and exit with the outcome.

    Looks up a fixed Dire Straits track.  On success the lyrics are emitted
    through buildLyrics() and the process exits with status 0; otherwise a
    failure message is logged and the process exits with status 1.
    """
    sample = utilities.Lyrics()
    sample.source = __title__
    sample.syncronized = __syncronized__
    sample.artist = 'Dire Straits'
    sample.album = 'Brothers In Arms'
    sample.title = 'Money For Nothing'

    if not LyricsFetcher().get_lyrics(sample):
        utilities.log(True, "The lyrics for the test search failed!")
        sys.exit(1)

    utilities.log(True, "Everything appears in order.")
    buildLyrics(sample)
    sys.exit(0)
111 
def buildLyrics(lyrics):
    """Emit the fetched lyrics as an XML document and exit with status 0.

    The document root is <lyrics>; it carries the artist, album, title,
    syncronized flag and grabber name, followed by one <lyric> element per
    line of ``lyrics.lyrics``.  The serialized XML is written through
    utilities.log().
    """
    from lxml import etree

    root = etree.XML(u'<lyrics></lyrics>')

    header_fields = (
        ("artist", lyrics.artist),
        ("album", lyrics.album),
        ("title", lyrics.title),
        ("syncronized", 'True' if __syncronized__ else 'False'),
        ("grabber", lyrics.source),
    )
    for tag, value in header_fields:
        etree.SubElement(root, tag).text = value

    for text_line in lyrics.lyrics.splitlines():
        etree.SubElement(root, "lyric").text = text_line

    serialized = etree.tostring(root, encoding='UTF-8',
                                pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(serialized))
    sys.exit(0)
128 
def buildVersion():
    """Emit the grabber description as an XML document and exit with status 0.

    The <grabber> document carries the name, author, command, type,
    description, version, priority and syncronized flag of this script and
    is written through utilities.log().
    """
    from lxml import etree
    version = etree.XML(u'<grabber></grabber>')
    etree.SubElement(version, "name").text = __title__
    etree.SubElement(version, "author").text = __author__
    # This script is genius.py; the previous value 'minilyrics.py' was a
    # copy/paste leftover that advertised the wrong command.
    etree.SubElement(version, "command").text = 'genius.py'
    etree.SubElement(version, "type").text = 'lyrics'
    etree.SubElement(version, "description").text = __description__
    etree.SubElement(version, "version").text = __version__
    etree.SubElement(version, "priority").text = __priority__
    etree.SubElement(version, "syncronized").text = 'True' if __syncronized__ else 'False'

    utilities.log(True, utilities.convert_etree(etree.tostring(version, encoding='UTF-8',
                                                               pretty_print=True, xml_declaration=True)))
    sys.exit(0)
144 
def main():
    """Parse the command line and run the requested grabber action.

    -v prints the grabber description and -t runs the self test; both end
    the process.  Otherwise the artist/album/title/filename options are
    copied onto a Lyrics object and a search is performed: the process exits
    with status 0 after printing the lyrics, or status 1 when none are found.
    """
    global debug

    parser = OptionParser()

    flag_options = (
        ('-v', "--version", "version", "Display version and author"),
        ('-t', "--test", "test", "Test grabber with a know good search"),
        ('-s', "--search", "search", "Search for lyrics."),
    )
    for short_name, long_name, dest, help_text in flag_options:
        parser.add_option(short_name, long_name, action="store_true",
                          default=False, dest=dest, help=help_text)

    value_options = (
        ('-a', "--artist", "ARTIST", "artist", "Artist of track."),
        ('-b', "--album", "ALBUM", "album", "Album of track."),
        ('-n', "--title", "TITLE", "title", "Title of track."),
        ('-f', "--filename", "FILENAME", "filename", "Filename of track."),
    )
    for short_name, long_name, metavar, dest, help_text in value_options:
        parser.add_option(short_name, long_name, metavar=metavar,
                          default=None, dest=dest, help=help_text)

    # Registered last so the option order in --help stays unchanged.
    parser.add_option('-d', '--debug', action="store_true", default=False,
                      dest="debug", help=("Show debug messages"))

    opts, _unused_args = parser.parse_args()

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__

    if opts.debug:
        debug = True

    # Both of these terminate the process themselves.
    if opts.version:
        buildVersion()

    if opts.test:
        performSelfTest()

    # Copy only the track details that were actually supplied.
    for field in ("artist", "album", "title", "filename"):
        supplied = getattr(opts, field)
        if supplied:
            setattr(lyrics, field, supplied)

    if not LyricsFetcher().get_lyrics(lyrics):
        utilities.log(True, "No lyrics found for this track")
        sys.exit(1)

    buildLyrics(lyrics)
    sys.exit(0)
198 
# Run the grabber only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
201 
genius.buildVersion
def buildVersion()
Definition: genius.py:129
genius.LyricsFetcher.page
page
Definition: genius.py:59
genius.LyricsFetcher.__init__
def __init__(self)
Definition: genius.py:38
genius.LyricsFetcher.url
url
Definition: genius.py:39
decode
static int decode(unsigned char *vbiline, int scale0, int scale1)
Definition: cc.cpp:67
genius.buildLyrics
def buildLyrics(lyrics)
Definition: genius.py:112
genius.LyricsFetcher
Definition: genius.py:37
hardwareprofile.distros.mythtv_data.request.Request
def Request(url=None)
Definition: distros/mythtv_data/request.py:64
genius.main
def main()
Definition: genius.py:145
genius.LyricsFetcher.get_lyrics
def get_lyrics(self, lyrics)
Definition: genius.py:41
genius.performSelfTest
def performSelfTest()
Definition: genius.py:92