MythTV  master
genius.py
Go to the documentation of this file.
1 #-*- coding: UTF-8 -*-
2 """
3 Scraper for http://www.genius.com
4 
5 taxigps
6 """
7 import sys
8 try:
9  from urllib2 import quote, urlopen, Request
10 except ImportError:
11  from urllib.request import urlopen, Request
12  from urllib.parse import quote
13 try:
14  import HTMLParser as html_parser
15 except ImportError:
16  from html import parser as html_parser
17 import socket
18 import re
19 from hashlib import md5
20 import difflib
21 from optparse import OptionParser
22 from common import utilities
23 
24 if sys.version_info < (2, 7):
25  import simplejson
26 else:
27  import json as simplejson
28 
# Grabber metadata.  buildVersion() publishes these values in the <grabber>
# XML description; __title__ is also used as the lyrics source name and as
# the prefix of the log messages emitted by this module.
__author__ = "Paul Harrison and ronie'"
__title__ = "Genius"
__description__ = "Search http://www.genius.com for lyrics"
# Kept as a string because it is written verbatim into an XML text node.
__priority__ = "160"
__version__ = "0.1"
# Reported as 'True'/'False' in the XML output and copied onto Lyrics objects.
__syncronized__ = False


# Passed as the first argument of utilities.log() for diagnostic messages;
# main() sets it to True when -d/--debug is given.
debug = False

# Default timeout (in seconds) for sockets created after this point, so a
# stalled HTTP request cannot hang the grabber indefinitely.
socket.setdefaulttimeout(10)
40 
class LyricsFetcher:
    """Look up a track on genius.com and scrape its lyrics.

    The lookup is a two step process: the Genius search API is queried for
    "<artist> <title>", then the page of the first hit is downloaded and
    the contents of its ``<div class="lyrics">`` element are converted to
    plain text.
    """

    # Browser-like User-Agent sent with every request made by this class.
    _USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0'

    def __init__( self ):
        # Search URL template.  The three %s slots are filled in by
        # get_lyrics() with the quoted artist, a '%20' separator and the
        # quoted title.
        # NOTE(review): the access token is embedded in the source and sent
        # over plain http -- consider https and external configuration.
        self.url = 'http://api.genius.com/search?q=%s%s%s&access_token=7pTrhwtmyQmccHoJX8HjXpYmyAdkbe19x5sjvwkf1UEIQTrPeXEm6LgylJi9GiPO'

    def get_lyrics(self, lyrics):
        """Fetch the lyrics for ``lyrics.artist`` / ``lyrics.title``.

        On success the plain-text lyrics are stored in ``lyrics.lyrics`` and
        the URL of the scraped page is kept in ``self.page``.

        Returns True when lyrics were found, False on any failure (network
        error, malformed search reply, no search hit, or no lyrics block in
        the page).  Failures never raise.
        """
        utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))

        try:
            search_url = self.url % (quote(lyrics.artist), '%20', quote(lyrics.title))
        except Exception:
            # quote() rejects e.g. a missing (None) artist or title.
            return False

        response = self._fetch(search_url)
        if response is None:
            return False

        try:
            data = simplejson.loads(response)
            self.page = data['response']['hits'][0]['result']['url']
        except (ValueError, KeyError, IndexError, TypeError):
            # Reply was not JSON, had an unexpected shape, or had no hits.
            return False

        utilities.log(debug, "%s: search url: %s" % (__title__, self.page))

        response = self._fetch(self.page)
        if response is None:
            return False

        text = self._extract_lyrics(response)
        if text is None:
            return False

        lyrics.lyrics = text
        return True

    def _fetch(self, url):
        """Return the UTF-8 decoded body of ``url``, or None on any failure."""
        try:
            request = Request(url)
            request.add_header('User-Agent', self._USER_AGENT)
            req = urlopen(request)
        except Exception:
            return None

        # try/finally rather than ``with``: the Python 2 urllib2 response
        # object is not a context manager.
        try:
            return req.read().decode('utf-8')
        except Exception:
            return None
        finally:
            req.close()

    @staticmethod
    def _extract_lyrics(page):
        """Return the plain-text lyrics found in ``page``, or None if absent."""
        matchcode = re.search(r'<div class="lyrics">(.*?)</div>', page, flags=re.DOTALL)
        if matchcode is None:
            return None

        # HTMLParser.unescape() was removed in Python 3.9; html.unescape()
        # is its replacement.  Fall back to the parser method on Python 2,
        # where the html module does not provide it.
        try:
            from html import unescape
        except ImportError:
            unescape = html_parser.HTMLParser().unescape

        # Entities are decoded before the tags are stripped, so escaped
        # markup is removed together with the real markup.
        lyricstext = unescape(matchcode.group(1)).replace(u'<br />', u'\n')
        templyr = re.sub(r'<[^<]+?>', '', lyricstext)
        # Drop bracketed annotations such as "[Chorus]".
        lyr = re.sub(r'\[(.*?)\]', '', templyr)
        return lyr.strip().replace(u'\n\n\n', u'\n\n')
87 
88 
def performSelfTest():
    """Run a lookup for a fixed, well-known track and exit with the result.

    Logs a status message and terminates the process: exit status 0 (after
    printing the lyrics XML) when the lookup succeeds, 1 when it fails.
    This function never returns.
    """
    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__
    lyrics.artist = 'Dire Straits'
    lyrics.album = 'Brothers In Arms'
    lyrics.title = 'Money For Nothing'

    fetcher = LyricsFetcher()
    found = fetcher.get_lyrics(lyrics)

    if found:
        utilities.log(True, "Everything appears in order.")
        buildLyrics(lyrics)
        # buildLyrics() exits by itself; kept as a safety net.
        sys.exit(0)

    utilities.log(True, "The lyrics for the test search failed!")
    sys.exit(1)
108 
def buildLyrics(lyrics):
    """Emit ``lyrics`` as a <lyrics> XML document and exit with status 0.

    The document holds one child element each for artist, album, title,
    the synchronised flag and the grabber name, followed by one <lyric>
    element per line of ``lyrics.lyrics``.  The serialized document is
    passed through utilities.convert_etree() and written with
    utilities.log().  This function never returns.
    """
    from lxml import etree

    root = etree.XML(u'<lyrics></lyrics>')

    # Header elements, in the order they must appear in the document.
    header_fields = (
        ("artist", lyrics.artist),
        ("album", lyrics.album),
        ("title", lyrics.title),
        ("syncronized", 'True' if __syncronized__ else 'False'),
        ("grabber", lyrics.source),
    )
    for tag, value in header_fields:
        etree.SubElement(root, tag).text = value

    for text_line in lyrics.lyrics.splitlines():
        etree.SubElement(root, "lyric").text = text_line

    serialized = etree.tostring(root, encoding='UTF-8',
                                pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(serialized))
    sys.exit(0)
125 
def buildVersion():
    """Emit the <grabber> XML description of this script and exit with 0.

    The document carries the module metadata (name, author, command, type,
    description, version, priority and the synchronised flag).  It is
    passed through utilities.convert_etree() and written with
    utilities.log().  This function never returns.
    """
    from lxml import etree
    version = etree.XML(u'<grabber></grabber>')
    etree.SubElement(version, "name").text = __title__
    etree.SubElement(version, "author").text = __author__
    # The command must name this script; it previously advertised
    # 'minilyrics.py', apparently a leftover from another grabber.
    etree.SubElement(version, "command").text = 'genius.py'
    etree.SubElement(version, "type").text = 'lyrics'
    etree.SubElement(version, "description").text = __description__
    etree.SubElement(version, "version").text = __version__
    etree.SubElement(version, "priority").text = __priority__
    etree.SubElement(version, "syncronized").text = 'True' if __syncronized__ else 'False'

    utilities.log(True, utilities.convert_etree(etree.tostring(version, encoding='UTF-8',
                                                pretty_print=True, xml_declaration=True)))
    sys.exit(0)
141 
def main():
    """Command line entry point of the grabber.

    Parses the options, then:
      * -v prints the grabber description (buildVersion) and exits,
      * -t runs the built-in self test (performSelfTest) and exits,
      * otherwise looks up the track described by -a/-b/-n/-f, prints the
        lyrics XML and exits with 0, or exits with 1 when nothing is found.

    The -s/--search flag is accepted but not consulted; a search is the
    default action.  -d enables debug logging via the module level
    ``debug`` flag.
    """
    global debug

    parser = OptionParser()

    parser.add_option('-v', "--version", action="store_true", default=False,
                      dest="version", help="Display version and author")
    parser.add_option('-t', "--test", action="store_true", default=False,
                      dest="test", help="Test grabber with a know good search")
    parser.add_option('-s', "--search", action="store_true", default=False,
                      dest="search", help="Search for lyrics.")
    parser.add_option('-a', "--artist", metavar="ARTIST", default=None,
                      dest="artist", help="Artist of track.")
    parser.add_option('-b', "--album", metavar="ALBUM", default=None,
                      dest="album", help="Album of track.")
    parser.add_option('-n', "--title", metavar="TITLE", default=None,
                      dest="title", help="Title of track.")
    parser.add_option('-f', "--filename", metavar="FILENAME", default=None,
                      dest="filename", help="Filename of track.")
    parser.add_option('-d', '--debug', action="store_true", default=False,
                      dest="debug", help=("Show debug messages"))

    opts, args = parser.parse_args()

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__

    if opts.debug:
        debug = True

    if opts.version:
        buildVersion()

    if opts.test:
        # Runs the fixed test lookup and exits the process.
        performSelfTest()

    # Only options that were actually supplied are copied onto the request.
    if opts.artist:
        lyrics.artist = opts.artist
    if opts.album:
        lyrics.album = opts.album
    if opts.title:
        lyrics.title = opts.title
    if opts.filename:
        lyrics.filename = opts.filename

    fetcher = LyricsFetcher()
    if fetcher.get_lyrics(lyrics):
        buildLyrics(lyrics)
        sys.exit(0)
    else:
        utilities.log(True, "No lyrics found for this track")
        sys.exit(1)


if __name__ == '__main__':
    main()
198 
genius.buildVersion
def buildVersion()
Definition: genius.py:126
genius.LyricsFetcher.page
page
Definition: genius.py:60
genius.LyricsFetcher.__init__
def __init__(self)
Definition: genius.py:42
genius.LyricsFetcher.url
url
Definition: genius.py:43
decode
static int decode(unsigned char *vbiline, int scale0, int scale1)
Definition: cc.cpp:70
genius.buildLyrics
def buildLyrics(lyrics)
Definition: genius.py:109
genius.LyricsFetcher
Definition: genius.py:41
hardwareprofile.distros.mythtv_data.request.Request
def Request(url=None)
Definition: distros/mythtv_data/request.py:64
genius.main
def main()
Definition: genius.py:142
genius.LyricsFetcher.get_lyrics
def get_lyrics(self, lyrics)
Definition: genius.py:45
genius.performSelfTest
def performSelfTest()
Definition: genius.py:89