MythTV  master
darklyrics.py
Go to the documentation of this file.
1 #-*- coding: UTF-8 -*-
2 """
3 Scraper for http://www.darklyrics.com/ - the largest metal lyrics archive on the Web.
4 
5 scraper by smory
6 """
7 
8 import hashlib
9 try:
10  from urllib2 import quote, urlopen
11 except ImportError:
12  from urllib.request import urlopen
13  from urllib.parse import quote
14 import re
15 import chardet
16 import sys
17 from optparse import OptionParser
18 from common import utilities
19 
# Grabber metadata: written into the XML produced by buildVersion() and
# copied onto the Lyrics objects created in this file.
__author__ = "Paul Harrison and smory"
__title__ = "DarkLyrics"
__description__ = "Search http://www.darklyrics.com/ - the largest metal lyrics archive on the Web"
__priority__ = "180"  # a string because it is assigned directly as XML element text
__version__ = "0.1"
__syncronized__ = False  # spelling (sic) matches the "syncronized" XML element name

# First argument handed to utilities.log(); main() switches it on for --debug.
debug = False
28 
30 
def __init__(self):
    """Store the site root and the search URL template.

    ``searchUrl`` contains a literal ``%term%`` placeholder that search()
    replaces with the URL-quoted query.
    """
    self.base_url, self.searchUrl = (
        "http://www.darklyrics.com/",
        "http://www.darklyrics.com/search?q=%term%",
    )
34 
def search(self, artist, title):
    """Query the site's search page and collect candidate song links.

    Args:
        artist: Artist name; a falsy value is treated as an empty string.
        title: Song title; a falsy value is treated as an empty string.

    Returns:
        A list with one ``[label, url, artist, title, index]`` entry per
        hit, or ``None`` when the request fails or nothing matches.
        ``label`` and ``index`` are ``bytes`` captured from the raw page,
        ``url`` is ``base_url`` plus the decoded link target, and
        ``artist`` / ``title`` are passed through unchanged.  ``index`` is
        the number after ``#`` in the link target.
    """
    term = quote((artist or "") + " " + (title or ""))

    try:
        response = urlopen(self.searchUrl.replace("%term%", term))
        try:
            page = response.read()
        finally:
            # Release the connection even when read() fails.
            response.close()
    except Exception:
        # Best effort: any request problem means "no result".  Unlike a
        # bare except, this lets KeyboardInterrupt/SystemExit through.
        return None

    # Groups: 1 = link target including "#<n>", 2 = <n>, 3 = link text.
    hits = re.findall(
        br'<h2><a\shref="(.*?#([0-9]+))".*?>(.*?)</a></h2>', page)
    if not hits:
        return None

    links = []
    for position, (href, anchor, label) in enumerate(hits):
        url = self.base_url + href.decode('utf-8')
        # Looking up the album name costs one extra request per hit, so
        # only the first six results get it appended to their label.
        if position < 6:
            label = label + b" " + self.getAlbumName(url)
        links.append([label, url, artist, title, anchor])
    return links
63 
def findLyrics(self, url, index):
    """Download an album page and cut out the lyrics of one song.

    Args:
        url: Address of the page to download.
        index: Value of the song's ``<a name="...">`` anchor.  ``bytes``
            (as produced by search()) is used as is; any other value is
            converted to text and UTF-8 encoded.

    Returns:
        The text between the anchor and the next ``<h3>`` or ``<div`` as
        ``bytes``, with ``<br />``, ``<i>``, ``</i>``, ``</a>`` and
        ``</h3>`` removed, or ``None`` if the page cannot be fetched or
        the anchor is not present.
    """
    try:
        response = urlopen(url)
        try:
            page = response.read()
        finally:
            # Release the connection even when read() fails.
            response.close()
    except Exception:
        # Best effort: a failed download simply means "no lyrics".
        return None

    if not isinstance(index, bytes):
        # The page is searched as bytes, so the anchor must be bytes too.
        index = str(index).encode("utf-8")

    # re.escape keeps the anchor value literal inside the pattern; DOTALL
    # lets ".*?" run across the line breaks inside the lyrics.
    pattern = br'<a\sname="' + re.escape(index) + br'">(.*?)(?:<h3>|<div)'
    match = re.search(pattern, page, re.DOTALL)
    if not match:
        return None

    text = match.group(1)
    for tag in (b"<br />", b"<i>", b"</i>", b"</a>", b"</h3>"):
        text = text.replace(tag, b"")
    return text
85 
def getAlbumName(self, url):
    """Download a page and return its release name wrapped in parentheses.

    Args:
        url: Address of the page to download.

    Returns:
        ``b"(<name>)"`` with every double quote removed, where ``<name>``
        is the text of the first ``<h2>`` that starts with ``album``,
        ``single``, ``ep`` or ``live`` (case-insensitive).  ``b""`` is
        returned when the download fails or no such heading exists.
    """
    try:
        response = urlopen(url)
        try:
            page = response.read()
        finally:
            # Release the connection even when read() fails.
            response.close()
    except Exception:
        # Best effort: the album name is optional decoration.
        return b""

    match = re.search(
        br'<h2>(?:album|single|ep|live):?\s?(.*?)</h2>', page, re.IGNORECASE)
    if not match:
        return b""
    return (b"(" + match.group(1) + b")").replace(b'"', b"")
100 
101 
def get_lyrics(self, lyrics):
    """Search for ``lyrics.artist`` / ``lyrics.title`` and store the text.

    Args:
        lyrics: Object providing ``artist``, ``album`` and ``title``.  On
            success its ``lyrics`` attribute receives the decoded text;
            when the search yields more than one hit, its ``list``
            attribute receives all of them.

    Returns:
        ``True`` if lyrics for the first hit were fetched and decoded,
        ``False`` if the search found nothing or the fetch came back empty.
    """
    utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))
    links = self.search(lyrics.artist, lyrics.title)

    if not links:
        return False
    if len(links) > 1:
        lyrics.list = links

    raw = self.get_lyrics_from_list(links[0])
    if not raw:
        return False

    # chardet reports an encoding of None when it cannot classify the
    # data; fall back to UTF-8 instead of crashing inside decode().
    encoding = chardet.detect(raw).get('encoding') or 'utf-8'
    try:
        text = raw.decode(encoding, 'ignore')
    except LookupError:
        # The detected name is not a codec this Python knows about.
        text = raw.decode('utf-8', 'ignore')
    lyrics.lyrics = text
    return True
119 
def get_lyrics_from_list(self, link):
    """Fetch the lyrics for one entry of a result list.

    ``link`` must unpack into exactly five items
    ``(title, url, artist, song, index)``; only ``url`` and ``index`` are
    used, and both are handed to findLyrics(), whose result is returned.
    """
    _title, page_url, _artist, _song, anchor = link
    return self.findLyrics(page_url, anchor)
123 
def performSelfTest():
    """Run a fixed, known-good search and exit with the outcome.

    Looks up 'Cut To The Heart' by 'Dagon'.  On success it logs a
    confirmation, prints the lyrics XML via buildLyrics() and exits with
    status 0; otherwise it logs a failure message and exits with status 1.
    """
    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__
    lyrics.artist = 'Dagon'
    lyrics.album = 'Terraphobic'
    lyrics.title = 'Cut To The Heart'

    fetcher = LyricsFetcher()
    if fetcher.get_lyrics(lyrics):
        utilities.log(True, "Everything appears in order.")
        buildLyrics(lyrics)
        sys.exit(0)

    utilities.log(True, "The lyrics for the test search failed!")
    sys.exit(1)
143 
def buildLyrics(lyrics):
    """Log *lyrics* as a ``<lyrics>`` XML document, then exit with status 0.

    Args:
        lyrics: Object providing ``artist``, ``album``, ``title``,
            ``source`` and the ``lyrics`` text.  Each line of the text
            becomes one ``<lyric>`` element.
    """
    from lxml import etree

    xml = etree.XML(u'<lyrics></lyrics>')
    etree.SubElement(xml, "artist").text = lyrics.artist
    etree.SubElement(xml, "album").text = lyrics.album
    etree.SubElement(xml, "title").text = lyrics.title
    etree.SubElement(xml, "syncronized").text = 'True' if __syncronized__ else 'False'
    etree.SubElement(xml, "grabber").text = lyrics.source

    # Keep only characters from the XML 1.0 "Char" production.  The
    # supplementary planes need the 8-digit \U escape: "\u10000" is
    # "\u1000" followed by "0", which made the previous pattern strip
    # every character above U+FFFF.
    invalid_xml_chars = re.compile(
        u'[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\U00010000-\U0010FFFF]+')
    for line in lyrics.lyrics.splitlines():
        etree.SubElement(xml, "lyric").text = invalid_xml_chars.sub(u'', line)

    utilities.log(True, utilities.convert_etree(etree.tostring(xml, encoding='UTF-8',
                                                pretty_print=True, xml_declaration=True)))
    sys.exit(0)
161 
def buildVersion():
    """Log the grabber description as a ``<grabber>`` XML document, then exit 0.

    The document carries the module metadata (title, author, description,
    version, priority, synchronized flag) plus the fixed command name
    'darklyrics.py' and type 'lyrics'.
    """
    from lxml import etree

    version = etree.XML(u'<grabber></grabber>')
    fields = (
        ("name", __title__),
        ("author", __author__),
        ("command", 'darklyrics.py'),
        ("type", 'lyrics'),
        ("description", __description__),
        ("version", __version__),
        ("priority", __priority__),
        ("syncronized", 'True' if __syncronized__ else 'False'),
    )
    # Children are appended in the order listed above.
    for tag, value in fields:
        etree.SubElement(version, tag).text = value

    utilities.log(True, utilities.convert_etree(etree.tostring(version, encoding='UTF-8',
                                                pretty_print=True, xml_declaration=True)))
    sys.exit(0)
177 
def main():
    """Command-line entry point: parse the options and run one action.

    ``--version`` prints the grabber description and ``--test`` runs the
    self test; both exit the process.  Otherwise the artist/album/title/
    filename options are copied onto a Lyrics object, the lyrics are
    fetched, and the process exits with status 0 after printing them or
    with status 1 when none are found.
    """
    global debug

    parser = OptionParser()

    parser.add_option('-v', "--version", action="store_true", default=False,
                      dest="version", help="Display version and author")
    parser.add_option('-t', "--test", action="store_true", default=False,
                      dest="test", help="Test grabber with a know good search")
    # --search is accepted but not consulted below: fetching is what
    # happens whenever neither --version nor --test ends the process.
    parser.add_option('-s', "--search", action="store_true", default=False,
                      dest="search", help="Search for lyrics.")
    parser.add_option('-a', "--artist", metavar="ARTIST", default=None,
                      dest="artist", help="Artist of track.")
    parser.add_option('-b', "--album", metavar="ALBUM", default=None,
                      dest="album", help="Album of track.")
    parser.add_option('-n', "--title", metavar="TITLE", default=None,
                      dest="title", help="Title of track.")
    parser.add_option('-f', "--filename", metavar="FILENAME", default=None,
                      dest="filename", help="Filename of track.")
    parser.add_option('-d', '--debug', action="store_true", default=False,
                      dest="debug", help=("Show debug messages"))
    opts, args = parser.parse_args()

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__

    if opts.debug:
        debug = True

    if opts.version:
        buildVersion()

    if opts.test:
        performSelfTest()

    if opts.artist:
        lyrics.artist = opts.artist
    if opts.album:
        lyrics.album = opts.album
    if opts.title:
        lyrics.title = opts.title
    if opts.filename:
        lyrics.filename = opts.filename

    fetcher = LyricsFetcher()
    if fetcher.get_lyrics(lyrics):
        buildLyrics(lyrics)
        sys.exit(0)
    else:
        utilities.log(True, "No lyrics found for this track")
        sys.exit(1)
230 
231 if __name__ == '__main__':
232  main()
darklyrics.performSelfTest
def performSelfTest()
Definition: darklyrics.py:124
darklyrics.LyricsFetcher.search
def search(self, artist, title)
Definition: darklyrics.py:35
decode
static int decode(unsigned char *vbiline, int scale0, int scale1)
Definition: cc.cpp:70
if
if(query.exec() &&query.next())
Definition: mythplugins/mytharchive/mytharchivehelper/main.cpp:461
darklyrics.LyricsFetcher.searchUrl
searchUrl
Definition: darklyrics.py:33
darklyrics.LyricsFetcher.get_lyrics_from_list
def get_lyrics_from_list(self, link)
Definition: darklyrics.py:120
darklyrics.LyricsFetcher.base_url
base_url
Definition: darklyrics.py:32
darklyrics.LyricsFetcher.__init__
def __init__(self)
Definition: darklyrics.py:31
darklyrics.buildLyrics
def buildLyrics(lyrics)
Definition: darklyrics.py:144
darklyrics.main
def main()
Definition: darklyrics.py:178
darklyrics.buildVersion
def buildVersion()
Definition: darklyrics.py:162
return
return
Definition: mythplugins/mytharchive/mytharchivehelper/main.cpp:1250
darklyrics.LyricsFetcher
Definition: darklyrics.py:29
darklyrics.LyricsFetcher.getAlbumName
def getAlbumName(self, url)
Definition: darklyrics.py:86
darklyrics.LyricsFetcher.get_lyrics
def get_lyrics(self, lyrics)
Definition: darklyrics.py:102
darklyrics.LyricsFetcher.findLyrics
def findLyrics(self, url, index)
Definition: darklyrics.py:64