MythTV  master
lyricsScraper.py
Go to the documentation of this file.
1 #-*- coding: UTF-8 -*-
2 '''
3 Scraper for http://www.viewlyrics.com
4 
5 PedroHLC
6 https://github.com/PedroHLC/ViewLyricsOpenSearcher
7 
8 rikels
9 https://github.com/rikels/LyricsSearch
10 '''
11 
12 import re
13 import hashlib
14 import difflib
15 import chardet
16 import requests
17 from lib.utils import *
18 
# Scraper metadata.
# __title__ is used in this file as the log prefix and as the Lyrics.source value.
__title__ = 'MiniLyrics'
# Not read anywhere in this file; presumably used by the caller to order
# scrapers -- TODO confirm.
__priority__ = '100'
# Copied to Lyrics.lrc by get_lyrics.
__lrc__ = True
22 
23 
class MiniLyrics(object):
    '''
    Minilyrics specific functions

    Static helpers that encode a search query (vl_enc) and decode a search
    reply (vl_dec). Both use the same layout: a 22 item header followed by
    the payload XOR-ed with a single key value stored at index 1.
    '''

    @staticmethod
    def hexToStr(hexx):
        '''
        Convert a string of hexadecimal digit pairs into characters.

        Every two digits become the character with that code point, e.g.
        '4142' -> 'AB'. A trailing unpaired digit is ignored.
        '''
        return ''.join(chr(int(hexx[i:i + 2], 16))
                       for i in range(0, len(hexx) - 1, 2))

    @staticmethod
    def vl_enc(data, md5_extra):
        '''
        Build the encoded request body for a search query.

        data      -- the query as bytes
        md5_extra -- bytes appended to data before hashing

        Returns a str made of: '\\x02', the key character, '\\x04\\x00\\x00\\x00',
        the 16 characters of the MD5 digest of data + md5_extra, and finally
        data XOR-ed with the key. The key is the rounded mean of the values
        in data.

        Raises ZeroDivisionError when data is empty.
        '''
        datalen = len(data)
        md5 = hashlib.md5()
        md5.update(data + md5_extra)
        hasheddata = MiniLyrics.hexToStr(md5.hexdigest())

        # Items are ints when data is bytes and 1-character strings otherwise.
        values = [item if isinstance(item, int) else ord(item) for item in data]
        magickey = int(round(float(sum(values)) / float(datalen)))
        encddata = bytearray(value ^ magickey for value in values)

        header = '\x02' + chr(magickey) + '\x04\x00\x00\x00' + str(hasheddata)
        try:
            return header + encddata.decode('utf-8')
        except UnicodeDecodeError:
            pass

        # Not valid UTF-8: try whatever encoding chardet guesses.
        encoding = chardet.detect(encddata)['encoding']
        if encoding:
            try:
                return header + encddata.decode(encoding)
            except (ValueError, LookupError):
                # Wrong guess or a codec name Python does not know.
                pass
        # Last resort: map every byte value to the character with that code point.
        return header + "".join(map(chr, encddata))

    @staticmethod
    def vl_dec(data):
        '''
        Decode a search reply.

        data -- the reply as str or bytes; index 1 holds the XOR key and the
                payload starts at index 22.

        Returns the decoded payload as a str. Input that is too short to
        contain a key or a payload yields an empty string.
        '''
        if len(data) < 2:
            # No key available; treat like a reply without payload.
            return ''
        magickey = data[1]
        if not isinstance(magickey, int):
            magickey = ord(magickey)
        return ''.join(
            chr((item if isinstance(item, int) else ord(item)) ^ magickey)
            for item in data[22:])
91 
def __init__(self, *args, **kwargs):
    '''
    Store the scraper configuration.

    Requires the keyword arguments 'debug' and 'settings' (KeyError if
    either is missing); positional arguments are ignored. The proxy
    attribute starts out as None.
    '''
    debug_flag = kwargs['debug']
    self.DEBUG = debug_flag
    addon_settings = kwargs['settings']
    self.settings = addon_settings
    self.proxy = None
97 
def htmlDecode(self,string):
    '''
    Replace the entities &apos; &quot; &gt; &lt; and &amp; in string with
    the characters they stand for and return the result.

    '&amp;' is replaced last, so '&amp;lt;' becomes '&lt;' and not '<'.
    '''
    replacements = (
        ('&apos;', '\''),
        ('&quot;', '"'),
        ('&gt;', '>'),
        ('&lt;', '<'),
        ('&amp;', '&'),
    )
    decoded = string
    for entity, char in replacements:
        decoded = decoded.replace(entity, char)
    return decoded
103 
def get_lyrics(self, song):
    '''
    Search for lyrics matching song and download the first hit.

    An encoded XML query built from song.artist and song.title is posted to
    the search endpoint. The decoded reply is accepted only when both the
    artist and the title it contains are more than 80% similar (difflib
    ratio, case-insensitive) to the requested ones.

    Returns the populated Lyrics object, or None when the request fails,
    the reply is too short to decode, the reply names no artist/title, the
    match is not close enough, no .lrc link is found, or the download
    yields nothing. When several links are found they are also stored in
    lyrics.list.
    '''
    log('%s: searching lyrics for %s - %s' % (__title__, song.artist, song.title), debug=self.DEBUG)
    lyrics = Lyrics(settings=self.settings)
    lyrics.song = song
    lyrics.source = __title__
    lyrics.lrc = __lrc__
    search_url = 'http://search.crintsoft.com/searchlyrics.htm'
    # NOTE(review): artist/title are interpolated without XML escaping, so a
    # value containing '"' or '&' yields a malformed query -- confirm what
    # the server expects before adding escaping.
    search_query_base = "<?xml version='1.0' encoding='utf-8' standalone='yes' ?><searchV1 client=\"ViewLyricsOpenSearcher\" artist=\"{artist}\" title=\"{title}\" OnlyMatched=\"1\" />"
    search_useragent = 'MiniLyrics'
    search_md5watermark = b'Mlv1clt4.0'
    search_encquery = MiniLyrics.vl_enc(search_query_base.format(artist=song.artist, title=song.title).encode('utf-8'), search_md5watermark)
    headers = {"User-Agent": "{ua}".format(ua=search_useragent),
               "Content-Length": "{content_length}".format(content_length=len(search_encquery)),
               "Connection": "Keep-Alive",
               "Expect": "100-continue",
               "Content-Type": "application/x-www-form-urlencoded"
               }
    try:
        request = requests.post(search_url, data=search_encquery, headers=headers, timeout=10)
        search_result = request.text
    except Exception:
        # Best effort: any failure to reach the server means "no lyrics".
        log('%s: search request failed' % __title__, debug=self.DEBUG)
        return None
    if len(search_result) < 2:
        # vl_dec reads its key from index 1; a shorter reply cannot be decoded.
        return None
    rawdata = MiniLyrics.vl_dec(search_result)
    # might be a better way to parse the data
    lrcdata = rawdata.replace('\x00', '*')
    artistmatch = re.search(r'artist\*(.*?)\*', lrcdata)
    if not artistmatch:
        return None
    titlematch = re.search(r'title\*(.*?)\*', lrcdata)
    if not titlematch:
        return None
    artist = artistmatch.group(1)
    title = titlematch.group(1)
    links = []
    if (difflib.SequenceMatcher(None, song.artist.lower(), artist.lower()).ratio() > 0.8) and (difflib.SequenceMatcher(None, song.title.lower(), title.lower()).ratio() > 0.8):
        links = [(artist + ' - ' + title, item, artist, title)
                 for item in re.findall(r'[a-z0-9/_]*?\.lrc', lrcdata)]
    if not links:
        return None
    if len(links) > 1:
        lyrics.list = links
    lyr = self.get_lyrics_from_list(links[0])
    if not lyr:
        return None
    lyrics.lyrics = lyr
    return lyrics
151 
def get_lyrics_from_list(self, link):
    '''
    Download and decode the lyrics file of one search hit.

    link -- 4-tuple (display title, relative url, artist, title) as built
            by get_lyrics; only the url is used.

    Returns the lyrics as text, or None when the download fails. The
    encoding is guessed with chardet; UTF-8 is used when no usable guess is
    available. Undecodable bytes are dropped.
    '''
    _title, url, _artist, _song = link
    try:
        response = requests.get('http://search.crintsoft.com/l/' + url, timeout=10)
        raw = response.content
    except Exception:
        # Best effort: any download failure means "no lyrics".
        return None
    # chardet may report no encoding at all (vl_enc guards against the same case).
    encoding = chardet.detect(raw)['encoding'] or 'utf-8'
    try:
        return raw.decode(encoding, 'ignore')
    except LookupError:
        # chardet named a codec this Python does not provide.
        return raw.decode('utf-8', 'ignore')
minilyrics.lyricsScraper.LyricsFetcher.proxy
proxy
Definition: lyricsScraper.py:96
minilyrics.lyricsScraper.LyricsFetcher.settings
settings
Definition: lyricsScraper.py:95
minilyrics.lyricsScraper.LyricsFetcher.get_lyrics
def get_lyrics(self, song)
Definition: lyricsScraper.py:104
utils
minilyrics.lyricsScraper.LyricsFetcher.get_lyrics_from_list
def get_lyrics_from_list(self, link)
Definition: lyricsScraper.py:152
minilyrics.lyricsScraper.MiniLyrics
Definition: lyricsScraper.py:24
minilyrics.lyricsScraper.LyricsFetcher.__init__
def __init__(self, *args, **kwargs)
Definition: lyricsScraper.py:93
minilyrics.lyricsScraper.MiniLyrics.hexToStr
def hexToStr(hexx)
Definition: lyricsScraper.py:29
minilyrics.lyricsScraper.MiniLyrics.vl_dec
def vl_dec(data)
Definition: lyricsScraper.py:76
minilyrics.lyricsScraper.LyricsFetcher.htmlDecode
def htmlDecode(self, string)
Definition: lyricsScraper.py:98
minilyrics.lyricsScraper.LyricsFetcher
Definition: lyricsScraper.py:92
minilyrics.lyricsScraper.MiniLyrics.vl_enc
def vl_enc(data, md5_extra)
Definition: lyricsScraper.py:38
minilyrics.lyricsScraper.LyricsFetcher.DEBUG
DEBUG
Definition: lyricsScraper.py:94
xbmc.log
None log(str msg, int level=LOGDEBUG)
Definition: xbmc.py:9