MythTV  master
lyricsScraper.py
Go to the documentation of this file.
1 #-*- coding: UTF-8 -*-
2 import sys
3 import re
4 import urllib.parse
5 import requests
6 import html
7 import xbmc
8 import xbmcaddon
9 import json
10 import difflib
11 from lib.utils import *
12 
13 __title__ = 'genius'
14 __priority__ = '200'
15 __lrc__ = False
16 
17 
19  def __init__(self, *args, **kwargs):
20  self.DEBUG = kwargs['debug']
21  self.settings = kwargs['settings']
22  self.url = 'http://api.genius.com/search?q=%s%%20%s&access_token=Rq_cyNZ6fUOQr4vhyES6vu1iw3e94RX85ju7S8-0jhM-gftzEvQPG7LJrrnTji11'
23 
24  def get_lyrics(self, song):
25  log('%s: searching lyrics for %s - %s' % (__title__, song.artist, song.title), debug=self.DEBUG)
26  lyrics = Lyrics(settings=self.settings)
27  lyrics.song = song
28  lyrics.source = __title__
29  lyrics.lrc = __lrc__
30  try:
31  headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; rv:77.0) Gecko/20100101 Firefox/77.0'}
32  url = self.url % (urllib.parse.quote(song.artist), urllib.parse.quote(song.title))
33  req = requests.get(url, headers=headers, timeout=10)
34  response = req.text
35  except:
36  return None
37  data = json.loads(response)
38  try:
39  name = data['response']['hits'][0]['result']['primary_artist']['name']
40  track = data['response']['hits'][0]['result']['title']
41  if (difflib.SequenceMatcher(None, song.artist.lower(), name.lower()).ratio() > 0.8) and (difflib.SequenceMatcher(None, song.title.lower(), track.lower()).ratio() > 0.8):
42  self.page = data['response']['hits'][0]['result']['url']
43  else:
44  return None
45  except:
46  return None
47  log('%s: search url: %s' % (__title__, self.page), debug=self.DEBUG)
48  try:
49  headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; rv:77.0) Gecko/20100101 Firefox/77.0'}
50  req = requests.get(self.page, headers=headers, timeout=10)
51  response = req.text
52  except:
53  return None
54  response = html.unescape(response)
55  matchcode = re.findall('class="Lyrics__Container.*?">(.*?)</div><div', response, flags=re.DOTALL)
56  try:
57  lyricscode = ""
58  for matchCodeItem in matchcode:
59  lyricscode = lyricscode + matchCodeItem
60  lyr1 = re.sub('<br/>', '\n', lyricscode)
61  lyr2 = re.sub('<[^<]+?>', '', lyr1)
62  lyr3 = lyr2.replace('\\n','\n').strip()
63  if not lyr3 or lyr3 == '[Instrumental]' or lyr3.startswith('Lyrics for this song have yet to be released'):
64  return None
65  lyrics.lyrics = lyr3
66  return lyrics
67  except:
68  return None
culrcscrapers.genius.lyricsScraper.LyricsFetcher.get_lyrics
def get_lyrics(self, song)
Definition: lyricsScraper.py:24
utils
culrcscrapers.genius.lyricsScraper.LyricsFetcher
Definition: lyricsScraper.py:18
culrcscrapers.genius.lyricsScraper.LyricsFetcher.page
page
Definition: lyricsScraper.py:42
culrcscrapers.genius.lyricsScraper.LyricsFetcher.url
url
Definition: lyricsScraper.py:22
culrcscrapers.genius.lyricsScraper.LyricsFetcher.__init__
def __init__(self, *args, **kwargs)
Definition: lyricsScraper.py:19
culrcscrapers.genius.lyricsScraper.LyricsFetcher.DEBUG
DEBUG
Definition: lyricsScraper.py:20
culrcscrapers.genius.lyricsScraper.LyricsFetcher.settings
settings
Definition: lyricsScraper.py:21
xbmc.log
None log(str msg, int level=LOGDEBUG)
Definition: xbmc.py:9