MythTV  master
lyricsScraper.py
Go to the documentation of this file.
1 #-*- coding: UTF-8 -*-
2 '''
3 Scraper for https://www.musixmatch.com/
4 
5 musixmatchlrc
6 
7 https://github.com/rtcq/syncedlyrics
8 '''
9 
10 import requests
11 import json
12 import time
13 import difflib
14 import xbmcvfs
15 from lib.utils import *
16 
17 __title__ = "musixmatchlrc"  # scraper name: prefixes this module's log messages and is stored in lyrics.source
18 __priority__ = '100'  # not read anywhere in this file; presumably used by the scraper loader for ordering -- TODO confirm
19 __lrc__ = True  # copied into lyrics.lrc by get_lyrics
20 
21 
23  def __init__(self, *args, **kwargs):
24  self.DEBUG = kwargs['debug']
25  self.settings = kwargs['settings']
26  self.SEARCH_URL = 'https://apic-desktop.musixmatch.com/ws/1.1/%s'
27  self.session = requests.Session()
28  self.session.headers.update(
29  {
30  "authority": "apic-desktop.musixmatch.com",
31  "cookie": "AWSELBCORS=0; AWSELB=0",
32  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0",
33  }
34  )
35  self.current_time = int(time.time())
36 
37  def get_token(self):
38  self.token = ''
39  tokenpath = os.path.join(PROFILE, 'musixmatch_token')
40  if xbmcvfs.exists(tokenpath):
41  tokenfile = xbmcvfs.File(tokenpath)
42  tokendata = json.load(tokenfile)
43  tokenfile.close()
44  cached_token = tokendata.get("token")
45  expiration_time = tokendata.get("expiration_time")
46  if cached_token and expiration_time and self.current_time < expiration_time:
47  self.token = cached_token
48  if not self.token:
49  try:
50  url = self.SEARCH_URL % 'token.get'
51  query = [('user_language', 'en'), ('app_id', 'web-desktop-app-v1.0'), ('t', self.current_time)]
52  response = self.session.get(url, params=query, timeout=10)
53  result = response.json()
54  except:
55  return None
56  if 'message' in result and 'body' in result["message"] and 'user_token' in result["message"]["body"]:
57  self.token = result["message"]["body"]["user_token"]
58  expiration_time = self.current_time + 600
59  tokendata = {}
60  tokendata['token'] = self.token
61  tokendata['expiration_time'] = expiration_time
62  tokenfile = xbmcvfs.File(tokenpath, 'w')
63  json.dump(tokendata, tokenfile)
64  tokenfile.close()
65  return self.token
66 
67  def get_lyrics(self, song):
68  self.token = self.get_token()
69  if not self.token:
70  return
71  log("%s: searching lyrics for %s - %s" % (__title__, song.artist, song.title), debug=self.DEBUG)
72  lyrics = Lyrics(settings=self.settings)
73  lyrics.song = song
74  lyrics.source = __title__
75  lyrics.lrc = __lrc__
76  artist = song.artist.replace(' ', '+')
77  title = song.title.replace(' ', '+')
78  search = '%s - %s' % (artist, title)
79  try:
80  url = self.SEARCH_URL % 'track.search'
81  query = [('q', search), ('page_size', '5'), ('page', '1'), ('app_id', 'web-desktop-app-v1.0'), ('usertoken', self.token), ('t', self.current_time)]
82  response = requests.get(url, params=query, timeout=10)
83  result = response.json()
84  except:
85  return None
86  links = []
87  if 'message' in result and 'body' in result["message"] and 'track_list' in result["message"]["body"] and result["message"]["body"]["track_list"]:
88  for item in result["message"]["body"]["track_list"]:
89  artistname = item['track']['artist_name']
90  songtitle = item['track']['track_name']
91  trackid = item['track']['track_id']
92  if (difflib.SequenceMatcher(None, artist.lower(), artistname.lower()).ratio() > 0.8) and (difflib.SequenceMatcher(None, title.lower(), songtitle.lower()).ratio() > 0.8):
93  links.append((artistname + ' - ' + songtitle, trackid, artistname, songtitle))
94  if len(links) == 0:
95  return None
96  elif len(links) > 1:
97  lyrics.list = links
98  for link in links:
99  lyr = self.get_lyrics_from_list(link)
100  if lyr:
101  lyrics.lyrics = lyr
102  return lyrics
103  return None
104 
105  def get_lyrics_from_list(self, link):
106  title,trackid,artist,song = link
107  try:
108  log('%s: search track id: %s' % (__title__, trackid), debug=self.DEBUG)
109  url = self.SEARCH_URL % 'track.subtitle.get'
110  query = [('track_id', trackid), ('subtitle_format', 'lrc'), ('app_id', 'web-desktop-app-v1.0'), ('usertoken', self.token), ('t', self.current_time)]
111  response = requests.get(url, params=query, timeout=10)
112  result = response.json()
113  except:
114  return None
115  if 'message' in result and 'body' in result["message"] and 'subtitle' in result["message"]["body"] and 'subtitle_body' in result["message"]["body"]["subtitle"]:
116  lyrics = result["message"]["body"]["subtitle"]["subtitle_body"]
117  return lyrics
utils
culrcscrapers.musixmatchlrc.lyricsScraper.LyricsFetcher.get_lyrics_from_list
def get_lyrics_from_list(self, link)
Definition: lyricsScraper.py:105
xbmcvfs.exists
bool exists(str path)
Definition: xbmcvfs.py:51
culrcscrapers.musixmatchlrc.lyricsScraper.LyricsFetcher.__init__
def __init__(self, *args, **kwargs)
Definition: lyricsScraper.py:23
culrcscrapers.musixmatchlrc.lyricsScraper.LyricsFetcher
Definition: lyricsScraper.py:22
culrcscrapers.musixmatchlrc.lyricsScraper.LyricsFetcher.token
token
Definition: lyricsScraper.py:38
culrcscrapers.musixmatchlrc.lyricsScraper.LyricsFetcher.current_time
current_time
Definition: lyricsScraper.py:35
culrcscrapers.musixmatchlrc.lyricsScraper.LyricsFetcher.SEARCH_URL
SEARCH_URL
Definition: lyricsScraper.py:26
culrcscrapers.musixmatchlrc.lyricsScraper.LyricsFetcher.get_token
def get_token(self)
Definition: lyricsScraper.py:37
culrcscrapers.musixmatchlrc.lyricsScraper.LyricsFetcher.settings
settings
Definition: lyricsScraper.py:25
hardwareprofile.distros.all.get
def get()
Definition: all.py:22
culrcscrapers.musixmatchlrc.lyricsScraper.LyricsFetcher.session
session
Definition: lyricsScraper.py:27
culrcscrapers.musixmatchlrc.lyricsScraper.LyricsFetcher.get_lyrics
def get_lyrics(self, song)
Definition: lyricsScraper.py:67
xbmcvfs.File
Definition: xbmcvfs.py:10
culrcscrapers.musixmatchlrc.lyricsScraper.LyricsFetcher.DEBUG
DEBUG
Definition: lyricsScraper.py:24
xbmc.log
None log(str msg, int level=LOGDEBUG)
Definition: xbmc.py:9