MythTV master
lyricsScraper.py
Go to the documentation of this file.
1#-*- coding: UTF-8 -*-
2'''
3Scraper for https://www.musixmatch.com/
4
5musixmatchlrc
6
7https://github.com/rtcq/syncedlyrics
8'''
9
10import requests
11import json
12import time
13import difflib
14import xbmcvfs
15from lib.utils import *
16
# Scraper identity: used as the log prefix and stored as ``lyrics.source``.
__title__ = "musixmatchlrc"
# NOTE(review): presumably orders this scraper relative to the others;
# the consumer of this value is not visible in this file -- confirm.
__priority__ = '100'
# Copied onto ``lyrics.lrc`` for results produced by this scraper.
__lrc__ = True
20
21
def __init__(self, *args, **kwargs):
    """Store the scraper configuration and prepare a reusable HTTP session.

    Keyword arguments:
        debug: value kept on ``self.DEBUG``.
        settings: value kept on ``self.settings``.

    Raises:
        KeyError: if ``debug`` or ``settings`` is not supplied.
    """
    self.DEBUG = kwargs['debug']
    self.settings = kwargs['settings']
    # Endpoint template; the API method name is substituted for ``%s``.
    self.SEARCH_URL = 'https://apic-desktop.musixmatch.com/ws/1.1/%s'

    # Headers attached to every request made through the session.
    default_headers = {
        "authority": "apic-desktop.musixmatch.com",
        "cookie": "AWSELBCORS=0; AWSELB=0",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0",
    }
    http = requests.Session()
    http.headers.update(default_headers)
    self.session = http

    # Timestamp captured once at construction; reused for the ``t`` query
    # parameter and for the token expiry check.
    self.current_time = int(time.time())
36
def get_token(self):
    """Return a Musixmatch user token, preferring the on-disk cache.

    The token is cached as JSON in ``<PROFILE>/musixmatch_token`` together
    with an expiration timestamp (``self.current_time + 600``).  A cached
    token is reused only while ``self.current_time`` is earlier than that
    timestamp; otherwise a new token is requested via ``token.get`` and the
    cache file is rewritten.

    Cache problems (unreadable file, invalid JSON, unexpected layout, write
    failure) are logged and otherwise ignored so they never prevent a fresh
    token from being fetched or returned.

    Returns:
        The token string, ``''`` when the service reply carries no token,
        or ``None`` when the token request itself fails.
    """
    self.token = ''
    tokenpath = os.path.join(PROFILE, 'musixmatch_token')

    if xbmcvfs.exists(tokenpath):
        tokendata = None
        try:
            tokenfile = xbmcvfs.File(tokenpath)
            try:
                tokendata = json.load(tokenfile)
            finally:
                # Always release the handle, even when the JSON is invalid.
                tokenfile.close()
        except Exception as err:
            log('%s: ignoring unreadable token cache: %s' % (__title__, err), debug=self.DEBUG)
        if isinstance(tokendata, dict):
            cached_token = tokendata.get("token")
            expiration_time = tokendata.get("expiration_time")
            # Only compare against a numeric expiry; anything else means
            # the cache is malformed and must not be trusted.
            if (cached_token
                    and isinstance(expiration_time, (int, float))
                    and self.current_time < expiration_time):
                self.token = cached_token

    if not self.token:
        url = self.SEARCH_URL % 'token.get'
        query = [('user_language', 'en'), ('app_id', 'web-desktop-app-v1.0'), ('t', self.current_time)]
        try:
            response = self.session.get(url, params=query, timeout=10)
            result = response.json()
        except Exception as err:
            # Best effort: network or decoding failures mean "no token".
            log('%s: token request failed: %s' % (__title__, err), debug=self.DEBUG)
            return None

        # Any level of the reply may be missing or of an unexpected type,
        # so each level is checked before descending.
        message = result.get('message') if isinstance(result, dict) else None
        body = message.get('body') if isinstance(message, dict) else None
        new_token = body.get('user_token') if isinstance(body, dict) else None
        if new_token:
            self.token = new_token
            tokendata = {
                'token': self.token,
                'expiration_time': self.current_time + 600,
            }
            try:
                tokenfile = xbmcvfs.File(tokenpath, 'w')
                try:
                    json.dump(tokendata, tokenfile)
                finally:
                    tokenfile.close()
            except Exception as err:
                # A failed cache write must not discard a valid token.
                log('%s: could not write token cache: %s' % (__title__, err), debug=self.DEBUG)
    return self.token
66
def get_lyrics(self, song):
    """Search Musixmatch for *song* and return the first synced lyrics found.

    Args:
        song: object exposing ``artist`` and ``title`` strings.

    Returns:
        A ``Lyrics`` object whose ``lyrics`` attribute holds the subtitle
        text of the first matching track that has one, or ``None`` when no
        token is available, the search fails, nothing matches, or no
        matching track has a subtitle.  When more than one track matches,
        the candidates are also stored on ``lyrics.list``.
    """
    self.token = self.get_token()
    if not self.token:
        return None
    log("%s: searching lyrics for %s - %s" % (__title__, song.artist, song.title), debug=self.DEBUG)
    lyrics = Lyrics(settings=self.settings)
    lyrics.song = song
    lyrics.source = __title__
    lyrics.lrc = __lrc__

    # The search term keeps the '+'-joined form this scraper has always sent.
    search = '%s - %s' % (song.artist.replace(' ', '+'), song.title.replace(' ', '+'))
    url = self.SEARCH_URL % 'track.search'
    query = [('q', search), ('page_size', '5'), ('page', '1'), ('app_id', 'web-desktop-app-v1.0'), ('usertoken', self.token), ('t', self.current_time)]
    try:
        # Use the configured session so this request carries the same
        # headers as the token request.
        response = self.session.get(url, params=query, timeout=10)
        result = response.json()
    except Exception as err:
        log("%s: track search failed: %s" % (__title__, err), debug=self.DEBUG)
        return None

    message = result.get('message') if isinstance(result, dict) else None
    body = message.get('body') if isinstance(message, dict) else None
    track_list = body.get('track_list') if isinstance(body, dict) else None
    if not isinstance(track_list, list):
        track_list = []

    # Compare against the names as given (with spaces).  Comparing the
    # '+'-joined query strings instead penalises every space and pushes
    # correct multi-word matches below the threshold.
    wanted_artist = song.artist.lower()
    wanted_title = song.title.lower()
    links = []
    for item in track_list:
        track = item.get('track') if isinstance(item, dict) else None
        if not isinstance(track, dict):
            continue
        artistname = track.get('artist_name')
        songtitle = track.get('track_name')
        trackid = track.get('track_id')
        if not isinstance(artistname, str) or not isinstance(songtitle, str) or trackid is None:
            # Skip incomplete entries rather than failing the whole search.
            continue
        artist_ratio = difflib.SequenceMatcher(None, wanted_artist, artistname.lower()).ratio()
        title_ratio = difflib.SequenceMatcher(None, wanted_title, songtitle.lower()).ratio()
        if artist_ratio > 0.8 and title_ratio > 0.8:
            links.append((artistname + ' - ' + songtitle, trackid, artistname, songtitle))

    if not links:
        return None
    if len(links) > 1:
        lyrics.list = links
    for link in links:
        lyr = self.get_lyrics_from_list(link)
        if lyr:
            lyrics.lyrics = lyr
            return lyrics
    return None
104
def get_lyrics_from_list(self, link):
    """Fetch the LRC subtitle text for one search candidate.

    Args:
        link: 4-tuple ``(display title, track id, artist, song title)``;
            only the track id is used.

    Returns:
        The ``subtitle_body`` value from the ``track.subtitle.get`` reply,
        or ``None`` when the request fails or the reply has no subtitle.
    """
    _, trackid, _, _ = link
    log('%s: search track id: %s' % (__title__, trackid), debug=self.DEBUG)
    url = self.SEARCH_URL % 'track.subtitle.get'
    query = [('track_id', trackid), ('subtitle_format', 'lrc'), ('app_id', 'web-desktop-app-v1.0'), ('usertoken', self.token), ('t', self.current_time)]
    try:
        # Use the configured session so this request carries the same
        # headers as the token request.
        response = self.session.get(url, params=query, timeout=10)
        result = response.json()
    except Exception as err:
        log('%s: subtitle request failed: %s' % (__title__, err), debug=self.DEBUG)
        return None

    # Any level of the reply may be missing or of an unexpected type,
    # so each level is checked before descending.
    message = result.get('message') if isinstance(result, dict) else None
    body = message.get('body') if isinstance(message, dict) else None
    subtitle = body.get('subtitle') if isinstance(body, dict) else None
    if isinstance(subtitle, dict) and 'subtitle_body' in subtitle:
        return subtitle['subtitle_body']
    return None
None log(str msg, int level=LOGDEBUG)
Definition: xbmc.py:9
bool exists(str path)
Definition: xbmcvfs.py:51