MythTV  master
lyricsScraper.py
Go to the documentation of this file.
1 #-*- coding: UTF-8 -*-
2 '''
3 Scraper for https://www.musixmatch.com/
4 
5 musixmatch
6 '''
7 
8 import os
9 import requests
10 import re
11 import random
12 import difflib
13 import html
14 from bs4 import BeautifulSoup
15 from lib.utils import *
16 
# Scraper name; used as the lyrics source label and as the prefix of log lines.
__title__ = "musixmatch"
# NOTE(review): presumably orders this scraper relative to the others --
# confirm against the code that loads the scrapers.
__priority__ = '210'
# Copied onto every Lyrics result as its ``lrc`` flag.
__lrc__ = False

# HTTP headers passed along with each request made by this scraper.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0',
}
23 
24 # Search is not possible because it requires JavaScript; only direct access to the lyrics page works.
25 
27  def __init__(self, *args, **kwargs):
28  self.DEBUG = kwargs['debug']
29  self.settings = kwargs['settings']
30  self.SEARCH_URL = 'https://www.musixmatch.com/search?query='
31  self.LYRIC_URL = 'https://www.musixmatch.com/lyrics/%s/%s'
32 
33  def get_lyrics(self, song):
34  log("%s: searching lyrics for %s - %s" % (__title__, song.artist, song.title), debug=self.DEBUG)
35  lyrics = Lyrics(settings=self.settings)
36  lyrics.song = song
37  lyrics.source = __title__
38  lyrics.lrc = __lrc__
39  artist = song.artist.replace("'", '').replace('!', '').replace('?', '').replace('"', '').replace('/', '').replace('.', '').replace('&', '').replace(',', '').replace('(', '').replace(')', '').replace(' ', '-')
40  title = song.title.replace("'", '').replace('!', '').replace('?', '').replace('"', '').replace('/', '').replace('.', '').replace('&', '').replace(',', '').replace('(', '').replace(')', '').replace(' ', '-')
41  url = self.LYRIC_URL % (artist, title)
42  try:
43  log('%s: search url: %s' % (__title__, url), debug=self.DEBUG)
44  search = requests.get(url, headers=headers, timeout=10)
45  response = search.text
46  except:
47  return None
48  matchcode = re.search('Lyrics of (.*?)Writer\(s\): ', response, flags=re.DOTALL)
49  if matchcode:
50  lyricscode = (matchcode.group(1))
51  lyr = re.sub('<[^<]+?>', '\n', lyricscode)
52  lyr = html.unescape(lyr)
53  lyrics.lyrics = lyr.replace('\n\n\n\n', '\n')
54  return lyrics
55  return None
56 
57 '''
58  def get_lyrics(self, song):
59  log("%s: searching lyrics for %s - %s" % (__title__, song.artist, song.title), debug=self.DEBUG)
60  lyrics = Lyrics(settings=self.settings)
61  lyrics.song = song
62  lyrics.source = __title__
63  lyrics.lrc = __lrc__
64  artist = song.artist.replace(' ', '+')
65  title = song.title.replace(' ', '+')
66  search = '%s+%s' % (artist, title)
67  try:
68  url = self.SEARCH_URL + search
69  response = requests.get(url, headers=headers, timeout=10)
70  result = response.text
71  except:
72  return None
73  links = []
74  soup = BeautifulSoup(result, 'html.parser')
75  for item in soup.find_all('li', {'class': 'showArtist'}):
76  artistname = item.find('a', {'class': 'artist'}).get_text()
77  songtitle = item.find('a', {'class': 'title'}).get_text()
78  url = item.find('a', {'class': 'title'}).get('href')
79  if (difflib.SequenceMatcher(None, artist.lower(), artistname.lower()).ratio() > 0.8) and (difflib.SequenceMatcher(None, title.lower(), songtitle.lower()).ratio() > 0.8):
80  links.append((artistname + ' - ' + songtitle, self.LYRIC_URL + url, artistname, songtitle))
81  if len(links) == 0:
82  return None
83  elif len(links) > 1:
84  lyrics.list = links
85  for link in links:
86  lyr = self.get_lyrics_from_list(link)
87  if lyr:
88  lyrics.lyrics = lyr
89  return lyrics
90  return None
91 
92  def get_lyrics_from_list(self, link):
93  title,url,artist,song = link
94  try:
95  log('%s: search url: %s' % (__title__, url), debug=self.DEBUG)
96  response = requests.get(url, headers=headers, timeout=10)
97  result = response.text
98  except:
99  return None
100  soup = BeautifulSoup(result, 'html.parser')
101  lyr = soup.find_all('span', {'class': 'lyrics__content__ok'})
102  if lyr:
103  lyrics = ''
104  for part in lyr:
105  lyrics = lyrics + part.get_text() + '\n'
106  return lyrics
107  else:
108  lyr = soup.find_all('span', {'class': 'lyrics__content__error'})
109  if lyr:
110  lyrics = ''
111  for part in lyr:
112  lyrics = lyrics + part.get_text() + '\n'
113  return lyrics
114 '''
utils
culrcscrapers.musixmatch.lyricsScraper.LyricsFetcher.__init__
def __init__(self, *args, **kwargs)
Definition: lyricsScraper.py:27
culrcscrapers.musixmatch.lyricsScraper.LyricsFetcher.get_lyrics
def get_lyrics(self, song)
Definition: lyricsScraper.py:33
culrcscrapers.musixmatch.lyricsScraper.LyricsFetcher.settings
settings
Definition: lyricsScraper.py:29
culrcscrapers.musixmatch.lyricsScraper.LyricsFetcher.LYRIC_URL
LYRIC_URL
Definition: lyricsScraper.py:31
culrcscrapers.musixmatch.lyricsScraper.LyricsFetcher
Definition: lyricsScraper.py:26
culrcscrapers.musixmatch.lyricsScraper.LyricsFetcher.DEBUG
DEBUG
Definition: lyricsScraper.py:28
culrcscrapers.musixmatch.lyricsScraper.LyricsFetcher.SEARCH_URL
SEARCH_URL
Definition: lyricsScraper.py:30
xbmc.log
None log(str msg, int level=LOGDEBUG)
Definition: xbmc.py:9