MythTV  master
lyricsScraper.py
Go to the documentation of this file.
1 #-*- coding: UTF-8 -*-
2 import re
3 import requests
4 import urllib.parse
5 import difflib
6 from bs4 import BeautifulSoup
7 from lib.utils import *
8 
# Scraper identity: used as the log prefix and stored in ``Lyrics.source``.
__title__ = 'lyricscom'
# Not read anywhere in this file; presumably consumed by whatever loads the
# scrapers to order them -- TODO confirm against the loader.
__priority__ = '240'
# Copied into ``Lyrics.lrc`` by ``get_lyrics``.
__lrc__ = False

# Headers sent with every HTTP request this scraper makes.
UserAgent = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/87.0.4280.88 Safari/537.36"
    ),
}
14 
class LyricsFetcher:
    """Fetch plain-text lyrics for a song by scraping lyrics.com.

    A lookup performs up to three HTTP requests: an artist search, the first
    artist page linked from the search results, and the song page whose link
    text best matches the requested title.
    """

    # Root against which relative links found in result pages are resolved.
    _BASE_URL = 'https://www.lyrics.com/'

    def __init__(self, *args, **kwargs):
        """Store the scraper configuration.

        Keyword Args:
            debug: forwarded to ``log`` as its ``debug`` argument.
            settings: forwarded to ``Lyrics(settings=...)``.

        Raises:
            KeyError: if ``debug`` or ``settings`` is not supplied.
        """
        self.DEBUG = kwargs['debug']
        self.settings = kwargs['settings']
        # Artist search endpoint; ``%s`` receives the URL-quoted artist name.
        self.url = 'https://www.lyrics.com/serp.php?st=%s&qtype=2'

    def get_lyrics(self, song):
        """Return a populated ``Lyrics`` object for *song*, or ``None``.

        Args:
            song: object exposing ``artist`` and ``title`` strings.

        Returns:
            A ``Lyrics`` instance whose ``lyrics`` attribute holds the scraped
            text, or ``None`` when the artist or title is missing, a request
            fails, no artist or title link is found, or the song page has no
            ``<pre>`` block.
        """
        log('%s: searching lyrics for %s - %s' % (__title__, song.artist, song.title), debug=self.DEBUG)
        # Without both fields there is nothing to search for or match against.
        if not song.artist or not song.title:
            return None

        lyrics = Lyrics(settings=self.settings)
        lyrics.song = song
        lyrics.source = __title__
        lyrics.lrc = __lrc__

        # The context manager closes the session's pooled connections on every
        # exit path, including the early returns below.
        with requests.Session() as sess:
            search_html = self._fetch(sess, self.url % urllib.parse.quote_plus(song.artist))
            if search_html is None:
                return None
            artist_url = self._artist_url(search_html)
            if not artist_url:
                return None

            artist_html = self._fetch(sess, artist_url)
            if artist_html is None:
                return None
            song_url = self._song_url(artist_html, song.title)
            if not song_url:
                return None

            song_html = self._fetch(sess, song_url)
            if song_html is None:
                return None

        # The lyrics are taken from the first <pre> element of the song page.
        found = re.search(r'<pre.*?>(.*?)</pre>', song_html, flags=re.DOTALL)
        if not found:
            return None
        # Drop inline markup, then turn literal backslash-n sequences into
        # real line breaks.
        text = re.sub(r'<[^<]+?>', '', found.group(1))
        lyrics.lyrics = text.replace('\\n', '\n')
        return lyrics

    def _fetch(self, sess, url):
        """Return the body of *url* as text, or ``None`` if the request fails."""
        try:
            return sess.get(url, headers=UserAgent, timeout=10).text
        except requests.RequestException as exc:
            log('%s: request failed for %s: %s' % (__title__, url, exc), debug=self.DEBUG)
            return None

    @staticmethod
    def _artist_url(html):
        """Return the absolute URL of the first artist link in *html*, or ''."""
        for link in BeautifulSoup(html, 'html.parser').find_all('a'):
            href = link.get('href')
            # ``get`` yields None for anchors without an href; skip those.
            if link.string and href and href.startswith('artist/'):
                return urllib.parse.urljoin(LyricsFetcher._BASE_URL, href)
        return ''

    @staticmethod
    def _song_url(html, title):
        """Return the absolute URL of the first link whose text resembles *title*, or ''."""
        wanted = title.lower()
        for link in BeautifulSoup(html, 'html.parser').find_all('a'):
            href = link.get('href')
            if not (link.string and href):
                continue
            # Fuzzy comparison tolerates small spelling/punctuation differences.
            score = difflib.SequenceMatcher(None, link.string.lower(), wanted).ratio()
            if score > 0.8:
                return urllib.parse.urljoin(LyricsFetcher._BASE_URL, href)
        return ''
utils
culrcscrapers.lyricscom.lyricsScraper.LyricsFetcher.get_lyrics
def get_lyrics(self, song)
Definition: lyricsScraper.py:21
culrcscrapers.lyricscom.lyricsScraper.LyricsFetcher.url
url
Definition: lyricsScraper.py:19
culrcscrapers.lyricscom.lyricsScraper.LyricsFetcher
Definition: lyricsScraper.py:15
culrcscrapers.lyricscom.lyricsScraper.LyricsFetcher.settings
settings
Definition: lyricsScraper.py:18
xbmc.log
None log(str msg, int level=LOGDEBUG)
Definition: xbmc.py:9
culrcscrapers.lyricscom.lyricsScraper.LyricsFetcher.DEBUG
DEBUG
Definition: lyricsScraper.py:17
culrcscrapers.lyricscom.lyricsScraper.LyricsFetcher.__init__
def __init__(self, *args, **kwargs)
Definition: lyricsScraper.py:16