MythTV master
lyricsScraper.py
Go to the documentation of this file.
1#-*- coding: UTF-8 -*-
2import re
3import requests
4import urllib.parse
5import difflib
6from bs4 import BeautifulSoup
7from lib.utils import *
8
# Scraper identity; get_lyrics copies __title__ into Lyrics.source and uses it as the log prefix.
__title__ = 'lyricscom'
# NOTE(review): presumably the scraper's rank among the available sources - confirm against the loader.
__priority__ = '240'
# Copied into Lyrics.lrc by get_lyrics.
__lrc__ = False

# Request headers sent with every HTTP call so the site sees a desktop browser User-Agent.
UserAgent = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/87.0.4280.88 Safari/537.36"
    ),
}
14
def __init__(self, *args, **kwargs):
    """Store the debug flag and settings and set the search URL template.

    Positional arguments are accepted but ignored. The keyword arguments
    'debug' and 'settings' are both required; a missing one raises KeyError.
    """
    debug_flag = kwargs['debug']
    self.DEBUG = debug_flag
    scraper_settings = kwargs['settings']
    self.settings = scraper_settings
    # The %s placeholder is filled with the URL-quoted search term in get_lyrics.
    self.url = 'https://www.lyrics.com/serp.php?st=%s&qtype=2'
20
def get_lyrics(self, song):
    """Look up the lyrics for *song* on lyrics.com.

    The lookup takes up to three requests: a search for the artist, the
    first artist page found in the search results, and then the first link
    on that page whose text closely matches the song title (difflib
    similarity ratio above 0.8). The lyrics are the contents of the first
    <pre> element of the song page with all markup stripped.

    Args:
        song: object providing ``artist`` and ``title`` strings.

    Returns:
        A ``Lyrics`` object with ``lyrics`` filled in, or ``None`` when a
        request fails or no artist page, song link or lyrics block is found.
    """
    log('%s: searching lyrics for %s - %s' % (__title__, song.artist, song.title), debug=self.DEBUG)
    lyrics = Lyrics(settings=self.settings)
    lyrics.song = song
    lyrics.source = __title__
    lyrics.lrc = __lrc__

    # The context manager closes the session's connections on every exit path.
    with requests.Session() as sess:
        # Step 1: search for the artist. Building the URL stays inside the try
        # so that an unusable artist value is treated as "no lyrics", as before.
        try:
            search_url = self.url % urllib.parse.quote_plus(song.artist)
            search_html = sess.get(search_url, headers=UserAgent, timeout=10).text
        except Exception as err:
            # Exception rather than a bare except: stay best-effort, but let
            # KeyboardInterrupt / SystemExit propagate.
            log('%s: artist search failed: %s' % (__title__, err), debug=self.DEBUG)
            return None

        artist_url = ''
        for link in BeautifulSoup(search_html, 'html.parser').find_all('a'):
            href = link.get('href')
            # Anchors without an href yield None; skip them instead of
            # calling startswith() on None.
            if link.string and href and href.startswith('artist/'):
                artist_url = 'https://www.lyrics.com/' + href
                break
        if not artist_url:
            return None

        # Step 2: open the artist page and look for the song title.
        try:
            artist_html = sess.get(artist_url, headers=UserAgent, timeout=10).text
        except Exception as err:
            log('%s: artist page request failed: %s' % (__title__, err), debug=self.DEBUG)
            return None

        wanted_title = song.title.lower()
        song_url = ''
        for link in BeautifulSoup(artist_html, 'html.parser').find_all('a'):
            href = link.get('href')
            if not (link.string and href):
                continue
            similarity = difflib.SequenceMatcher(None, link.string.lower(), wanted_title).ratio()
            if similarity > 0.8:
                song_url = 'https://www.lyrics.com' + href
                break
        if not song_url:
            return None

        # Step 3: fetch the song page and extract the lyrics block.
        try:
            song_html = sess.get(song_url, headers=UserAgent, timeout=10).text
        except Exception as err:
            log('%s: song page request failed: %s' % (__title__, err), debug=self.DEBUG)
            return None

    matchcode = re.search(r'<pre.*?>(.*?)</pre>', song_html, flags=re.DOTALL)
    if not matchcode:
        return None
    # Drop any remaining tags inside the <pre> block.
    plain_text = re.sub(r'<[^<]+?>', '', matchcode.group(1))
    # Turn literal backslash-n sequences into real line breaks.
    lyrics.lyrics = plain_text.replace('\\n', '\n')
    return lyrics
None log(str msg, int level=LOGDEBUG)
Definition: xbmc.py:9