MythTV  master
lyricsScraper.py
Go to the documentation of this file.
1 #-*- coding: UTF-8 -*-
2 """
3 Scraper for https://xiami.com
4 
5 Taxigps
6 """
7 
8 import urllib.parse
9 import socket
10 import re
11 import difflib
12 import json
13 import chardet
14 import requests
15 from utilities import *
16 
# Scraper metadata. __title__ doubles as the log prefix and as the
# ``source`` recorded on returned lyrics; __lrc__ is copied to ``Lyrics.lrc``.
# NOTE(review): __priority__ is not read anywhere in this file; presumably
# the host application uses it to order scrapers -- confirm.
__title__ = "Xiami"
__priority__ = '110'
__lrc__ = True

# Browser-like User-Agent header sent with every request to xiami.com.
UserAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0'

# Default timeout, in seconds, applied to newly created socket objects.
socket.setdefaulttimeout(10)
24 
26  def __init__( self ):
27  self.LIST_URL = 'https://www.xiami.com/search?key=%s'
28  self.SONG_URL = 'https://www.xiami.com/song/playlist/id/%s/object_name/default/object_id/0'
29  self.session = requests.Session()
30 
31  def get_lyrics(self, song):
32  log( "%s: searching lyrics for %s - %s" % (__title__, song.artist, song.title))
33  lyrics = Lyrics()
34  lyrics.song = song
35  lyrics.source = __title__
36  lyrics.lrc = __lrc__
37  keyword = "%s %s" % (song.title, song.artist)
38  url = self.LIST_URL % (urllib.parse.quote(keyword))
39  try:
40  response = self.session.get(url, headers={'User-Agent': UserAgent, 'Referer': 'https://www.xiami.com/play'})
41  result = response.text
42  except:
43  log( "%s: %s::%s (%d) [%s]" % (
44  __title__, self.__class__.__name__,
45  sys.exc_info()[ 2 ].tb_frame.f_code.co_name,
46  sys.exc_info()[ 2 ].tb_lineno,
47  sys.exc_info()[ 1 ]
48  ))
49  return None
50  match = re.compile('<td class="chkbox">.+?value="(.+?)".+?href="//www.xiami.com/song/[^"]+" title="([^"]+)".*?href="//www.xiami.com/artist/[^"]+" title="([^"]+)"', re.DOTALL).findall(result)
51  links = []
52  for x in match:
53  title = x[1]
54  artist = x[2]
55  if (difflib.SequenceMatcher(None, song.artist.lower(), artist.lower()).ratio() > 0.8) and (difflib.SequenceMatcher(None, song.title.lower(), title.lower()).ratio() > 0.8):
56  links.append( ( artist + ' - ' + title, x[0], artist, title ) )
57  if len(links) == 0:
58  return None
59  elif len(links) > 1:
60  lyrics.list = links
61  lyr = self.get_lyrics_from_list(links[0])
62  if not lyr:
63  return None
64  lyrics.lyrics = lyr
65  return lyrics
66 
67  def get_lyrics_from_list(self, link):
68  title,id,artist,song = link
69  try:
70  response = self.session.get(self.SONG_URL % (id), headers={'User-Agent': UserAgent, 'Referer': 'https://www.xiami.com/play'})
71  result = response.text
72  data = json.loads(result)
73  if 'data' in data and 'trackList' in data['data'] and data['data']['trackList'] and 'lyric' in data['data']['trackList'][0] and data['data']['trackList'][0]['lyric']:
74  url = data['data']['trackList'][0]['lyric']
75  except:
76  log( "%s: %s::%s (%d) [%s]" % (
77  __title__, self.__class__.__name__,
78  sys.exc_info()[ 2 ].tb_frame.f_code.co_name,
79  sys.exc_info()[ 2 ].tb_lineno,
80  sys.exc_info()[ 1 ]
81  ))
82  return
83  try:
84  response = self.session.get(url, headers={'User-Agent': UserAgent, 'Referer': 'https://www.xiami.com/play'})
85  lyrics = response.content
86  except:
87  log( "%s: %s::%s (%d) [%s]" % (
88  __title__, self.__class__.__name__,
89  sys.exc_info()[ 2 ].tb_frame.f_code.co_name,
90  sys.exc_info()[ 2 ].tb_lineno,
91  sys.exc_info()[ 1 ]
92  ))
93  return
94  enc = chardet.detect(lyrics)
95  lyrics = lyrics.decode(enc['encoding'], 'ignore')
96  return lyrics
xiami.lyricsScraper.LyricsFetcher.__init__
def __init__(self)
Definition: lyricsScraper.py:26
xiami.lyricsScraper.LyricsFetcher.session
session
Definition: lyricsScraper.py:29
xiami.lyricsScraper.LyricsFetcher.get_lyrics_from_list
def get_lyrics_from_list(self, link)
Definition: lyricsScraper.py:67
xiami.lyricsScraper.LyricsFetcher.get_lyrics
def get_lyrics(self, song)
Definition: lyricsScraper.py:31
xiami.lyricsScraper.LyricsFetcher.LIST_URL
LIST_URL
Definition: lyricsScraper.py:27
xiami.lyricsScraper.LyricsFetcher.SONG_URL
SONG_URL
Definition: lyricsScraper.py:28
hardwareprofile.distros.all.get
def get()
Definition: all.py:22
xiami.lyricsScraper.LyricsFetcher
Definition: lyricsScraper.py:25
xbmc.log
None log(str msg, int level=LOGDEBUG)
Definition: xbmc.py:9