MythTV master
lyricsScraper.py
Go to the documentation of this file.
1#-*- coding: UTF-8 -*-
2'''
3Scraper for http://newlyrics.gomtv.com/
4
5edge
6'''
7
8import sys
9import hashlib
10import requests
11import urllib.parse
12import re
13import unicodedata
14from lib.utils import *
15from lib.audiofile import AudioFile
16
# Scraper name: used as the log prefix and stored as the lyrics source.
__title__ = 'GomAudio'
# NOTE(review): not read anywhere in this file; presumably the scraper
# ordering used by the caller -- confirm.
__priority__ = '110'
# Copied onto Lyrics.lrc by get_lyrics().
__lrc__ = True


# Lookup URL template; the three %s slots are, in order, the file key, the
# URL-quoted title and the URL-quoted artist.
GOM_URL = (
    'http://newlyrics.gomtv.com/cgi-bin/lyrics.cgi'
    '?cmd=find_get_lyrics&file_key=%s&title=%s&artist=%s&from=gomaudio_local'
)
23
def remove_accents(data):
    '''
    Return *data* with accents (Unicode combining marks) stripped.

    The text is decomposed with NFKD so accents become separate combining
    characters, those characters are dropped, and the remainder is
    recomposed with NFC.
    '''
    nfkd_data = unicodedata.normalize('NFKD', data)
    stripped = u"".join(c for c in nfkd_data if not unicodedata.combining(c))
    # NFKD also splits Hangul syllables into conjoining jamo, which are not
    # combining characters and which the euc-kr codec cannot encode.
    # Recomposing restores the original syllables.
    return unicodedata.normalize('NFC', stripped)
27
28
class gomClient(object):
    '''
    Provide GOM-specific helpers, such as deriving the lookup key from an
    audio file.
    '''

    @staticmethod
    def GetKeyFromFile(file):
        '''
        Return the MD5 hex digest of the first 100 KB of audio data.

        file: passed straight to AudioFile.Open().

        Returns None when no audio data could be read (the buffer is empty
        for streaming audio).
        '''
        musf = AudioFile()
        musf.Open(file)
        try:
            buf = musf.ReadAudioStream(100 * 1024)  # 100KB from audio data
        finally:
            # Close even when the read raises, so Close() is never skipped.
            musf.Close()
        # buffer will be empty for streaming audio
        if not buf:
            return None
        # calculate hashkey
        return hashlib.md5(buf).hexdigest()

    @staticmethod
    def mSecConv(msec):
        '''
        Split a millisecond count into (minutes, seconds, hundredths).

        All three values are integers; the millisecond remainder below one
        hundredth of a second is discarded.
        '''
        # Floor division keeps everything integral; true division would
        # return floats with a fractional hundredths component.
        s, cs = divmod(msec // 10, 100)
        m, s = divmod(s, 60)
        return m, s, cs
52
def __init__(self, *args, **kwargs):
    '''
    Store the scraper configuration.

    Requires the keyword arguments ``debug`` and ``settings``; a missing
    one raises KeyError. Positional arguments are accepted but unused.
    '''
    debug_flag = kwargs['debug']
    self.DEBUG = debug_flag
    scraper_settings = kwargs['settings']
    self.settings = scraper_settings
    self.base_url = 'http://newlyrics.gomtv.com/'
58
def get_lyrics(self, song, key=None, ext=None):
    '''
    Look up synchronised lyrics for *song* on the GOM lyrics server.

    song: object providing ``artist``, ``title`` and ``filepath``.
    key:  file key to query with. When None it is computed with
          gomClient.GetKeyFromFile(), but only for supported extensions.
    ext:  file extension including the leading dot. When empty it is
          taken from ``song.filepath`` and lower-cased.

    Returns a Lyrics object whose ``lyrics`` attribute holds LRC text, or
    None when no key is available, the request fails, or the reply does
    not start with ``<lyrics_reply result="0">``.
    '''
    # Function-scope imports: os is not among this file's explicit imports
    # (it may only arrive through the star import from lib.utils).
    import os
    from xml.sax.saxutils import unescape

    log('%s: searching lyrics for %s - %s' % (__title__, song.artist, song.title), debug=self.DEBUG)
    lyrics = Lyrics(settings=self.settings)
    lyrics.song = song
    lyrics.source = __title__
    lyrics.lrc = __lrc__
    try:
        if not ext:
            ext = os.path.splitext(song.filepath)[1].lower()
        sup_ext = ('.mp3', '.ogg', '.wma', '.flac', '.ape', '.wav')
        if ext in sup_ext and key is None:
            key = gomClient.GetKeyFromFile(song.filepath)
        if not key:
            return None
        # Title and artist are sent euc-kr encoded and percent-quoted.
        title = urllib.parse.quote(remove_accents(song.title).encode('euc-kr'))
        artist = urllib.parse.quote(remove_accents(song.artist).encode('euc-kr'))
        response = requests.get(GOM_URL % (key, title, artist), timeout=10)
        response.encoding = 'euc-kr'
        page = response.text
    except Exception:
        # Best effort: log where and why the lookup failed, report no
        # lyrics. KeyboardInterrupt/SystemExit are deliberately not caught.
        log('%s: %s::%s (%d) [%s]' % (
            __title__, self.__class__.__name__,
            sys.exc_info()[2].tb_frame.f_code.co_name,
            sys.exc_info()[2].tb_lineno,
            sys.exc_info()[1]
        ), debug=self.DEBUG)
        return None
    if not page.startswith('<lyrics_reply result="0">'):
        return None
    syncs = re.findall(r'<sync start="(\d+)">([^<]*)</sync>', page)
    lyrline = ['[ti:%s]' % song.title, '[ar:%s]' % song.artist]
    # unescape() already handles &amp;, &lt; and &gt;; the two quote
    # entities have to be supplied explicitly.
    quote_entities = {'&apos;': "'", '&quot;': '"'}
    for start, text in syncs:
        # timeformat conversion
        stamp = '%02d:%02d.%02d' % gomClient.mSecConv(int(start))
        lyrline.append('[%s]%s' % (stamp, unescape(text, quote_entities)))
    lyrics.lyrics = '\n'.join(lyrline)
    return lyrics
def __init__(self, *args, **kwargs)
def get_lyrics(self, song, key=None, ext=None)
None log(str msg, int level=LOGDEBUG)
Definition: xbmc.py:9