3 Scraper for http://lrcct2.ttplayer.com/
13 from urllib
import urlopen
15 from urllib.request
import urlopen
21 from optparse
import OptionParser
22 from common
import utilities
24 __author__ =
"Paul Harrison and 'taxigps'"
25 __title__ =
"TTPlayer"
26 __description__ =
"Search http://lrcct2.ttplayer.com for lyrics"
29 __syncronized__ =
True
33 socket.setdefaulttimeout(10)
# Regex patterns matching parenthesised title suffixes such as "(live ...)",
# "(... mix)" or "(feat ...)"; every match is substituted with '' in the
# track title before the search is made.  Raw strings are used so that "\("
# and "\)" reach the regex engine as escapes instead of being parsed as
# (invalid) string escape sequences.
LYRIC_TITLE_STRIP = [
    r"\(live[^\)]*\)",
    r"\(acoustic[^\)]*\)",
    r"\([^\)]*mix\)",
    r"\([^\)]*version\)",
    r"\([^\)]*edit\)",
    r"\(feat[^\)]*\)",
]

# (pattern, replacement) pairs: each pattern is compiled with re.compile()
# and its matches in the track title are replaced by the second element.
LYRIC_TITLE_REPLACE = [
    ("/", "-"),
    (" & ", " and "),
]

# (pattern, replacement) pairs applied in the same way to the artist name.
LYRIC_ARTIST_REPLACE = [
    ("/", "-"),
    (" & ", " and "),
]
43 Provides TTPlayer-specific functions, such as encoding the artist and title
44 and generating an Id code for server authorization.
45 (see http://ttplyrics.googlecode.com/svn/trunk/crack)
51 This code may be ugly because it was translated
52 from C code, which was itself translated from asm code
53 grabbed with OllyDbg from ttp_lrcs.dll.
54 (see http://ttplyrics.googlecode.com/svn/trunk/crack)
61 tmp1 = (Id & 0x0000FF00) >> 8
64 if ( (Id & 0x00FF0000) == 0 ):
65 tmp3 = 0x000000FF & ~tmp1
67 tmp3 = 0x000000FF & ((Id & 0x00FF0000) >> 16)
69 tmp3 = tmp3 | ((0x000000FF & Id) << 8)
71 tmp3 = tmp3 | (0x000000FF & tmp1)
73 if ( (Id & 0xFF000000) == 0 ) :
74 tmp3 = tmp3 | (0x000000FF & (~Id))
76 tmp3 = tmp3 | (0x000000FF & (Id >> 24))
85 tmp1 = (char + tmp2) & 0x00000000FFFFFFFF
86 tmp2 = (tmp2 << (i%2 + 4)) & 0x00000000FFFFFFFF
87 tmp2 = (tmp1 + tmp2) & 0x00000000FFFFFFFF
98 tmp7 = (char + tmp1) & 0x00000000FFFFFFFF
99 tmp1 = (tmp1 << (i%2 + 3)) & 0x00000000FFFFFFFF
100 tmp1 = (tmp1 + tmp7) & 0x00000000FFFFFFFF
108 tmp1 = (((((tmp2 ^ tmp3) & 0x00000000FFFFFFFF) + (tmp1 | Id)) & 0x00000000FFFFFFFF) * (tmp1 | tmp3)) & 0x00000000FFFFFFFF
109 tmp1 = (tmp1 * (tmp2 ^ Id)) & 0x00000000FFFFFFFF
111 if tmp1 > 0x80000000:
112 tmp1 = tmp1 - 0x100000000
120 mystr = uni.encode(
'UTF-16')[2:]
121 for i
in range(len(mystr)):
122 rtn +=
'%02x' % ord(mystr[i])
125 mystr = uni.encode(
'UTF-16')[2:]
126 for i
in range(len(mystr)):
127 rtn +=
'%02x' % (mystr[i])
133 self.
LIST_URL =
'http://ttlrccnc.qianqian.com/dll/lyricsvr.dll?sh?Artist=%s&Title=%s&Flags=0'
134 self.
LYRIC_URL =
'http://ttlrccnc.qianqian.com/dll/lyricsvr.dll?dl?Id=%d&Code=%d&uid=01&mac=%012x'
137 utilities.log(debug,
"%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))
140 for exp
in LYRIC_ARTIST_REPLACE:
141 p = re.compile(exp[0])
142 artist = p.sub(exp[1], lyrics.artist)
143 for exp
in LYRIC_TITLE_REPLACE:
144 p = re.compile(exp[0])
145 title = p.sub(exp[1], lyrics.title)
148 for exp
in LYRIC_TITLE_STRIP:
150 title = p.sub(
'', title)
153 title = title.strip().replace(
'`',
'').replace(
'/',
'')
154 artist = artist.strip().replace(
'`',
'').replace(
'/',
'')
157 url = self.
LIST_URL %(ttpClient.EncodeArtTit(artist.replace(
' ',
'').lower()), ttpClient.EncodeArtTit(title.replace(
' ',
'').lower()))
159 Page = f.read().
decode(
'utf-8')
161 utilities.log(
True,
"%s: %s::%s (%d) [%s]" % (
162 __title__, self.__class__.__name__,
163 sys.exc_info()[ 2 ].tb_frame.f_code.co_name,
164 sys.exc_info()[ 2 ].tb_lineno,
169 links_query = re.compile(
'<lrc id=\"(.*?)\" artist=\"(.*?)\" title=\"(.*?)\"></lrc>')
170 urls = re.findall(links_query, Page)
173 if (difflib.SequenceMatcher(
None, artist.lower(), x[1].lower()).ratio() > 0.8)
and (difflib.SequenceMatcher(
None, title.lower(), x[2].lower()).ratio() > 0.8):
174 links.append( ( x[1] +
' - ' + x[2], x[0], x[1], x[2] ) )
181 if lyr
and lyr.startswith(b
'['):
182 enc = chardet.detect(lyr)
183 lyr = lyr.decode(enc[
'encoding'],
'ignore')
189 title,Id,artist,song = link
190 utilities.log(debug,
'%s %s %s' %(Id, artist, song))
192 url = self.
LYRIC_URL %(int(Id),ttpClient.CodeFunc(int(Id), artist + song), random.randint(0,0xFFFFFFFFFFFF))
196 utilities.log(
True,
"%s: %s::%s (%d) [%s]" % (
197 __title__, self.__class__.__name__,
198 sys.exc_info()[ 2 ].tb_frame.f_code.co_name,
199 sys.exc_info()[ 2 ].tb_lineno,
203 if Page.startswith(b
'['):
209 lyrics = utilities.Lyrics()
210 lyrics.source = __title__
211 lyrics.syncronized = __syncronized__
212 lyrics.artist =
'Dire Straits'
213 lyrics.album =
'Brothers In Arms'
214 lyrics.title =
'Money For Nothing'
217 found = fetcher.get_lyrics(lyrics)
220 utilities.log(
True,
"Everything appears in order.")
224 utilities.log(
True,
"The lyrics for the test search failed!")
228 from lxml
import etree
229 xml = etree.XML(
u'<lyrics></lyrics>')
230 etree.SubElement(xml,
"artist").text = lyrics.artist
231 etree.SubElement(xml,
"album").text = lyrics.album
232 etree.SubElement(xml,
"title").text = lyrics.title
233 etree.SubElement(xml,
"syncronized").text =
'True' if __syncronized__
else 'False'
234 etree.SubElement(xml,
"grabber").text = lyrics.source
236 lines = lyrics.lyrics.splitlines()
238 etree.SubElement(xml,
"lyric").text = line
240 utilities.log(
True, utilities.convert_etree(etree.tostring(xml, encoding=
'UTF-8',
241 pretty_print=
True, xml_declaration=
True)))
245 from lxml
import etree
246 version = etree.XML(
u'<grabber></grabber>')
247 etree.SubElement(version,
"name").text = __title__
248 etree.SubElement(version,
"author").text = __author__
249 etree.SubElement(version,
"command").text =
'ttplayer.py'
250 etree.SubElement(version,
"type").text =
'lyrics'
251 etree.SubElement(version,
"description").text = __description__
252 etree.SubElement(version,
"version").text = __version__
253 etree.SubElement(version,
"priority").text = __priority__
254 etree.SubElement(version,
"syncronized").text =
'True' if __syncronized__
else 'False'
256 utilities.log(
True, utilities.convert_etree(etree.tostring(version, encoding=
'UTF-8',
257 pretty_print=
True, xml_declaration=
True)))
263 parser = OptionParser()
265 parser.add_option(
'-v',
"--version", action=
"store_true", default=
False,
266 dest=
"version", help=
"Display version and author")
267 parser.add_option(
'-t',
"--test", action=
"store_true", default=
False,
268 dest=
"test", help=
"Test grabber with a know good search")
269 parser.add_option(
'-s',
"--search", action=
"store_true", default=
False,
270 dest=
"search", help=
"Search for lyrics.")
271 parser.add_option(
'-a',
"--artist", metavar=
"ARTIST", default=
None,
272 dest=
"artist", help=
"Artist of track.")
273 parser.add_option(
'-b',
"--album", metavar=
"ALBUM", default=
None,
274 dest=
"album", help=
"Album of track.")
275 parser.add_option(
'-n',
"--title", metavar=
"TITLE", default=
None,
276 dest=
"title", help=
"Title of track.")
277 parser.add_option(
'-f',
"--filename", metavar=
"FILENAME", default=
None,
278 dest=
"filename", help=
"Filename of track.")
279 parser.add_option(
'-d',
'--debug', action=
"store_true", default=
False,
280 dest=
"debug", help=(
"Show debug messages"))
282 opts, args = parser.parse_args()
284 lyrics = utilities.Lyrics()
285 lyrics.source = __title__
286 lyrics.syncronized = __syncronized__
298 lyrics.artist = opts.artist
300 lyrics.album = opts.album
302 lyrics.title = opts.title
304 lyrics.filename = opts.filename
307 if fetcher.get_lyrics(lyrics):
311 utilities.log(
True,
"No lyrics found for this track")
314 if __name__ ==
'__main__':