1 | | #-*- coding: UTF-8 -*- |
2 | | """ |
3 | | Scraper for https://www.letssingit.com/ |
4 | | |
ronie
6 | | """ |
7 | | |
8 | | import sys |
9 | | import re |
10 | | import urllib |
11 | | import urllib2 |
12 | | import socket |
13 | | import difflib |
14 | | import chardet |
15 | | from optparse import OptionParser |
16 | | from common import utilities |
17 | | |
# Grabber metadata. buildVersion() copies these values into its XML output.
__author__ = "Paul Harrison and 'ronie'"
__title__ = "LetsSingIt"
__description__ = "Search https://www.letssingit.com/ for lyrics"
__version__ = "0.1"
# Stored as a string because it is assigned directly to an XML element's text.
# NOTE(review): presumably ranks this grabber against others -- confirm with
# whatever consumes the version XML.
__priority__ = "120"
# The spelling "syncronized" matches the XML tag and the Lyrics attribute used
# elsewhere in this file, so it must not be corrected in isolation.
# NOTE(review): presumably marks whether lyrics carry timing data -- confirm.
__syncronized__ = False

# Flag handed to utilities.log() for the search message; main() switches it to
# True when -d/--debug is given.
debug = False

# Give every socket created from here on a 10 second default timeout, so the
# urllib2 requests issued by LyricsFetcher cannot block forever.
socket.setdefaulttimeout(10)
28 | | |
class LyricsFetcher:
    """Scrape lyrics from letssingit.com.

    The fetcher runs a song search, takes the first result link, checks that
    the link's slug resembles the requested artist/title, and then extracts
    the lyric paragraphs from the song page.
    """

    # Browser-like User-Agent sent with both the search and the song request.
    USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0'

    # The difflib similarity ratio must exceed this for a result to be used.
    MATCH_THRESHOLD = 0.8

    def __init__(self):
        # Search URL template; %s receives the URL-quoted "artist+title" query.
        self.url = 'https://search.letssingit.com/?a=search&l=song&s=%s'

    def _fetch(self, url, headers):
        """Return the body served at *url*, or None if the request fails.

        The response handle is always closed, including when read() raises.
        """
        try:
            handle = urllib2.urlopen(urllib2.Request(url, None, headers))
            try:
                return handle.read()
            finally:
                handle.close()
        except Exception as exc:
            # Best-effort scraper: any network/URL problem means "no lyrics".
            # Exception (not a bare except) lets Ctrl-C and sys.exit through.
            utilities.log(debug, '%s: request for %s failed: %s' % (__title__, url, exc))
            return None

    def _candidate_name(self, link):
        """Reduce a result link to the "artist title" text held in its slug.

        Only the literal scheme/host prefix is removed. The previous
        str.lstrip() call treated its argument as a set of characters and
        therefore also removed leading slug letters (e.g. all of "eminem").
        """
        slug = re.sub(r'^https?://(?:www\.)?letssingit\.com/', '', link)
        # Drop the trailing "-<id>" component, then split artist from title.
        slug = slug.rsplit('-', 1)[0]
        return slug.replace('-lyrics-', ' ')

    def get_lyrics(self, lyrics):
        """Search for lyrics.artist / lyrics.title and append the text found.

        On success the lyric paragraphs are appended to lyrics.lyrics, one
        paragraph per line, and True is returned. False is returned when a
        request fails, the search page has no result link, the first result
        is not similar enough to the query, or the song page contains no
        lyric paragraphs.
        """
        utilities.log(debug, '%s: searching lyrics for %s - %s' % (__title__, lyrics.artist, lyrics.title))
        query = '%s+%s' % (urllib.quote_plus(lyrics.artist), urllib.quote_plus(lyrics.title))

        search_headers = {'User-Agent': self.USER_AGENT, 'Referer': 'https://www.letssingit.com/'}
        response = self._fetch(self.url % query, search_headers)
        if response is None:
            return False
        # NOTE(review): logged with a False flag, as in the original code;
        # presumably a disabled dump of the raw search page -- confirm.
        utilities.log(False, response)

        # Only the first result row of the search page is considered.
        matchcode = re.search('</TD><TD><A href="(.*?)"', response)
        if not matchcode:
            return False
        lyricscode = matchcode.group(1)

        # Compare the query and the link slug with their separators removed.
        wanted = query.lower().replace('+', '')
        offered = self._candidate_name(lyricscode).lower().replace('-', '')
        if difflib.SequenceMatcher(None, wanted, offered).ratio() <= self.MATCH_THRESHOLD:
            return False

        resp = self._fetch(lyricscode, {'User-Agent': self.USER_AGENT})
        if resp is None:
            return False

        # Remove ad slots so they do not end up inside the lyric text.
        resp = re.sub(r'<div id=adslot.*?</div>', '', resp)
        # Collect every class=lyrics_part_name / class=lyrics_part_text part.
        match = re.findall('<P class=lyrics_part_.*?>(.*?)</P>', resp, flags=re.DOTALL)
        if not match:
            return False

        lyrics.lyrics += ''.join(part.replace('<br>', '') + '\n' for part in match)
        return True
70 | | |
def performSelfTest():
    """Run one known search and exit with 0 if it succeeds, 1 otherwise.

    The probe looks up 'Money For Nothing' by Dire Straits through
    LyricsFetcher and logs the outcome before exiting.
    """
    probe = utilities.Lyrics()
    probe.source = __title__
    probe.syncronized = __syncronized__
    probe.artist = 'Dire Straits'
    probe.album = 'Brothers In Arms'
    probe.title = 'Money For Nothing'

    if not LyricsFetcher().get_lyrics(probe):
        utilities.log(True, "The lyrics for the test search failed!")
        sys.exit(1)

    utilities.log(True, "Everything appears in order.")
    sys.exit(0)
89 | | |
def buildLyrics(lyrics):
    """Log *lyrics* as a <lyrics> XML document and exit with status 0.

    The document holds artist, album, title, syncronized and grabber
    elements followed by one <lyric> element per line of lyrics.lyrics.
    """
    from lxml import etree

    root = etree.XML(u'<lyrics></lyrics>')

    # Header elements, emitted in this fixed order.
    header = (
        ("artist", lyrics.artist),
        ("album", lyrics.album),
        ("title", lyrics.title),
        ("syncronized", 'True' if __syncronized__ else 'False'),
        ("grabber", lyrics.source),
    )
    for tag, value in header:
        node = etree.SubElement(root, tag)
        node.text = value

    # One <lyric> child per line of lyric text.
    for text in lyrics.lyrics.splitlines():
        entry = etree.SubElement(root, "lyric")
        entry.text = text

    document = etree.tostring(root, encoding='UTF-8', pretty_print=True,
                              xml_declaration=True)
    utilities.log(True, document)
    sys.exit(0)
106 | | |
def buildVersion():
    """Log a <grabber> XML document describing this script and exit with 0.

    The element values come from the module-level metadata constants plus
    the fixed command name and grabber type.
    """
    from lxml import etree

    root = etree.XML(u'<grabber></grabber>')

    # (tag, text) pairs, emitted in this fixed order.
    fields = (
        ("name", __title__),
        ("author", __author__),
        ("command", 'letssingit.py'),
        ("type", 'lyrics'),
        ("description", __description__),
        ("version", __version__),
        ("priority", __priority__),
        ("syncronized", 'True' if __syncronized__ else 'False'),
    )
    for tag, value in fields:
        node = etree.SubElement(root, tag)
        node.text = value

    document = etree.tostring(root, encoding='UTF-8', pretty_print=True,
                              xml_declaration=True)
    utilities.log(True, document)
    sys.exit(0)
122 | | |
def main():
    """Parse the command line and run the requested grabber action.

    -v/--version and -t/--test are handled first; buildVersion() and
    performSelfTest() both end the process themselves. Otherwise the
    artist/album/title/filename options are copied onto a Lyrics object and a
    search is run. Exit status is 0 when lyrics were found and printed, 1 when
    stray positional arguments were given or no lyrics were found.
    """
    global debug

    parser = OptionParser()

    parser.add_option('-v', "--version", action="store_true", default=False,
                      dest="version", help="Display version and author")
    parser.add_option('-t', "--test", action="store_true", default=False,
                      dest="test", help="Perform self-test for dependencies.")
    parser.add_option('-s', "--search", action="store_true", default=False,
                      dest="search", help="Search for lyrics.")
    parser.add_option('-a', "--artist", metavar="ARTIST", default=None,
                      dest="artist", help="Artist of track.")
    parser.add_option('-b', "--album", metavar="ALBUM", default=None,
                      dest="album", help="Album of track.")
    parser.add_option('-n', "--title", metavar="TITLE", default=None,
                      dest="title", help="Title of track.")
    parser.add_option('-f', "--filename", metavar="FILENAME", default=None,
                      dest="filename", help="Filename of track.")
    parser.add_option('-d', '--debug', action="store_true", default=False,
                      dest="debug", help=("Show debug messages"))

    opts, args = parser.parse_args()

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__

    if opts.debug:
        debug = True

    if opts.version:
        buildVersion()

    if opts.test:
        performSelfTest()

    # Only overwrite the Lyrics defaults for options that were actually given.
    if opts.artist:
        lyrics.artist = opts.artist
    if opts.album:
        lyrics.album = opts.album
    if opts.title:
        lyrics.title = opts.title
    if opts.filename:
        lyrics.filename = opts.filename

    if args:
        # utilities.log() is called as (flag, message) everywhere else in this
        # file; the flag was missing here, so the error was never reported
        # as intended.
        utilities.log(True, 'ERROR: invalid arguments found')
        sys.exit(1)

    fetcher = LyricsFetcher()
    if fetcher.get_lyrics(lyrics):
        buildLyrics(lyrics)
        # buildLyrics() exits on its own; kept as a safeguard.
        sys.exit(0)
    else:
        utilities.log(True, "No lyrics found for this track")
        sys.exit(1)
180 | | |
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()