Lyrics are now properly encoded

This commit is contained in:
matthias@arch 2021-10-03 15:58:14 +02:00
parent 3292eca067
commit 28cdf7de69

View File

@@ -107,6 +107,7 @@ class Nicole:
# replace some stuff # replace some stuff
old = ['ä', 'ö', 'ü', '&'] old = ['ä', 'ö', 'ü', '&']
new = ['a', 'o', 'u', "and"] new = ['a', 'o', 'u', "and"]
# new2 = ['', '', '', "and"]
for i in range(len(old)): for i in range(len(old)):
title = title.replace(old[i], new[i]) title = title.replace(old[i], new[i])
@@ -121,23 +122,25 @@ class Nicole:
# visit the url # visit the url
html = None html = None
try: try:
html = str(ur.urlopen(url).read()) html = str(ur.urlopen(url).read().decode("utf-8"))
sleep(self.delay) # azlyrics blocks requests if there is no delay sleep(self.delay) # azlyrics blocks requests if there is no delay
except Exception: except Exception:
sleep(self.delay) # azlyrics blocks requests if there is no delay sleep(self.delay) # azlyrics blocks requests if there is no delay
return (False, f"Could not access url: {url}") return (False, f"Could not access url: {url}")
print(html)
lyrics = None lyrics = None
match = re.search(r"<!\-\- Usage of azlyrics.com content by any third\-party lyrics provider is prohibited by our licensing agreement. Sorry about that. \-\->.+?</div>", html) match = re.search(r"<!\-\- Usage of azlyrics.com content by any third\-party lyrics provider is prohibited by our licensing agreement. Sorry about that. \-\->(.|\n)+?</div>", html)
if match: if match:
lyrics = match.group() lyrics = match.group()
lyrics = lyrics.replace("<!-- Usage of azlyrics.com content by any third-party lyrics provider is prohibited by our licensing agreement. Sorry about that. -->", "") for key, value in {
lyrics = lyrics.replace("</div>", "") "<!-- Usage of azlyrics.com content by any third-party lyrics provider is prohibited by our licensing agreement. Sorry about that. -->": "",
lyrics = lyrics.replace("\n", "") "</div>": "",
lyrics = lyrics.replace("\\n", "") "\n": "",
lyrics = lyrics.replace("\\r", "") "<br>": "\n",
lyrics = lyrics.replace("\\", "") }.items():
lyrics = lyrics.replace("<br>", "\n") lyrics = lyrics.replace(key, value)
# remove all html tags # remove all html tags
for tag in re.finditer(r"<.+>", lyrics): for tag in re.finditer(r"<.+>", lyrics):
lyrics = lyrics.replace(tag.group(), "") lyrics = lyrics.replace(tag.group(), "")