Upgrade to 1.1

This commit is contained in:
matthias@arch 2021-10-03 17:39:43 +02:00
parent b8f80c4e71
commit f6704ffb1f
3 changed files with 104 additions and 49 deletions

View File

@ -54,5 +54,10 @@ You can also install it system-wide using `sudo python3 -m pip install .`
After installing, you can use it like this: `nicole -ior -d ~/music/artist --rm_explicit` After installing, you can use it like this: `nicole -ior -d ~/music/artist --rm_explicit`
## Changelog
### 1.1
- Lyrics are now properly encoded.
- If a title contains parentheses or umlauts, multiple possible URLs will be checked.
## Important Notice ## Important Notice
This software comes with no warranty! This software comes with no warranty!

View File

@ -50,8 +50,6 @@ class Nicole:
def __del__(self): def __del__(self):
if self.write_history: if self.write_history:
self._write_history() self._write_history()
else:
print("NO")
def _load_history(self): def _load_history(self):
config_path = path.expanduser("~") + "/.config/nicole/" config_path = path.expanduser("~") + "/.config/nicole/"
@ -81,9 +79,10 @@ class Nicole:
failed_file.write(file + "\n") failed_file.write(file + "\n")
failed_file.close() failed_file.close()
def get_url_azlyrics(self, artist:str, title:str): def get_urls_azlyrics(self, artist:str, title:str):
""" """
Create a azlyrics html from the artist and title Create a azlyrics html from the artist and title
If the title contains paranthesis or äüö, there will be multiple versions, one that contains the (...)öäü and one that doesn't.
""" """
# convert to lower case # convert to lower case
artist = artist.casefold() artist = artist.casefold()
@ -99,55 +98,103 @@ class Nicole:
for match in re.finditer(r"\[.*\]", title): for match in re.finditer(r"\[.*\]", title):
title = title.replace(match.group(), "") title = title.replace(match.group(), "")
# remove spaces, from the title titles = [title]
for c in [' ', '-', ',', '.', '\'', '"', '°', '`', '´', '/', '!', '?', '#', '*', '(', ')']:
title = title.replace(c, '')
artist = artist.replace(c, '')
# replace some stuff # if title has(), create one version with and one without them
old = ['ä', 'ö', 'ü', '&'] if re.search(r"\(.*\)", title):
new = ['a', 'o', 'u', "and"] for match in re.finditer(r"\(.*\)", title):
# new2 = ['', '', '', "and"] title = title.replace(match.group(), "")
titles.append(title)
for i in range(len(old)): # some special chars
title = title.replace(old[i], new[i]) toNone = [' ', '-', ',', '.', '', '\'', '"', '°', '`', '´', '/', '!', '?', '#', '*', '(', ')']
artist = artist.replace(old[i], new[i]) for c in toNone:
artist = artist.replace(c, "")
return "https://azlyrics.com/lyrics/" + artist + '/' + title + ".html" #
# replace umlaute, create multiple versions
#
old = ['ä', 'ö', 'ü', 'ß', '&']
new1 = ['a', 'o', 'u', 'ss', "and"]
new2 = ['', '', '', '', "and"]
def get_lyrics_azlyrics(self, url): # in artist
if any(c in old for c in artist):
for i in range(len(old)):
artist = artist.replace(old[i], new1[i])
# multiple loops are needed since the array might grow
# umlaute
for n in range(len(titles)):
if any(c in old for c in titles[n]):
# replace titles[n] with the first version and append the second
title2 = titles[n]
for i in range(len(old)):
titles[n] = titles[n].replace(old[i], new1[i])
title2 = title2.replace(old[i], new2[i])
titles.append(title2)
# features
for title in titles:
match = re.search(r"fe?a?t\.?.*", title)
if match:
titles.append(title.replace(match.group(), ""))
# spaces, etc
for n in range(len(titles)):
for c in toNone:
titles[n] = titles[n].replace(c, '')
#
# create urls
#
urls = []
for title in titles:
urls.append("https://azlyrics.com/lyrics/" + artist + '/' + title + ".html")
print(urls)
return urls
def get_lyrics_azlyrics(self, urls):
""" """
Extract the lyrics from the html Extract the lyrics from the html
""" """
# visit the url
html = None
try:
html = str(ur.urlopen(url).read().decode("utf-8"))
sleep(self.delay) # azlyrics blocks requests if there is no delay
except Exception:
sleep(self.delay) # azlyrics blocks requests if there is no delay
return (False, f"Could not access url: {url}")
lyrics = None message = ""
match = re.search(r"<!\-\- Usage of azlyrics.com content by any third\-party lyrics provider is prohibited by our licensing agreement. Sorry about that. \-\->(.|\n)+?</div>", html) for url in urls:
if match: # visit the url
lyrics = match.group() html = None
for key, value in { try:
"<!-- Usage of azlyrics.com content by any third-party lyrics provider is prohibited by our licensing agreement. Sorry about that. -->": "", html = str(ur.urlopen(url).read().decode("utf-8"))
"</div>": "", sleep(self.delay) # azlyrics blocks requests if there is no delay
"\n": "", except Exception:
"<br>": "\n", sleep(self.delay) # azlyrics blocks requests if there is no delay
}.items(): message += f"Could not access url: {url}\n "
lyrics = lyrics.replace(key, value) continue
# remove all html tags lyrics = None
for tag in re.finditer(r"<.+>", lyrics): match = re.search(r"<!\-\- Usage of azlyrics.com content by any third\-party lyrics provider is prohibited by our licensing agreement. Sorry about that. \-\->(.|\n)+?</div>", html)
lyrics = lyrics.replace(tag.group(), "") if match:
for tag in re.finditer(r"</.+>", lyrics): lyrics = match.group()
lyrics = lyrics.replace(tag.group(), "") for key, value in {
"<!-- Usage of azlyrics.com content by any third-party lyrics provider is prohibited by our licensing agreement. Sorry about that. -->": "",
"</div>": "",
"\n": "",
"<br>": "\n",
}.items():
lyrics = lyrics.replace(key, value)
return (True, lyrics) # remove all html tags
return (False, f"Could not find lyrics in html: {url}") for tag in re.finditer(r"<.+>", lyrics):
lyrics = lyrics.replace(tag.group(), "")
for tag in re.finditer(r"</.+>", lyrics):
lyrics = lyrics.replace(tag.group(), "")
return (True, lyrics)
message += f"Could not lyrics in html for {url}\n "
message = message.strip(" \n")
return (False, message)
def process_dir(self, directory): def process_dir(self, directory):
if not path.isabs(directory): if not path.isabs(directory):
@ -254,9 +301,9 @@ class Nicole:
# currently the only supported site # currently the only supported site
if self.lyrics_site == "azlyrics": if self.lyrics_site == "azlyrics":
url = self.get_url_azlyrics(artist, title) urls = self.get_urls_azlyrics(artist, title)
success, lyrics = self.get_lyrics_azlyrics(url) success, lyrics = self.get_lyrics_azlyrics(urls)
if success: if success:
if self.test_run: if self.test_run:
print(f"{artist} - {title}:\n{lyrics}\n\n") print(f"{artist} - {title}:\n{lyrics}\n\n")
@ -283,6 +330,9 @@ class Nicole:
def main(): def main():
print("Nicole version 1.1")
# print("Get updates here: https://github.com/MatthiasQuintern/nicole")
helpstring = """Command line options: helpstring = """Command line options:
-d [directory] process directory [directory] -d [directory] process directory [directory]
-f [file] process file [file] -f [file] process file [file]

View File

@ -2,7 +2,7 @@ from setuptools import setup
setup( setup(
name="nicole", name="nicole",
version="1.0", version="1.1",
description="Add lyrics from azlyrics.com to mp3-tag", description="Add lyrics from azlyrics.com to mp3-tag",
author="Matthias Quintern", author="Matthias Quintern",