Upgrade to 1.1

2021-10-03 17:39:43 +02:00 · 2021-10-03 17:39:43 +02:00 · f6704ffb1f
commit f6704ffb1f
parent b8f80c4e71
3 changed files with 104 additions and 49 deletions
--- a/README.md
+++ b/README.md
@@ -54,5 +54,10 @@ You can also install it system-wide using `sudo python3 -m pip install.`
 After installing, you can use it like this: `nicole -ior -d ~/music/artist --rm_explicit`
 ## Changelog
 ### 1.1
 - Lyrics are now properly encoded.
 - If a title contains parentheses or umlauts, multiple possible URLs will be checked.
 ## Important Notice
 This software comes with no warranty!
--- a/nicole/nicole.py
+++ b/nicole/nicole.py
@@ -50,8 +50,6 @@ class Nicole:
    def __del__(self):
        if self.write_history:
            self._write_history()
        else:
            print("NO")
    def _load_history(self):
        config_path = path.expanduser("~") + "/.config/nicole/"
@@ -81,9 +79,10 @@ class Nicole:
            failed_file.write(file + "\n")
        failed_file.close()
-    def get_url_azlyrics(self, artist:str, title:str):
+    def get_urls_azlyrics(self, artist:str, title:str):
        """
        Create a azlyrics html from the artist and title
        If the title contains paranthesis or äüö, there will be multiple versions, one that contains the (...)öäü and one that doesn't.
        """
        # convert to lower case
        artist = artist.casefold()
@@ -99,55 +98,103 @@ class Nicole:
        for match in re.finditer(r"\[.*\]", title):
            title = title.replace(match.group(), "")
-        # remove spaces, from the title
+        titles = [title]
        for c in [' ', '-', ',', '.', '\'', '"', '°', '`', '´', '/', '!', '?', '#', '*', '(', ')']:
            title = title.replace(c, '')
            artist = artist.replace(c, '')
-        # replace some stuff
+        # if title has(), create one version with and one without them
-        old = ['ä', 'ö', 'ü', '&']
+        if re.search(r"\(.*\)", title):
-        new = ['a', 'o', 'u', "and"]
+            for match in re.finditer(r"\(.*\)", title):
-        # new2 = ['', '', '', "and"]
+                title = title.replace(match.group(), "")
            titles.append(title)
-        for i in range(len(old)):
+        # some special chars
-            title = title.replace(old[i], new[i])
+        toNone = [' ', '-', ',', '.', '…', '\'', '"', '°', '`', '´', '/', '!', '?', '#', '*', '(', ')']
-            artist = artist.replace(old[i], new[i])
+        for c in toNone:
            artist = artist.replace(c, "")
-        return "https://azlyrics.com/lyrics/" + artist + '/' + title + ".html"
+        #
        # replace umlaute, create multiple versions
        #
        old = ['ä', 'ö', 'ü', 'ß', '&']
        new1 = ['a', 'o', 'u', 'ss', "and"]
        new2 = ['', '', '', '', "and"]
-    def get_lyrics_azlyrics(self, url):
+        # in artist
        if any(c in old for c in artist):
            for i in range(len(old)):
                artist = artist.replace(old[i], new1[i])
        # multiple loops are needed since the array might grow
        # umlaute
        for n in range(len(titles)):
            if any(c in old for c in titles[n]):
                # replace titles[n] with the first version and append the second
                title2 = titles[n]
                for i in range(len(old)):
                    titles[n] = titles[n].replace(old[i], new1[i])
                    title2 = title2.replace(old[i], new2[i])
                titles.append(title2)
        # features
        for title in titles:
            match = re.search(r"fe?a?t\.?.*", title)
            if match:
                titles.append(title.replace(match.group(), ""))
        # spaces, etc
        for n in range(len(titles)):
            for c in toNone:
                titles[n] = titles[n].replace(c, '')
        #
        # create urls
        #
        urls = []
        for title in titles:
            urls.append("https://azlyrics.com/lyrics/" + artist + '/' + title + ".html")
        print(urls)
        return urls
    def get_lyrics_azlyrics(self, urls):
        """
        Extract the lyrics from the html
        """
        # visit the url
        html = None
        try:
            html = str(ur.urlopen(url).read().decode("utf-8"))
            sleep(self.delay) # azlyrics blocks requests if there is no delay
        except Exception:
            sleep(self.delay) # azlyrics blocks requests if there is no delay
            return (False, f"Could not access url: {url}")
-        lyrics = None
+        message = ""
-        match = re.search(r"<!\-\- Usage of azlyrics.com content by any third\-party lyrics provider is prohibited by our licensing agreement. Sorry about that. \-\->(.|\n)+?</div>", html)
+        for url in urls:
-        if match:
+            # visit the url
-            lyrics = match.group()
+            html = None
-            for key, value in {
+            try:
-                "<!-- Usage of azlyrics.com content by any third-party lyrics provider is prohibited by our licensing agreement. Sorry about that. -->": "",
+                html = str(ur.urlopen(url).read().decode("utf-8"))
-                "</div>": "",
+                sleep(self.delay) # azlyrics blocks requests if there is no delay
-                "\n": "",
+            except Exception:
-                "<br>": "\n",
+                sleep(self.delay) # azlyrics blocks requests if there is no delay
-            }.items():
+                message += f"Could not access url: {url}\n    "
-                lyrics = lyrics.replace(key, value)
+                continue
-            # remove all html tags
+            lyrics = None
-            for tag in re.finditer(r"<.+>", lyrics):
+            match = re.search(r"<!\-\- Usage of azlyrics.com content by any third\-party lyrics provider is prohibited by our licensing agreement. Sorry about that. \-\->(.|\n)+?</div>", html)
-                lyrics = lyrics.replace(tag.group(), "")
+            if match:
-            for tag in re.finditer(r"</.+>", lyrics):
+                lyrics = match.group()
-                lyrics = lyrics.replace(tag.group(), "")
+                for key, value in {
                    "<!-- Usage of azlyrics.com content by any third-party lyrics provider is prohibited by our licensing agreement. Sorry about that. -->": "",
                    "</div>": "",
                    "\n": "",
                    "<br>": "\n",
                }.items():
                    lyrics = lyrics.replace(key, value)
-            return (True, lyrics)
+                # remove all html tags
-        return (False, f"Could not find lyrics in html: {url}")
+                for tag in re.finditer(r"<.+>", lyrics):
                    lyrics = lyrics.replace(tag.group(), "")
                for tag in re.finditer(r"</.+>", lyrics):
                    lyrics = lyrics.replace(tag.group(), "")
                return (True, lyrics)
            message += f"Could not lyrics in html for {url}\n    "
        message = message.strip(" \n")
        return (False, message)
    def process_dir(self, directory):
        if not path.isabs(directory):
@@ -254,9 +301,9 @@ class Nicole:
        # currently the only supported site
        if self.lyrics_site == "azlyrics":
-            url = self.get_url_azlyrics(artist, title)
+            urls = self.get_urls_azlyrics(artist, title)
-            success, lyrics = self.get_lyrics_azlyrics(url)
+            success, lyrics = self.get_lyrics_azlyrics(urls)
            if success:
                if self.test_run:
                    print(f"{artist} - {title}:\n{lyrics}\n\n")
@@ -283,6 +330,9 @@ class Nicole:
 def main():
    print("Nicole version 1.1")
    # print("Get updates here: https://github.com/MatthiasQuintern/nicole")
    helpstring = """Command line options:
    -d [directory]      process directory [directory]
    -f [file]           process file [file]
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ from setuptools import setup
 setup(
        name="nicole",
-        version="1.0",
+        version="1.1",
        description="Add lyrics from azlyrics.com to mp3-tag",
        author="Matthias Quintern",