added support for genius

2022-04-04 05:53:35 +02:00 · 2022-04-04 05:53:35 +02:00 · 9299140b87
commit 9299140b87
parent 6fc54858de
1 changed files with 159 additions and 55 deletions
--- a/nicole/nicole.py
+++ b/nicole/nicole.py
@ -1,17 +1,22 @@
 #!/bin/python3
 # Copyright ©  2022 Matthias Quintern.
 # This software comes with no warranty.
 # This software is licensed under the GPL3
 from mutagen import easyid3, id3, flac
-import urllib.request as ur
+
 import urllib
 import re
 from bs4 import BeautifulSoup
 from difflib import SequenceMatcher
 from json import loads
 from os import path, getcwd, listdir, mkdir
 from time import sleep
 from sys import argv
-import re
+# Der Name Nicole ist frei erfunden und hat keine Bedeutung.
-
+# Jeglicher Zusammenhang mit einer Website der DHL wird hiermit ausdrücklich ausgeschlossen.
 """
 Der Name Nicole ist frei erfunden und hat keine Bedeutung.
 Jeglicher Zusammenhang mit einer Website der DHL wird hiermit ausdrücklich ausgeschlossen.
 """
 class Nicole:
    """
@ -27,7 +32,7 @@ class Nicole:
        Nicole creates an azlyrics url from the title and artist mp3-tags of the file.
        The lyrics are extracted from the html document using regex.
    """
-    def __init__(self, test_run=False, silent=False, write_history=True, ignore_history=False, overwrite_tag=False, recursive=False, rm_explicit=False):
+    def __init__(self, test_run=False, silent=False, write_history=True, ignore_history=False, overwrite_tag=False, recursive=False, rm_explicit=False, lyrics_site="all"):
        self.test_run = test_run
        self.silent = silent
@ -37,9 +42,17 @@ class Nicole:
        self.overwrite_tag = overwrite_tag
        self.recursive = recursive
-        self.lyrics_site = "azlyrics"
+        self.lyrics_site = lyrics_site
        self.delay = 5  # enough delay so that azlyrics doesnt block the ip
        self.genius_search = "https://api.genius.com/search?q="
        self.genius_song = "https://api.genius.com/songs/"
        self.genius_access_token = "MzQaNvA53GOGvRTV8OXUbq2NCMahcnVre5EZmj-OcSjVleVO4kNwMVZicPsD5AL7"
        self.sanity_checks = True
        self.sanity_min_title_ratio = 0.6
        self.sanity_min_artist_ratio = 0.7
        self.history = []
        self.failed = []  # All files that failed
        if not self.ignore_history:
@ -82,7 +95,7 @@ class Nicole:
    def get_urls_azlyrics(self, artist:str, title:str):
        """
        Create the azlyrics urls from the artist and title
-        If the title contains paranthesis or äüö, there will be multiple versions, one that contains the (...)öäü and one that doesn't.
+        If the title contains parenthesis or äüö, there will be multiple versions, one that contains the (...)öäü and one that doesn't.
        """
        # convert to lower case
        artist = artist.casefold()
@ -94,7 +107,7 @@ class Nicole:
        elif artist[0:4] == "the ":
            artist = artist[4:]
-        # remove anything in square bracketrs (eg [Explicit])
+        # remove anything in square brackets (eg [Explicit])
        for match in re.finditer(r"\[.*\]", title):
            title = title.replace(match.group(), "")
@ -153,7 +166,6 @@ class Nicole:
            urls.append("https://azlyrics.com/lyrics/" + artist + '/' + title + ".html")
        return urls
    def get_lyrics_azlyrics(self, urls):
        """
        Extract the lyrics from the html
@ -164,7 +176,7 @@ class Nicole:
            # visit the url
            html = None
            try:
-                html = str(ur.urlopen(url).read().decode("utf-8"))
+                html = str(urllib.request.urlopen(url).read().decode("utf-8"))
                sleep(self.delay) # azlyrics blocks requests if there is no delay
            except Exception:
                sleep(self.delay) # azlyrics blocks requests if there is no delay
@ -191,10 +203,82 @@ class Nicole:
                return (True, lyrics)
-            message += f"Could not lyrics in html for {url}\n    "
+            message += f"Could not find lyrics in html for {url}\n    "
        message = message.strip(" \n")
        return (False, message)
    def get_url_genius(self, artist:str, title:str):
        """
        Retrieve the url using the genius api:
        1) Get song id using search for song + artist
        2) Get url from song id
        """
        # get search results
        query_search = self.genius_search + urllib.parse.quote(f"{artist} {title}")
        request_search = urllib.request.Request(query_search)
        request_search.add_header("Authorization", f"Bearer {self.genius_access_token}")
        try:
            results = loads(urllib.request.urlopen(request_search).read())["response"]["hits"]
        except urllib.error.URLError:
            return (False, f"Could not access url: {query_search}")
        message = ""
        url = None
        i = 0
        while url is None and i < len(results):
            # check if result is song and then get url
            if results[i]["type"] == "song":
                song_id = results[i]["result"]["id"]
                # check if result is garbage by checking how similar title and artist names are
                if self.sanity_checks:
                    genius_artist = results[i]["result"]["primary_artist"]["name"]
                    genius_artist_featured = results[i]["result"]["artist_names"]
                    genius_title = results[i]["result"]["title"]
                    genius_title_featured = results[i]["result"]["title_with_featured"]
                    if SequenceMatcher(None, title, genius_title).ratio() < self.sanity_min_title_ratio:
                        if SequenceMatcher(None, title, genius_title_featured).ratio() < self.sanity_min_title_ratio:
                            message += f"Genius result: titles do not match enough: '{title}' and '{genius_title}'/'{genius_title_featured}'\n    "
                            i += 1
                            continue
                    if SequenceMatcher(None, artist, genius_artist).ratio() < self.sanity_min_artist_ratio:
                        if SequenceMatcher(None, artist, genius_artist_featured).ratio() < self.sanity_min_artist_ratio:
                            message += f"Genius result: artists do not match enough: '{artist}' and '{genius_artist}'/'{genius_artist_featured}'\n    "
                            i += 1
                            continue
                request_song = urllib.request.Request(f"{self.genius_song}{song_id}")
                request_song.add_header("Authorization", f"Bearer {self.genius_access_token}")
                try:
                    url = loads(urllib.request.urlopen(request_song).read())["response"]["song"]["url"]
                except urllib.error.URLError:
                    message += f"Genius result: Could not access url: '{self.genius_song}{song_id}'\n    "
            i += 1
        if not url:
            message += f"Could not find song lyrics on genius"
            return (False, message)
        return (True, url)
    def get_lyrics_genius(self, url):
        """
        Download the genius page at url and extract the lyrics from the html.
        The lyrics are in divs with "data-lyrics-container=true", line breaks are <br> tags.

        Returns a tuple (success, result):
        (True, lyrics) if lyrics were found, otherwise (False, message).
        """
        # a bare "import urllib" does not load the submodules, so load them explicitly
        import urllib.error
        import urllib.request

        try:
            request_lyrics = urllib.request.Request(url)
            # the page is requested without the api token, only with a browser-like user agent
            request_lyrics.add_header("User-Agent", "Mozilla/5.0")
            # the with-block closes the connection as soon as the body has been read
            with urllib.request.urlopen(request_lyrics) as response:
                html = response.read()
        except (urllib.error.URLError, ValueError):
            # ValueError: the url is malformed (eg empty or without scheme)
            return (False, f"Could not access url: {url}")

        # extract lyrics from html: lyrics are in divs with "data-lyrics-container=true"
        soup = BeautifulSoup(html, "html.parser")
        # turn the <br> tags into newlines so that the extracted text keeps its line breaks
        for br in soup.find_all("br"):
            br.replace_with("\n")

        divs = soup.find_all("div", attrs={"data-lyrics-container": "true"})
        if not divs:
            return (False, f"Could not find lyrics in html: {url}")

        lyrics = "".join(div.get_text(separator="") for div in divs)
        return (True, lyrics)
    def process_dir(self, directory):
        if not path.isabs(directory):
            directory = path.normpath(getcwd() + "/" + directory)
@ -204,7 +288,6 @@ class Nicole:
        if not self.silent:
            print("\nProcessing directory: " + directory)
        entries = listdir(directory)
        entries.sort()
@ -230,7 +313,7 @@ class Nicole:
                            print(f"✓ {entry}") 
                        else:
                            print(f"✕ {entry}")
-                        print("   " + message)
+                        print("    " + message)
            elif path.isdir(entry) and self.recursive:
@ -301,39 +384,54 @@ class Nicole:
                        audio.save()
                        print(f"Removed '{word}' from the title.")
-        # currently the only supported site
+        lyrics = "Sample Lyrics"
-        if self.lyrics_site == "azlyrics":
+        success = False
-            urls = self.get_urls_azlyrics(artist, title)
+        site = "Sample Site"
-            
+        message = ""
-            success, lyrics = self.get_lyrics_azlyrics(urls)
+        # try genius
        if self.lyrics_site in ["all", "genius"]:
            success, url = self.get_url_genius(artist, title)
            if success:
-                if self.test_run:
+                success, lyrics = self.get_lyrics_genius(url)
-                    print(f"{artist} - {title}:\n{lyrics}\n\n")
+                if not success:
-                # write to tags
+                    message += lyrics + "\n    "  # lyrics is error message
-                else:
+                site = "genius"
                    if type(audio) == id3.ID3:
                        audio.add(id3.USLT(encoding=id3.Encoding.UTF8, lang="   ", text=lyrics))
                        audio.save(v2_version=4)
                    elif type(audio) == flac.FLAC:
                        audio["LYRICS"] = lyrics
                        audio.save()
                    else:
                        return (False, f"Could not write lyrics.")
                    # add to history
                    if self.write_history and file not in self.history:
                        self.history.append(file)
                return (True, f"Written lyrics to {artist} - {title}")
            else:
-                return (False, lyrics)  # lyrics is error message here
+                message += url + "\n    "  # url is error message
        # try azlyrics
        if not success and self.lyrics_site in ["all", "azlyrics"]:
            urls = self.get_urls_azlyrics(artist, title)
            success, lyrics = self.get_lyrics_azlyrics(urls)
            site = "azlyrics"
            if not success:
                message += lyrics
        # if found lyrics
        if success:
            if self.test_run:
                print(f"\n\n{artist} - {title}:\n{lyrics}\n")
            # write to tags
            else:
                if type(audio) == id3.ID3:
                    audio.add(id3.USLT(encoding=id3.Encoding.UTF8, lang="   ", text=lyrics))
                    audio.save(v2_version=4)
                elif type(audio) == flac.FLAC:
                    audio["LYRICS"] = lyrics
                    audio.save()
                else:
                    return (False, f"Could find lyrics but failed to write the tag.")
-        return (False, "Failed for unknown reason.")
+                # add to history
                if self.write_history and file not in self.history:
                    self.history.append(file)
            message += f"Written lyrics from {site} to {artist} - {title}"
            return (True, message)
        else:
            return (False, message.strip("\n    "))
 def main():
-    print("Nicole version 1.1")
+    print("Nicole version 2.0")
    # print("Get updates here: https://github.com/MatthiasQuintern/nicole")
    helpstring = """Command line options:
    -d [directory]      process directory [directory]
@ -345,24 +443,25 @@ def main():
    -o                  overwrite if the file already has lyrics
    -t                  test, do not write lyrics to file, but print to console
    -h                  show this
-    --rm_explicit       remove the "[Explicit]" lyrics warning from the songs title tag"""
+    --rm_explicit       remove the "[Explicit]" lyrics warning from the songs title tag
    --site [site]       use only [site]: azlyrics or genius
    Visit https://github.com/MatthiasQuintern/nicole for updates and further help."""
    args = []
    if len(argv) > 1:
        # iterate over argv list and extract the args
        i = 1
        while i < len(argv):
            arg = argv[i]
-            if "--" in arg:
+            if arg[0] == "-":
                args.append(arg.replace("--", ""))
            elif "-" in arg:
                # check if option with arg, if yes add tuple to args
                if len(argv) > i + 1 and argv[i+1][0] != "-":
                    args.append((arg.replace("-", ""), argv[i+1]))
                    i += 1
                elif not "--" in arg:
                    for char in arg.replace("-", ""):
                        args.append(char)
                else:
-                    for c in arg.replace("-", ""):
+                    args.append(arg.replace("-", ""))
                        args.append(c)
            else:
                print(f"Invalid or missing argument: '{arg}'")
                print(helpstring)
@ -383,11 +482,16 @@ def main():
    directory = None
    file = None
    site = "all"
    for arg in args:
        if type(arg) == tuple:
            if arg[0] == "d": directory = arg[1]
            elif arg[0] == "f": file = arg[1]
            elif arg[0] == "site":
                if arg[1] in ["genius", "azlyrics", "all"]: site = arg[1]
                else:
                    print(f"Invalid site: '{arg[1]}'")
        elif arg in options.keys():
            # flip the bool associated with the char
@ -403,9 +507,9 @@ def main():
    if options["h"]:
        print(helpstring)
        return 0
- 
+
    # create nicole instance
-    nicole = Nicole(test_run=options["t"], silent=options["s"], write_history=options["n"], ignore_history=options["i"], overwrite_tag=options["o"], recursive=options["r"], rm_explicit=options["rm_explicit"])
+    nicole = Nicole(test_run=options["t"], silent=options["s"], write_history=options["n"], ignore_history=options["i"], overwrite_tag=options["o"], recursive=options["r"], rm_explicit=options["rm_explicit"], lyrics_site=site)
    # start with file or directory
    if file:
@ -415,7 +519,7 @@ def main():
                print(f"✓ {file}") 
            else:
                print(f"✕ {file}")
-            print("   " + message)
+            print("    " + message)
    elif directory:
        try:
            nicole.process_dir(directory)