added support for genius
This commit is contained in:
parent
6fc54858de
commit
9299140b87
176
nicole/nicole.py
176
nicole/nicole.py
@ -1,17 +1,22 @@
|
|||||||
|
#!/bin/python3
|
||||||
|
# Copyright © 2022 Matthias Quintern.
|
||||||
|
# This software comes with no warranty.
|
||||||
|
# This software is licensed under the GPL3
|
||||||
|
|
||||||
from mutagen import easyid3, id3, flac
|
from mutagen import easyid3, id3, flac
|
||||||
import urllib.request as ur
|
|
||||||
|
import urllib
|
||||||
|
import re
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from difflib import SequenceMatcher
|
||||||
|
from json import loads
|
||||||
|
|
||||||
from os import path, getcwd, listdir, mkdir
|
from os import path, getcwd, listdir, mkdir
|
||||||
from time import sleep
|
from time import sleep
|
||||||
|
|
||||||
from sys import argv
|
from sys import argv
|
||||||
|
|
||||||
import re
|
# Der Name Nicole ist frei erfunden und hat keine Bedeutung.
|
||||||
|
# Jeglicher Zusammenhang mit einer Website der DHL wird hiermit ausdrücklich ausgeschlossen.
|
||||||
"""
|
|
||||||
Der Name Nicole ist frei erfunden und hat keine Bedeutung.
|
|
||||||
Jeglicher Zusammenhang mit einer Website der DHL wird hiermit ausdrücklich ausgeschlossen.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
class Nicole:
|
class Nicole:
|
||||||
"""
|
"""
|
||||||
@ -27,7 +32,7 @@ class Nicole:
|
|||||||
Nicole creates a azlyrics url from the title and artist mp3-tags of the file.
|
Nicole creates a azlyrics url from the title and artist mp3-tags of the file.
|
||||||
The lyrics are extracted from the html document using regex.
|
The lyrics are extracted from the html document using regex.
|
||||||
"""
|
"""
|
||||||
def __init__(self, test_run=False, silent=False, write_history=True, ignore_history=False, overwrite_tag=False, recursive=False, rm_explicit=False):
|
def __init__(self, test_run=False, silent=False, write_history=True, ignore_history=False, overwrite_tag=False, recursive=False, rm_explicit=False, lyrics_site="all"):
|
||||||
self.test_run = test_run
|
self.test_run = test_run
|
||||||
self.silent = silent
|
self.silent = silent
|
||||||
|
|
||||||
@ -37,9 +42,17 @@ class Nicole:
|
|||||||
self.overwrite_tag = overwrite_tag
|
self.overwrite_tag = overwrite_tag
|
||||||
self.recursive = recursive
|
self.recursive = recursive
|
||||||
|
|
||||||
self.lyrics_site = "azlyrics"
|
self.lyrics_site = lyrics_site
|
||||||
self.delay = 5 # enough delay so that azlyrics doesnt block the ip
|
self.delay = 5 # enough delay so that azlyrics doesnt block the ip
|
||||||
|
|
||||||
|
self.genius_search = "https://api.genius.com/search?q="
|
||||||
|
self.genius_song = "https://api.genius.com/songs/"
|
||||||
|
self.genius_access_token = "MzQaNvA53GOGvRTV8OXUbq2NCMahcnVre5EZmj-OcSjVleVO4kNwMVZicPsD5AL7"
|
||||||
|
|
||||||
|
self.sanity_checks = True
|
||||||
|
self.sanity_min_title_ratio = 0.6
|
||||||
|
self.sanity_min_artist_ratio = 0.7
|
||||||
|
|
||||||
self.history = []
|
self.history = []
|
||||||
self.failed = [] # All files that failed
|
self.failed = [] # All files that failed
|
||||||
if not self.ignore_history:
|
if not self.ignore_history:
|
||||||
@ -82,7 +95,7 @@ class Nicole:
|
|||||||
def get_urls_azlyrics(self, artist:str, title:str):
|
def get_urls_azlyrics(self, artist:str, title:str):
|
||||||
"""
|
"""
|
||||||
Create a azlyrics html from the artist and title
|
Create a azlyrics html from the artist and title
|
||||||
If the title contains paranthesis or äüö, there will be multiple versions, one that contains the (...)öäü and one that doesn't.
|
If the title contains parenthesis or äüö, there will be multiple versions, one that contains the (...)öäü and one that doesn't.
|
||||||
"""
|
"""
|
||||||
# convert to lower case
|
# convert to lower case
|
||||||
artist = artist.casefold()
|
artist = artist.casefold()
|
||||||
@ -94,7 +107,7 @@ class Nicole:
|
|||||||
elif artist[0:4] == "the ":
|
elif artist[0:4] == "the ":
|
||||||
artist = artist[4:]
|
artist = artist[4:]
|
||||||
|
|
||||||
# remove anything in square bracketrs (eg [Explicit])
|
# remove anything in square brackets (eg [Explicit])
|
||||||
for match in re.finditer(r"\[.*\]", title):
|
for match in re.finditer(r"\[.*\]", title):
|
||||||
title = title.replace(match.group(), "")
|
title = title.replace(match.group(), "")
|
||||||
|
|
||||||
@ -153,7 +166,6 @@ class Nicole:
|
|||||||
urls.append("https://azlyrics.com/lyrics/" + artist + '/' + title + ".html")
|
urls.append("https://azlyrics.com/lyrics/" + artist + '/' + title + ".html")
|
||||||
return urls
|
return urls
|
||||||
|
|
||||||
|
|
||||||
def get_lyrics_azlyrics(self, urls):
|
def get_lyrics_azlyrics(self, urls):
|
||||||
"""
|
"""
|
||||||
Extract the lyrics from the html
|
Extract the lyrics from the html
|
||||||
@ -164,7 +176,7 @@ class Nicole:
|
|||||||
# visit the url
|
# visit the url
|
||||||
html = None
|
html = None
|
||||||
try:
|
try:
|
||||||
html = str(ur.urlopen(url).read().decode("utf-8"))
|
html = str(urllib.request.urlopen(url).read().decode("utf-8"))
|
||||||
sleep(self.delay) # azlyrics blocks requests if there is no delay
|
sleep(self.delay) # azlyrics blocks requests if there is no delay
|
||||||
except Exception:
|
except Exception:
|
||||||
sleep(self.delay) # azlyrics blocks requests if there is no delay
|
sleep(self.delay) # azlyrics blocks requests if there is no delay
|
||||||
@ -191,10 +203,82 @@ class Nicole:
|
|||||||
|
|
||||||
return (True, lyrics)
|
return (True, lyrics)
|
||||||
|
|
||||||
message += f"Could not lyrics in html for {url}\n "
|
message += f"Could not find lyrics in html for {url}\n "
|
||||||
message = message.strip(" \n")
|
message = message.strip(" \n")
|
||||||
return (False, message)
|
return (False, message)
|
||||||
|
|
||||||
|
def get_url_genius(self, artist:str, title:str):
    """
    Retrieve the url of a song's lyrics page using the genius api:
    1) Get song id using search for song + artist
    2) Get url from song id

    Search results are visited in order. When self.sanity_checks is set, a
    result is skipped unless its title and its artist are similar enough to
    the requested ones (self.sanity_min_title_ratio / self.sanity_min_artist_ratio).

    Returns (True, url) on success and (False, message) otherwise, where
    message lists why the individual search results were rejected.
    """
    # a bare "import urllib" does not load the submodules, so import them explicitly
    import urllib.error
    import urllib.parse
    import urllib.request

    def similarity(a, b):
        # compare case-insensitively, otherwise "HELLO" and "Hello" count as a mismatch
        return SequenceMatcher(None, a.casefold(), b.casefold()).ratio()

    authorization = f"Bearer {self.genius_access_token}"

    # get search results
    query_search = self.genius_search + urllib.parse.quote(f"{artist} {title}")
    request_search = urllib.request.Request(query_search)
    request_search.add_header("Authorization", authorization)
    try:
        with urllib.request.urlopen(request_search) as response:
            results = loads(response.read())["response"]["hits"]
    except urllib.error.URLError:
        return (False, f"Could not access url: {query_search}")
    except (ValueError, KeyError, TypeError):
        # the body was not json or did not have the expected structure
        return (False, f"Unexpected response from url: {query_search}")
    if not isinstance(results, list):
        return (False, f"Unexpected response from url: {query_search}")

    message = ""
    url = None
    for hit in results:
        try:
            # check if result is song
            if hit["type"] != "song":
                continue
            result = hit["result"]
            song_id = result["id"]
            # check if result is garbage by checking how similar title and artist names are
            if self.sanity_checks:
                genius_artist = result["primary_artist"]["name"]
                genius_artist_featured = result["artist_names"]
                genius_title = result["title"]
                genius_title_featured = result["title_with_featured"]
                # a result is accepted if either the plain or the "featured" variant matches
                if max(similarity(title, genius_title), similarity(title, genius_title_featured)) < self.sanity_min_title_ratio:
                    message += f"Genius result: titles do not match enough: '{title}' and '{genius_title}'/'{genius_title_featured}'\n "
                    continue
                if max(similarity(artist, genius_artist), similarity(artist, genius_artist_featured)) < self.sanity_min_artist_ratio:
                    message += f"Genius result: artists do not match enough: '{artist}' and '{genius_artist}'/'{genius_artist_featured}'\n "
                    continue
        except (KeyError, TypeError, AttributeError):
            # a malformed search result must not abort the whole lookup
            message += "Genius result: unexpected format of search result\n "
            continue

        # get the url from the song id
        query_song = f"{self.genius_song}{song_id}"
        request_song = urllib.request.Request(query_song)
        request_song.add_header("Authorization", authorization)
        try:
            with urllib.request.urlopen(request_song) as response:
                url = loads(response.read())["response"]["song"]["url"]
        except urllib.error.URLError:
            message += f"Genius result: Could not access url: '{self.genius_song}{song_id}'\n "
            continue
        except (ValueError, KeyError, TypeError):
            message += f"Genius result: Unexpected response from url: '{query_song}'\n "
            continue
        if url is not None:
            break

    if not url:
        message += "Could not find song lyrics on genius"
        return (False, message)
    return (True, url)
|
||||||
|
|
||||||
|
def get_lyrics_genius(self, url):
    """
    Download the genius page at url and extract the lyrics from the html.

    Returns (True, lyrics) on success and (False, message) if the page
    could not be accessed or contains no lyrics container.
    """
    # a bare "import urllib" does not load the submodules, so import them explicitly
    import urllib.error
    import urllib.request

    request_lyrics = urllib.request.Request(url)
    # send a browser-like User-Agent (presumably genius rejects the urllib default - not verified here)
    request_lyrics.add_header("User-Agent", "Mozilla/5.0")
    try:
        # the context manager closes the connection once the body is read
        with urllib.request.urlopen(request_lyrics) as response:
            html = response.read()
    except urllib.error.URLError:
        return (False, f"Could not access url: {url}")

    # extract lyrics from html: lyrics are in divs with "data-lyrics-container=true"
    soup = BeautifulSoup(html, "html.parser")
    # turn line breaks into newlines so they survive get_text()
    for br in soup.find_all("br"):
        br.replace_with("\n")
    divs = soup.find_all("div", attrs={"data-lyrics-container": "true"})
    if not divs:
        return (False, f"Could not find lyrics in html: {url}")
    lyrics = "".join(div.get_text(separator="") for div in divs)
    return (True, lyrics)
|
||||||
|
|
||||||
def process_dir(self, directory):
|
def process_dir(self, directory):
|
||||||
if not path.isabs(directory):
|
if not path.isabs(directory):
|
||||||
directory = path.normpath(getcwd() + "/" + directory)
|
directory = path.normpath(getcwd() + "/" + directory)
|
||||||
@ -204,7 +288,6 @@ class Nicole:
|
|||||||
if not self.silent:
|
if not self.silent:
|
||||||
print("\nProcessing directory: " + directory)
|
print("\nProcessing directory: " + directory)
|
||||||
|
|
||||||
|
|
||||||
entries = listdir(directory)
|
entries = listdir(directory)
|
||||||
entries.sort()
|
entries.sort()
|
||||||
|
|
||||||
@ -301,14 +384,31 @@ class Nicole:
|
|||||||
audio.save()
|
audio.save()
|
||||||
print(f"Removed '{word}' from the title.")
|
print(f"Removed '{word}' from the title.")
|
||||||
|
|
||||||
# currently the only supported site
|
lyrics = "Sample Lyrics"
|
||||||
if self.lyrics_site == "azlyrics":
|
success = False
|
||||||
|
site = "Sample Site"
|
||||||
|
message = ""
|
||||||
|
# try genius
|
||||||
|
if self.lyrics_site in ["all", "genius"]:
|
||||||
|
success, url = self.get_url_genius(artist, title)
|
||||||
|
if success:
|
||||||
|
success, lyrics = self.get_lyrics_genius(url)
|
||||||
|
if not success:
|
||||||
|
message += lyrics + "\n " # lyrics is error message
|
||||||
|
site = "genius"
|
||||||
|
else:
|
||||||
|
message += url + "\n " # url is error message
|
||||||
|
# try azlyrics
|
||||||
|
if not success and self.lyrics_site in ["all", "azlyrics"]:
|
||||||
urls = self.get_urls_azlyrics(artist, title)
|
urls = self.get_urls_azlyrics(artist, title)
|
||||||
|
|
||||||
success, lyrics = self.get_lyrics_azlyrics(urls)
|
success, lyrics = self.get_lyrics_azlyrics(urls)
|
||||||
|
site = "azlyrics"
|
||||||
|
if not success:
|
||||||
|
message += lyrics
|
||||||
|
# if found lyrics
|
||||||
if success:
|
if success:
|
||||||
if self.test_run:
|
if self.test_run:
|
||||||
print(f"{artist} - {title}:\n{lyrics}\n\n")
|
print(f"\n\n{artist} - {title}:\n{lyrics}\n")
|
||||||
# write to tags
|
# write to tags
|
||||||
else:
|
else:
|
||||||
if type(audio) == id3.ID3:
|
if type(audio) == id3.ID3:
|
||||||
@ -318,22 +418,20 @@ class Nicole:
|
|||||||
audio["LYRICS"] = lyrics
|
audio["LYRICS"] = lyrics
|
||||||
audio.save()
|
audio.save()
|
||||||
else:
|
else:
|
||||||
return (False, f"Could not write lyrics.")
|
return (False, f"Could find lyrics but failed to write the tag.")
|
||||||
|
|
||||||
# add to history
|
# add to history
|
||||||
if self.write_history and file not in self.history:
|
if self.write_history and file not in self.history:
|
||||||
self.history.append(file)
|
self.history.append(file)
|
||||||
|
|
||||||
return (True, f"Written lyrics to {artist} - {title}")
|
message += f"Written lyrics from {site} to {artist} - {title}"
|
||||||
|
return (True, message)
|
||||||
else:
|
else:
|
||||||
return (False, lyrics) # lyrics is error message here
|
return (False, message.strip("\n "))
|
||||||
|
|
||||||
return (False, "Failed for unknown reason.")
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
print("Nicole version 1.1")
|
print("Nicole version 2.0")
|
||||||
# print("Get updates here: https://github.com/MatthiasQuintern/nicole")
|
|
||||||
|
|
||||||
helpstring = """Command line options:
|
helpstring = """Command line options:
|
||||||
-d [directory] process directory [directory]
|
-d [directory] process directory [directory]
|
||||||
@ -345,24 +443,25 @@ def main():
|
|||||||
-o overwrite if the file already has lyrics
|
-o overwrite if the file already has lyrics
|
||||||
-t test, do not write lyrics to file, but print to console
|
-t test, do not write lyrics to file, but print to console
|
||||||
-h show this
|
-h show this
|
||||||
--rm_explicit remove the "[Explicit]" lyrics warning from the songs title tag"""
|
--rm_explicit remove the "[Explicit]" lyrics warning from the songs title tag
|
||||||
|
--site [site] use only [site]: azlyrics or genius
|
||||||
|
Visit https://github.com/MatthiasQuintern/nicole for updates and further help."""
|
||||||
args = []
|
args = []
|
||||||
if len(argv) > 1:
|
if len(argv) > 1:
|
||||||
# iterate over argv list and extract the args
|
# iterate over argv list and extract the args
|
||||||
i = 1
|
i = 1
|
||||||
while i < len(argv):
|
while i < len(argv):
|
||||||
arg = argv[i]
|
arg = argv[i]
|
||||||
if "--" in arg:
|
if arg[0] == "-":
|
||||||
args.append(arg.replace("--", ""))
|
|
||||||
|
|
||||||
elif "-" in arg:
|
|
||||||
# check if option with arg, if yes add tuple to args
|
# check if option with arg, if yes add tuple to args
|
||||||
if len(argv) > i + 1 and argv[i+1][0] != "-":
|
if len(argv) > i + 1 and argv[i+1][0] != "-":
|
||||||
args.append((arg.replace("-", ""), argv[i+1]))
|
args.append((arg.replace("-", ""), argv[i+1]))
|
||||||
i += 1
|
i += 1
|
||||||
|
elif not "--" in arg:
|
||||||
|
for char in arg.replace("-", ""):
|
||||||
|
args.append(char)
|
||||||
else:
|
else:
|
||||||
for c in arg.replace("-", ""):
|
args.append(arg.replace("-", ""))
|
||||||
args.append(c)
|
|
||||||
else:
|
else:
|
||||||
print(f"Invalid or missing argument: '{arg}'")
|
print(f"Invalid or missing argument: '{arg}'")
|
||||||
print(helpstring)
|
print(helpstring)
|
||||||
@ -383,11 +482,16 @@ def main():
|
|||||||
|
|
||||||
directory = None
|
directory = None
|
||||||
file = None
|
file = None
|
||||||
|
site = "all"
|
||||||
|
|
||||||
for arg in args:
|
for arg in args:
|
||||||
if type(arg) == tuple:
|
if type(arg) == tuple:
|
||||||
if arg[0] == "d": directory = arg[1]
|
if arg[0] == "d": directory = arg[1]
|
||||||
elif arg[0] == "f": file = arg[1]
|
elif arg[0] == "f": file = arg[1]
|
||||||
|
elif arg[0] == "site":
|
||||||
|
if arg[1] in ["genius", "azlyrics", "all"]: site = arg[1]
|
||||||
|
else:
|
||||||
|
print(f"Invalid site: '{arg[1]}'")
|
||||||
|
|
||||||
elif arg in options.keys():
|
elif arg in options.keys():
|
||||||
# flip the bool associated with the char
|
# flip the bool associated with the char
|
||||||
@ -405,7 +509,7 @@ def main():
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
# create nicole instance
|
# create nicole instance
|
||||||
nicole = Nicole(test_run=options["t"], silent=options["s"], write_history=options["n"], ignore_history=options["i"], overwrite_tag=options["o"], recursive=options["r"], rm_explicit=options["rm_explicit"])
|
nicole = Nicole(test_run=options["t"], silent=options["s"], write_history=options["n"], ignore_history=options["i"], overwrite_tag=options["o"], recursive=options["r"], rm_explicit=options["rm_explicit"], lyrics_site=site)
|
||||||
|
|
||||||
# start with file or directory
|
# start with file or directory
|
||||||
if file:
|
if file:
|
||||||
|
Loading…
Reference in New Issue
Block a user