added support for genius

This commit is contained in:
matthias@arch 2022-04-04 05:53:35 +02:00
parent 6fc54858de
commit 9299140b87

View File

@ -1,17 +1,22 @@
#!/bin/python3
# Copyright © 2022 Matthias Quintern.
# This software comes with no warranty.
# This software is licensed under the GPL3
from mutagen import easyid3, id3, flac
import urllib.request as ur
import urllib
import re
from bs4 import BeautifulSoup
from difflib import SequenceMatcher
from json import loads
from os import path, getcwd, listdir, mkdir
from time import sleep
from sys import argv
import re
"""
Der Name Nicole ist frei erfunden und hat keine Bedeutung.
Jeglicher Zusammenhang mit einer Website der DHL wird hiermit ausdrücklich ausgeschlossen.
"""
# Der Name Nicole ist frei erfunden und hat keine Bedeutung.
# Jeglicher Zusammenhang mit einer Website der DHL wird hiermit ausdrücklich ausgeschlossen.
class Nicole:
"""
@ -27,7 +32,7 @@ class Nicole:
Nicole creates a azlyrics url from the title and artist mp3-tags of the file.
The lyrics are extracted from the html document using regex.
"""
def __init__(self, test_run=False, silent=False, write_history=True, ignore_history=False, overwrite_tag=False, recursive=False, rm_explicit=False):
def __init__(self, test_run=False, silent=False, write_history=True, ignore_history=False, overwrite_tag=False, recursive=False, rm_explicit=False, lyrics_site="all"):
self.test_run = test_run
self.silent = silent
@ -37,9 +42,17 @@ class Nicole:
self.overwrite_tag = overwrite_tag
self.recursive = recursive
self.lyrics_site = "azlyrics"
self.lyrics_site = lyrics_site
self.delay = 5 # enough delay so that azlyrics doesnt block the ip
self.genius_search = "https://api.genius.com/search?q="
self.genius_song = "https://api.genius.com/songs/"
self.genius_access_token = "MzQaNvA53GOGvRTV8OXUbq2NCMahcnVre5EZmj-OcSjVleVO4kNwMVZicPsD5AL7"
self.sanity_checks = True
self.sanity_min_title_ratio = 0.6
self.sanity_min_artist_ratio = 0.7
self.history = []
self.failed = [] # All files that failed
if not self.ignore_history:
@ -82,7 +95,7 @@ class Nicole:
def get_urls_azlyrics(self, artist:str, title:str):
"""
Create a azlyrics html from the artist and title
If the title contains paranthesis or äüö, there will be multiple versions, one that contains the (...)öäü and one that doesn't.
If the title contains parenthesis or äüö, there will be multiple versions, one that contains the (...)öäü and one that doesn't.
"""
# convert to lower case
artist = artist.casefold()
@ -94,7 +107,7 @@ class Nicole:
elif artist[0:4] == "the ":
artist = artist[4:]
# remove anything in square bracketrs (eg [Explicit])
# remove anything in square brackets (eg [Explicit])
for match in re.finditer(r"\[.*\]", title):
title = title.replace(match.group(), "")
@ -153,7 +166,6 @@ class Nicole:
urls.append("https://azlyrics.com/lyrics/" + artist + '/' + title + ".html")
return urls
def get_lyrics_azlyrics(self, urls):
"""
Extract the lyrics from the html
@ -164,7 +176,7 @@ class Nicole:
# visit the url
html = None
try:
html = str(ur.urlopen(url).read().decode("utf-8"))
html = str(urllib.request.urlopen(url).read().decode("utf-8"))
sleep(self.delay) # azlyrics blocks requests if there is no delay
except Exception:
sleep(self.delay) # azlyrics blocks requests if there is no delay
@ -191,10 +203,82 @@ class Nicole:
return (True, lyrics)
message += f"Could not lyrics in html for {url}\n "
message += f"Could not find lyrics in html for {url}\n "
message = message.strip(" \n")
return (False, message)
def get_url_genius(self, artist:str, title:str):
    """
    Retrieve the url of the lyrics page using the genius api:
    1) Search for "<artist> <title>" and go through the hits in the order they are returned
    2) Skip hits that are not songs and, if self.sanity_checks is set, hits whose title or artist
       is not similar enough to the given ones
    3) Get the url from the song id of the first remaining hit

    Returns (True, url) on success and (False, error message) on failure.
    """
    # a plain "import urllib" does not load the submodules, so import the ones used here explicitly
    import urllib.error
    import urllib.parse
    import urllib.request

    def fetch_response(request_url):
        # send an authorized request to the genius api and return the "response" object of the json reply
        request = urllib.request.Request(request_url)
        request.add_header("Authorization", f"Bearer {self.genius_access_token}")
        # the with-statement closes the connection even if reading or decoding fails
        with urllib.request.urlopen(request) as response:
            return loads(response.read())["response"]

    def similarity(tag_value, genius_value):
        # compare case-insensitively: different capitalization in the tag and on genius is not a mismatch
        return SequenceMatcher(None, tag_value.casefold(), genius_value.casefold()).ratio()

    # get search results
    query_search = self.genius_search + urllib.parse.quote(f"{artist} {title}")
    try:
        results = fetch_response(query_search)["hits"]
    except urllib.error.URLError:
        return (False, f"Could not access url: {query_search}")
    except (ValueError, KeyError, TypeError):
        # reply was not json or did not have the expected layout
        return (False, f"Unexpected response from url: {query_search}")

    message = ""
    for hit in results:
        # check if result is song and then get url
        if hit["type"] != "song":
            continue
        result = hit["result"]
        song_id = result["id"]

        # check if result is garbage by checking how similar title and artist names are
        if self.sanity_checks:
            genius_artist = result["primary_artist"]["name"]
            genius_artist_featured = result["artist_names"]
            genius_title = result["title"]
            genius_title_featured = result["title_with_featured"]
            # a hit passes if either the plain or the "featured" variant is similar enough
            if max(similarity(title, genius_title), similarity(title, genius_title_featured)) < self.sanity_min_title_ratio:
                message += f"Genius result: titles do not match enough: '{title}' and '{genius_title}'/'{genius_title_featured}'\n "
                continue
            if max(similarity(artist, genius_artist), similarity(artist, genius_artist_featured)) < self.sanity_min_artist_ratio:
                message += f"Genius result: artists do not match enough: '{artist}' and '{genius_artist}'/'{genius_artist_featured}'\n "
                continue

        song_url = f"{self.genius_song}{song_id}"
        try:
            url = fetch_response(song_url)["song"]["url"]
        except urllib.error.URLError:
            message += f"Genius result: Could not access url: '{song_url}'\n "
            continue
        except (ValueError, KeyError, TypeError):
            message += f"Genius result: Unexpected response from url: '{song_url}'\n "
            continue
        # an empty url is useless, keep looking at the remaining hits in that case
        if url:
            return (True, url)

    message += "Could not find song lyrics on genius"
    return (False, message)
def get_lyrics_genius(self, url):
    """
    Download the genius page at url and extract the lyrics from the html.
    The lyrics are in divs with "data-lyrics-container=true".

    Returns (True, lyrics) on success and (False, error message) on failure.
    """
    # a plain "import urllib" does not load the submodules, so import the ones used here explicitly
    import urllib.error
    import urllib.request

    request_lyrics = urllib.request.Request(url)
    # NOTE(review): no api token is sent here, only a browser-like user agent
    # presumably genius does not serve the page to urllib's default user agent - confirm
    request_lyrics.add_header("User-Agent", "Mozilla/5.0")
    try:
        # the with-statement closes the connection as soon as the page has been read
        with urllib.request.urlopen(request_lyrics) as response:
            html = response.read()
    except urllib.error.URLError:
        return (False, f"Could not access url: {url}")

    # extract lyrics from html: lyrics are in divs with "data-lyrics-container=true"
    soup = BeautifulSoup(html, "html.parser")
    # replace the <br> tags with real newlines so that the line structure is still there in get_text()
    for br in soup.find_all("br"):
        br.replace_with("\n")
    divs = soup.find_all("div", attrs={"data-lyrics-container": "true"})
    if not divs:
        return (False, f"Could not find lyrics in html: {url}")
    # start every container on a new line, otherwise the last line of one container
    # and the first line of the next one end up on the same line
    lyrics = "\n".join(div.get_text(separator="") for div in divs)
    return (True, lyrics)
def process_dir(self, directory):
if not path.isabs(directory):
directory = path.normpath(getcwd() + "/" + directory)
@ -204,7 +288,6 @@ class Nicole:
if not self.silent:
print("\nProcessing directory: " + directory)
entries = listdir(directory)
entries.sort()
@ -230,7 +313,7 @@ class Nicole:
print(f"{entry}")
else:
print(f"{entry}")
print(" " + message)
print(" " + message)
elif path.isdir(entry) and self.recursive:
@ -301,39 +384,54 @@ class Nicole:
audio.save()
print(f"Removed '{word}' from the title.")
# currently the only supported site
if self.lyrics_site == "azlyrics":
urls = self.get_urls_azlyrics(artist, title)
success, lyrics = self.get_lyrics_azlyrics(urls)
lyrics = "Sample Lyrics"
success = False
site = "Sample Site"
message = ""
# try genius
if self.lyrics_site in ["all", "genius"]:
success, url = self.get_url_genius(artist, title)
if success:
if self.test_run:
print(f"{artist} - {title}:\n{lyrics}\n\n")
# write to tags
else:
if type(audio) == id3.ID3:
audio.add(id3.USLT(encoding=id3.Encoding.UTF8, lang=" ", text=lyrics))
audio.save(v2_version=4)
elif type(audio) == flac.FLAC:
audio["LYRICS"] = lyrics
audio.save()
else:
return (False, f"Could not write lyrics.")
# add to history
if self.write_history and file not in self.history:
self.history.append(file)
return (True, f"Written lyrics to {artist} - {title}")
success, lyrics = self.get_lyrics_genius(url)
if not success:
message += lyrics + "\n " # lyrics is error message
site = "genius"
else:
return (False, lyrics) # lyrics is error message here
message += url + "\n " # url is error message
# try azlyrics
if not success and self.lyrics_site in ["all", "azlyrics"]:
urls = self.get_urls_azlyrics(artist, title)
success, lyrics = self.get_lyrics_azlyrics(urls)
site = "azlyrics"
if not success:
message += lyrics
# if found lyrics
if success:
if self.test_run:
print(f"\n\n{artist} - {title}:\n{lyrics}\n")
# write to tags
else:
if type(audio) == id3.ID3:
audio.add(id3.USLT(encoding=id3.Encoding.UTF8, lang=" ", text=lyrics))
audio.save(v2_version=4)
elif type(audio) == flac.FLAC:
audio["LYRICS"] = lyrics
audio.save()
else:
return (False, f"Could find lyrics but failed to write the tag.")
return (False, "Failed for unknown reason.")
# add to history
if self.write_history and file not in self.history:
self.history.append(file)
message += f"Written lyrics from {site} to {artist} - {title}"
return (True, message)
else:
return (False, message.strip("\n "))
def main():
print("Nicole version 1.1")
# print("Get updates here: https://github.com/MatthiasQuintern/nicole")
print("Nicole version 2.0")
helpstring = """Command line options:
-d [directory] process directory [directory]
@ -345,24 +443,25 @@ def main():
-o overwrite if the file already has lyrics
-t test, do not write lyrics to file, but print to console
-h show this
--rm_explicit remove the "[Explicit]" lyrics warning from the songs title tag"""
--rm_explicit remove the "[Explicit]" lyrics warning from the songs title tag
--site [site] use only [site]: azlyrics or genius
Visit https://github.com/MatthiasQuintern/nicole for updates and further help."""
args = []
if len(argv) > 1:
# iterate over argv list and extract the args
i = 1
while i < len(argv):
arg = argv[i]
if "--" in arg:
args.append(arg.replace("--", ""))
elif "-" in arg:
if arg[0] == "-":
# check if option with arg, if yes add tuple to args
if len(argv) > i + 1 and argv[i+1][0] != "-":
args.append((arg.replace("-", ""), argv[i+1]))
i += 1
elif not "--" in arg:
for char in arg.replace("-", ""):
args.append(char)
else:
for c in arg.replace("-", ""):
args.append(c)
args.append(arg.replace("-", ""))
else:
print(f"Invalid or missing argument: '{arg}'")
print(helpstring)
@ -383,11 +482,16 @@ def main():
directory = None
file = None
site = "all"
for arg in args:
if type(arg) == tuple:
if arg[0] == "d": directory = arg[1]
elif arg[0] == "f": file = arg[1]
elif arg[0] == "site":
if arg[1] in ["genius", "azlyrics", "all"]: site = arg[1]
else:
print(f"Invalid site: '{arg[1]}'")
elif arg in options.keys():
# flip the bool associated with the char
@ -403,9 +507,9 @@ def main():
if options["h"]:
print(helpstring)
return 0
# create nicole instance
nicole = Nicole(test_run=options["t"], silent=options["s"], write_history=options["n"], ignore_history=options["i"], overwrite_tag=options["o"], recursive=options["r"], rm_explicit=options["rm_explicit"])
nicole = Nicole(test_run=options["t"], silent=options["s"], write_history=options["n"], ignore_history=options["i"], overwrite_tag=options["o"], recursive=options["r"], rm_explicit=options["rm_explicit"], lyrics_site=site)
# start with file or directory
if file:
@ -415,7 +519,7 @@ def main():
print(f"{file}")
else:
print(f"{file}")
print(" " + message)
print(" " + message)
elif directory:
try:
nicole.process_dir(directory)