added support for genius
This commit is contained in:
parent
6fc54858de
commit
9299140b87
176
nicole/nicole.py
176
nicole/nicole.py
@ -1,17 +1,22 @@
|
||||
#!/bin/python3
|
||||
# Copyright © 2022 Matthias Quintern.
|
||||
# This software comes with no warranty.
|
||||
# This software is licensed under the GPL3
|
||||
|
||||
from mutagen import easyid3, id3, flac
|
||||
import urllib.request as ur
|
||||
|
||||
import urllib
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
from difflib import SequenceMatcher
|
||||
from json import loads
|
||||
|
||||
from os import path, getcwd, listdir, mkdir
|
||||
from time import sleep
|
||||
|
||||
from sys import argv
|
||||
|
||||
import re
|
||||
|
||||
"""
|
||||
Der Name Nicole ist frei erfunden und hat keine Bedeutung.
|
||||
Jeglicher Zusammenhang mit einer Website der DHL wird hiermit ausdrücklich ausgeschlossen.
|
||||
"""
|
||||
|
||||
# Der Name Nicole ist frei erfunden und hat keine Bedeutung.
|
||||
# Jeglicher Zusammenhang mit einer Website der DHL wird hiermit ausdrücklich ausgeschlossen.
|
||||
|
||||
class Nicole:
|
||||
"""
|
||||
@ -27,7 +32,7 @@ class Nicole:
|
||||
Nicole creates a azlyrics url from the title and artist mp3-tags of the file.
|
||||
The lyrics are extracted from the html document using regex.
|
||||
"""
|
||||
def __init__(self, test_run=False, silent=False, write_history=True, ignore_history=False, overwrite_tag=False, recursive=False, rm_explicit=False):
|
||||
def __init__(self, test_run=False, silent=False, write_history=True, ignore_history=False, overwrite_tag=False, recursive=False, rm_explicit=False, lyrics_site="all"):
|
||||
self.test_run = test_run
|
||||
self.silent = silent
|
||||
|
||||
@ -37,9 +42,17 @@ class Nicole:
|
||||
self.overwrite_tag = overwrite_tag
|
||||
self.recursive = recursive
|
||||
|
||||
self.lyrics_site = "azlyrics"
|
||||
self.lyrics_site = lyrics_site
|
||||
self.delay = 5 # enough delay so that azlyrics doesnt block the ip
|
||||
|
||||
self.genius_search = "https://api.genius.com/search?q="
|
||||
self.genius_song = "https://api.genius.com/songs/"
|
||||
self.genius_access_token = "MzQaNvA53GOGvRTV8OXUbq2NCMahcnVre5EZmj-OcSjVleVO4kNwMVZicPsD5AL7"
|
||||
|
||||
self.sanity_checks = True
|
||||
self.sanity_min_title_ratio = 0.6
|
||||
self.sanity_min_artist_ratio = 0.7
|
||||
|
||||
self.history = []
|
||||
self.failed = [] # All files that failed
|
||||
if not self.ignore_history:
|
||||
@ -82,7 +95,7 @@ class Nicole:
|
||||
def get_urls_azlyrics(self, artist:str, title:str):
|
||||
"""
|
||||
Create a azlyrics html from the artist and title
|
||||
If the title contains paranthesis or äüö, there will be multiple versions, one that contains the (...)öäü and one that doesn't.
|
||||
If the title contains parenthesis or äüö, there will be multiple versions, one that contains the (...)öäü and one that doesn't.
|
||||
"""
|
||||
# convert to lower case
|
||||
artist = artist.casefold()
|
||||
@ -94,7 +107,7 @@ class Nicole:
|
||||
elif artist[0:4] == "the ":
|
||||
artist = artist[4:]
|
||||
|
||||
# remove anything in square bracketrs (eg [Explicit])
|
||||
# remove anything in square brackets (eg [Explicit])
|
||||
for match in re.finditer(r"\[.*\]", title):
|
||||
title = title.replace(match.group(), "")
|
||||
|
||||
@ -153,7 +166,6 @@ class Nicole:
|
||||
urls.append("https://azlyrics.com/lyrics/" + artist + '/' + title + ".html")
|
||||
return urls
|
||||
|
||||
|
||||
def get_lyrics_azlyrics(self, urls):
|
||||
"""
|
||||
Extract the lyrics from the html
|
||||
@ -164,7 +176,7 @@ class Nicole:
|
||||
# visit the url
|
||||
html = None
|
||||
try:
|
||||
html = str(ur.urlopen(url).read().decode("utf-8"))
|
||||
html = str(urllib.request.urlopen(url).read().decode("utf-8"))
|
||||
sleep(self.delay) # azlyrics blocks requests if there is no delay
|
||||
except Exception:
|
||||
sleep(self.delay) # azlyrics blocks requests if there is no delay
|
||||
@ -191,10 +203,82 @@ class Nicole:
|
||||
|
||||
return (True, lyrics)
|
||||
|
||||
message += f"Could not lyrics in html for {url}\n "
|
||||
message += f"Could not find lyrics in html for {url}\n "
|
||||
message = message.strip(" \n")
|
||||
return (False, message)
|
||||
|
||||
def get_url_genius(self, artist:str, title:str):
    """
    Retrieve the url of a song page using the genius api:
    1) Search for "<artist> <title>" and walk through the hits in order
    2) For the first hit that is a song and passes the sanity checks,
       request the song by its id and read the url from the response

    The sanity checks (only run if self.sanity_checks is set) compare the given
    title/artist with the ones reported by genius, ignoring case. A hit is kept
    if either the plain or the "featured" variant reaches the minimum ratio.

    @param artist: artist name to search for
    @param title: song title to search for
    @returns (True, url) on success, (False, message) otherwise.
             message lists why each rejected hit was skipped.
    """
    # a bare "import urllib" does not load the submodules, so make sure they are available
    import urllib.error
    import urllib.parse
    import urllib.request

    def fetch_json(api_url):
        """Send an authorized GET request and return the decoded json document."""
        request = urllib.request.Request(api_url)
        request.add_header("Authorization", f"Bearer {self.genius_access_token}")
        # the context manager closes the connection even if read() or loads() fails
        with urllib.request.urlopen(request) as response:
            return loads(response.read())

    def similar_enough(wanted, candidates, min_ratio):
        """True if wanted matches at least one candidate with a ratio >= min_ratio, ignoring case."""
        wanted = wanted.casefold()
        return any(SequenceMatcher(None, wanted, candidate.casefold()).ratio() >= min_ratio for candidate in candidates)

    # get search results
    query_search = self.genius_search + urllib.parse.quote(f"{artist} {title}")
    try:
        results = fetch_json(query_search)["response"]["hits"]
    except urllib.error.URLError:
        return (False, f"Could not access url: {query_search}")
    except (ValueError, KeyError, TypeError):
        # invalid json or a document without response/hits
        return (False, f"Unexpected response from url: {query_search}")

    message = ""
    for hit in results:
        try:
            # only songs have a lyrics page
            if hit["type"] != "song":
                continue
            result = hit["result"]
            song_id = result["id"]
            # check if result is garbage by checking how similar title and artist names are
            if self.sanity_checks:
                genius_artist = result["primary_artist"]["name"]
                genius_artist_featured = result["artist_names"]
                genius_title = result["title"]
                genius_title_featured = result["title_with_featured"]
                if not similar_enough(title, (genius_title, genius_title_featured), self.sanity_min_title_ratio):
                    message += f"Genius result: titles do not match enough: '{title}' and '{genius_title}'/'{genius_title_featured}'\n "
                    continue
                if not similar_enough(artist, (genius_artist, genius_artist_featured), self.sanity_min_artist_ratio):
                    message += f"Genius result: artists do not match enough: '{artist}' and '{genius_artist}'/'{genius_artist_featured}'\n "
                    continue
        except (KeyError, TypeError, AttributeError):
            # the hit does not have the expected fields, try the next one
            message += "Genius result: unexpected search result format\n "
            continue

        query_song = f"{self.genius_song}{song_id}"
        try:
            url = fetch_json(query_song)["response"]["song"]["url"]
        except urllib.error.URLError:
            message += f"Genius result: Could not access url: '{query_song}'\n "
            continue
        except (ValueError, KeyError, TypeError):
            message += f"Genius result: Unexpected response from url: '{query_song}'\n "
            continue
        if url:
            return (True, url)

    message += "Could not find song lyrics on genius"
    return (False, message)
|
||||
|
||||
def get_lyrics_genius(self, url):
    """
    Download the html document at url and extract the lyrics from it.
    The lyrics are in divs with "data-lyrics-container=true".
    Line breaks (<br>) are converted to newlines before the text is extracted.

    @param url: url of the genius song page
    @returns (True, lyrics) on success, (False, message) if the page could not
             be loaded or does not contain a lyrics container
    """
    # a bare "import urllib" does not load the submodules, so make sure they are available
    import urllib.error
    import urllib.request

    try:
        request_lyrics = urllib.request.Request(url)
        # NOTE(review): presumably genius rejects urllib's default user agent - confirm
        request_lyrics.add_header("User-Agent", "Mozilla/5.0")
        # the context manager closes the connection once the document is read
        with urllib.request.urlopen(request_lyrics) as response:
            html = response.read()
    except (urllib.error.URLError, ValueError):
        # ValueError: Request() does not accept urls without a scheme
        return (False, f"Could not access url: {url}")

    # extract lyrics from html: lyrics are in divs with "data-lyrics-container=true"
    soup = BeautifulSoup(html, "html.parser")
    for br in soup.find_all("br"):
        br.replace_with("\n")
    divs = soup.find_all("div", attrs={"data-lyrics-container": "true"})
    if not divs:
        return (False, f"Could not find lyrics in html: {url}")
    lyrics = "".join(div.get_text(separator="") for div in divs)
    return (True, lyrics)
|
||||
|
||||
def process_dir(self, directory):
|
||||
if not path.isabs(directory):
|
||||
directory = path.normpath(getcwd() + "/" + directory)
|
||||
@ -204,7 +288,6 @@ class Nicole:
|
||||
if not self.silent:
|
||||
print("\nProcessing directory: " + directory)
|
||||
|
||||
|
||||
entries = listdir(directory)
|
||||
entries.sort()
|
||||
|
||||
@ -301,14 +384,31 @@ class Nicole:
|
||||
audio.save()
|
||||
print(f"Removed '{word}' from the title.")
|
||||
|
||||
# currently the only supported site
|
||||
if self.lyrics_site == "azlyrics":
|
||||
lyrics = "Sample Lyrics"
|
||||
success = False
|
||||
site = "Sample Site"
|
||||
message = ""
|
||||
# try genius
|
||||
if self.lyrics_site in ["all", "genius"]:
|
||||
success, url = self.get_url_genius(artist, title)
|
||||
if success:
|
||||
success, lyrics = self.get_lyrics_genius(url)
|
||||
if not success:
|
||||
message += lyrics + "\n " # lyrics is error message
|
||||
site = "genius"
|
||||
else:
|
||||
message += url + "\n " # url is error message
|
||||
# try azlyrics
|
||||
if not success and self.lyrics_site in ["all", "azlyrics"]:
|
||||
urls = self.get_urls_azlyrics(artist, title)
|
||||
|
||||
success, lyrics = self.get_lyrics_azlyrics(urls)
|
||||
site = "azlyrics"
|
||||
if not success:
|
||||
message += lyrics
|
||||
# if found lyrics
|
||||
if success:
|
||||
if self.test_run:
|
||||
print(f"{artist} - {title}:\n{lyrics}\n\n")
|
||||
print(f"\n\n{artist} - {title}:\n{lyrics}\n")
|
||||
# write to tags
|
||||
else:
|
||||
if type(audio) == id3.ID3:
|
||||
@ -318,22 +418,20 @@ class Nicole:
|
||||
audio["LYRICS"] = lyrics
|
||||
audio.save()
|
||||
else:
|
||||
return (False, f"Could not write lyrics.")
|
||||
return (False, f"Could find lyrics but failed to write the tag.")
|
||||
|
||||
# add to history
|
||||
if self.write_history and file not in self.history:
|
||||
self.history.append(file)
|
||||
|
||||
return (True, f"Written lyrics to {artist} - {title}")
|
||||
message += f"Written lyrics from {site} to {artist} - {title}"
|
||||
return (True, message)
|
||||
else:
|
||||
return (False, lyrics) # lyrics is error message here
|
||||
|
||||
return (False, "Failed for unknown reason.")
|
||||
return (False, message.strip("\n "))
|
||||
|
||||
|
||||
def main():
|
||||
print("Nicole version 1.1")
|
||||
# print("Get updates here: https://github.com/MatthiasQuintern/nicole")
|
||||
print("Nicole version 2.0")
|
||||
|
||||
helpstring = """Command line options:
|
||||
-d [directory] process directory [directory]
|
||||
@ -345,24 +443,25 @@ def main():
|
||||
-o overwrite if the file already has lyrics
|
||||
-t test, do not write lyrics to file, but print to console
|
||||
-h show this
|
||||
--rm_explicit remove the "[Explicit]" lyrics warning from the songs title tag"""
|
||||
--rm_explicit remove the "[Explicit]" lyrics warning from the songs title tag
|
||||
--site [site] use only [site]: azlyrics or genius
|
||||
Visit https://github.com/MatthiasQuintern/nicole for updates and further help."""
|
||||
args = []
|
||||
if len(argv) > 1:
|
||||
# iterate over argv list and extract the args
|
||||
i = 1
|
||||
while i < len(argv):
|
||||
arg = argv[i]
|
||||
if "--" in arg:
|
||||
args.append(arg.replace("--", ""))
|
||||
|
||||
elif "-" in arg:
|
||||
if arg[0] == "-":
|
||||
# check if option with arg, if yes add tuple to args
|
||||
if len(argv) > i + 1 and argv[i+1][0] != "-":
|
||||
args.append((arg.replace("-", ""), argv[i+1]))
|
||||
i += 1
|
||||
elif not "--" in arg:
|
||||
for char in arg.replace("-", ""):
|
||||
args.append(char)
|
||||
else:
|
||||
for c in arg.replace("-", ""):
|
||||
args.append(c)
|
||||
args.append(arg.replace("-", ""))
|
||||
else:
|
||||
print(f"Invalid or missing argument: '{arg}'")
|
||||
print(helpstring)
|
||||
@ -383,11 +482,16 @@ def main():
|
||||
|
||||
directory = None
|
||||
file = None
|
||||
site = "all"
|
||||
|
||||
for arg in args:
|
||||
if type(arg) == tuple:
|
||||
if arg[0] == "d": directory = arg[1]
|
||||
elif arg[0] == "f": file = arg[1]
|
||||
elif arg[0] == "site":
|
||||
if arg[1] in ["genius", "azlyrics", "all"]: site = arg[1]
|
||||
else:
|
||||
print(f"Invalid site: '{arg[1]}'")
|
||||
|
||||
elif arg in options.keys():
|
||||
# flip the bool associated with the char
|
||||
@ -405,7 +509,7 @@ def main():
|
||||
return 0
|
||||
|
||||
# create nicole instance
|
||||
nicole = Nicole(test_run=options["t"], silent=options["s"], write_history=options["n"], ignore_history=options["i"], overwrite_tag=options["o"], recursive=options["r"], rm_explicit=options["rm_explicit"])
|
||||
nicole = Nicole(test_run=options["t"], silent=options["s"], write_history=options["n"], ignore_history=options["i"], overwrite_tag=options["o"], recursive=options["r"], rm_explicit=options["rm_explicit"], lyrics_site=site)
|
||||
|
||||
# start with file or directory
|
||||
if file:
|
||||
|
Loading…
Reference in New Issue
Block a user