Add files via upload
This commit is contained in:
542
VocalRemover.py
Normal file
542
VocalRemover.py
Normal file
@@ -0,0 +1,542 @@
|
|||||||
|
# GUI modules
|
||||||
|
import tkinter as tk
|
||||||
|
import tkinter.ttk as ttk
|
||||||
|
import tkinter.messagebox
|
||||||
|
import tkinter.filedialog
|
||||||
|
import tkinter.font
|
||||||
|
from datetime import datetime
|
||||||
|
# Images
|
||||||
|
from PIL import Image
|
||||||
|
from PIL import ImageTk
|
||||||
|
import pickle # Save Data
|
||||||
|
# Other Modules
|
||||||
|
import subprocess # Run python file
|
||||||
|
# Pathfinding
|
||||||
|
import pathlib
|
||||||
|
import os
|
||||||
|
from collections import defaultdict
|
||||||
|
# Used for live text displaying
|
||||||
|
import queue
|
||||||
|
import threading # Run the algorithm inside a thread
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import inference
|
||||||
|
|
||||||
|
# --Global Variables--
# Resolve the script directory as an absolute path first: ``__file__`` may be
# relative (or a bare file name), in which case ``os.path.dirname`` returns ''
# and ``os.chdir('')`` raises FileNotFoundError.
base_path = os.path.dirname(os.path.abspath(__file__))
os.chdir(base_path)  # Change the current working directory to the base path
models_dir = os.path.join(base_path, 'models')
logo_path = os.path.join(base_path, 'Images/UVR-logo.png')
# Settings used whenever no (valid) saved data file is available
DEFAULT_DATA = {
    'exportPath': '',
    'gpuConversion': False,
    'postprocessing': False,
    'mask': False,
    'stackLoops': False,
    'srValue': 44100,
    'hopValue': 1024,
    'stackLoopsNum': 1,
    'winSize': 512,
}
# Supported Music Files
AVAILABLE_FORMATS = ['.mp3', '.mp4', '.m4a', '.flac', '.wav']
|
||||||
|
|
||||||
|
|
||||||
|
def open_image(path: str, size: tuple = None, keep_aspect: bool = True, rotate: int = 0) -> tuple:
    """
    Open the image on the path and apply given settings\n
    Parameters:
        path(str):
            Absolute path of the image
        size(tuple):
            first value - width
            second value - height
            (None keeps the original dimensions)
        keep_aspect(bool):
            keep aspect ratio of image and resize
            to maximum possible width and height
            (maxima are given by size)
        rotate(int):
            clockwise rotation of image
    Returns(tuple):
        (ImageTk.PhotoImage, Image)
    """
    img = Image.open(path)
    # Aspect ratio is taken from the image as stored, before any rotation
    ratio = img.height/img.width
    # PIL rotates counter-clockwise, so negate for a clockwise rotation
    img = img.rotate(angle=-rotate)
    if size is not None:
        width, height = int(size[0]), int(size[1])
        if keep_aspect:
            # Fit to the requested width first ...
            fitted_height = int(width * ratio)
            if fitted_height > height:
                # ... and fall back to fitting the height when the result
                # would exceed the requested maximum height
                width = max(1, int(height / ratio))
            else:
                height = max(1, fitted_height)
        # Image.ANTIALIAS was an alias of LANCZOS and no longer exists in
        # current Pillow releases; LANCZOS is the same filter
        img = img.resize((width, height), Image.LANCZOS)
    img = img.convert(mode='RGBA')
    return ImageTk.PhotoImage(img), img
|
||||||
|
|
||||||
|
|
||||||
|
def save_data(data):
    """
    Write the given data to 'data.pkl' (pickle format) in the current
    working directory

    Parameters:
        data(dict):
            Dictionary containing all the necessary data to save
    """
    # Mode 'wb' creates the file when it is missing and overwrites it otherwise
    with open('data.pkl', 'wb') as pickle_file:
        pickle_file.write(pickle.dumps(data))
|
||||||
|
|
||||||
|
|
||||||
|
def load_data() -> dict:
    """
    Loads saved pkl file and returns the stored data

    When 'data.pkl' is missing, unreadable as a pickle, or does not contain
    a dictionary, the file is recreated from DEFAULT_DATA and the defaults
    are returned. Keys missing from an existing file are filled in from
    DEFAULT_DATA.

    Returns(dict):
        Dictionary containing all the saved data
    """
    # NOTE(review): pickle can execute arbitrary code while loading; this is
    # only acceptable as long as data.pkl is a local file written by this app.
    try:
        with open('data.pkl', 'rb') as data_file:  # Open data file
            data = pickle.load(data_file)
    except (FileNotFoundError, EOFError, pickle.UnpicklingError,
            ValueError, AttributeError, ImportError, IndexError):
        # Data File is corrupted or not found so recreate it.
        # An empty file raises EOFError and damaged content usually raises
        # UnpicklingError, neither of which is a ValueError.
        data = None

    if not isinstance(data, dict):
        save_data(data=DEFAULT_DATA)
        return dict(DEFAULT_DATA)

    # Saved values win; defaults only fill in keys the file does not have
    return {**DEFAULT_DATA, **data}
|
||||||
|
|
||||||
|
|
||||||
|
class ThreadSafeConsole(tk.Text):
    """
    Text widget that receives its content through a queue.

    write() and clear() only enqueue a request; the widget itself is
    modified in update_me(), which re-schedules itself with after().
    """

    def __init__(self, master, **options):
        super().__init__(master, **options)
        # Pending requests: a line to append, or None for "clear everything"
        self.queue = queue.Queue()
        self.update_me()

    def write(self, line):
        """Queue a line that will be appended to the widget"""
        self.queue.put(line)

    def clear(self):
        """Queue a request to delete the whole widget content"""
        self.queue.put(None)

    def update_me(self):
        """Apply all queued requests, then schedule the next run in 100 ms"""
        # The widget is kept read-only except while it is being updated
        self.configure(state=tk.NORMAL)
        while True:
            try:
                pending = self.queue.get_nowait()
            except queue.Empty:
                break
            if pending is None:
                self.delete(1.0, tk.END)
            else:
                self.insert(tk.END, str(pending))
            # Keep the newest text visible
            self.see(tk.END)
            self.update_idletasks()
        self.configure(state=tk.DISABLED)
        self.after(100, self.update_me)
|
||||||
|
|
||||||
|
|
||||||
|
class MainWindow(tk.Tk):
    """
    Main application window.

    Lets the user pick audio files, an export directory, a ".pth" model and
    the conversion settings, then runs inference.main in a background thread.
    """
    # --Constants--
    # None

    def __init__(self):
        # Run the __init__ method on the tk.Tk class
        super().__init__()

        # --Window Settings--
        self.title('Desktop Application')
        # Set Geometry and Center Window
        self.geometry('{width}x{height}+{xpad}+{ypad}'.format(
            width=530,
            height=690,
            xpad=int(self.winfo_screenwidth()/2 - 530/2),
            ypad=int(self.winfo_screenheight()/2 - 690/2)))
        self.configure(bg='#FFFFFF')  # Set background color to white
        self.resizable(False, False)
        # Process the geometry request so winfo_width() is valid below
        self.update()

        # --Variables--
        self.logo_img = open_image(path=logo_path,
                                   size=(self.winfo_width(), 9999),
                                   keep_aspect=True)[0]
        # Maps the label shown in the model menu to the model's file path;
        # unknown labels map to '' which fails the isfile check on start
        self.label_to_path = defaultdict(lambda: '')
        # -Tkinter Value Holders-
        data = load_data()
        self.exportPath_var = tk.StringVar(value=data['exportPath'])
        self.filePaths = ''
        self.gpuConversion_var = tk.BooleanVar(value=data['gpuConversion'])
        self.postprocessing_var = tk.BooleanVar(value=data['postprocessing'])
        self.mask_var = tk.BooleanVar(value=data['mask'])
        self.stackLoops_var = tk.IntVar(value=data['stackLoops'])
        self.srValue_var = tk.IntVar(value=data['srValue'])
        self.hopValue_var = tk.IntVar(value=data['hopValue'])
        self.winSize_var = tk.IntVar(value=data['winSize'])
        self.stackLoopsNum_var = tk.IntVar(value=data['stackLoopsNum'])
        self.model_var = tk.StringVar(value='')

        self.progress_var = tk.IntVar(value=0)

        # --Widgets--
        self.create_widgets()
        self.configure_widgets()
        self.place_widgets()

        self.update_available_models()
        self.update_stack_state()

    # -Helpers-
    @staticmethod
    def _has_extension(path, extensions):
        """Return True if path ends with one of extensions (case-insensitive)"""
        return path.lower().endswith(tuple(extensions))

    # -Widget Methods-
    def create_widgets(self):
        """Create window widgets"""
        self.title_Label = tk.Label(master=self, bg='white',
                                    image=self.logo_img, compound=tk.TOP)
        self.filePaths_Frame = tk.Frame(master=self, bg='white')
        self.fill_filePaths_Frame()

        self.options_Frame = tk.Frame(master=self, bg='white')
        self.fill_options_Frame()

        self.conversion_Button = ttk.Button(master=self,
                                            text='Start Conversion',
                                            command=self.start_conversion)

        self.progressbar = ttk.Progressbar(master=self,
                                           variable=self.progress_var)

        self.command_Text = ThreadSafeConsole(master=self,
                                              background='#EFEFEF',
                                              borderwidth=0,)
        self.command_Text.write(f'COMMAND LINE [{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}]')  # nopep8

    def configure_widgets(self):
        """Change widget styling and appearance"""
        ttk.Style().configure('TCheckbutton', background='white')

    def place_widgets(self):
        """Place main widgets"""
        self.title_Label.place(x=-2, y=-2)

        self.filePaths_Frame.place(x=10, y=0, width=-20, height=0,
                                   relx=0, rely=0.19, relwidth=1, relheight=0.14)
        self.options_Frame.place(x=25, y=15, width=-50, height=-30,
                                 relx=0, rely=0.33, relwidth=1, relheight=0.23)
        self.conversion_Button.place(x=10, y=5, width=-20, height=-10,
                                     relx=0, rely=0.56, relwidth=1, relheight=0.07)
        self.command_Text.place(x=15, y=10, width=-30, height=-10,
                                relx=0, rely=0.63, relwidth=1, relheight=0.28)
        self.progressbar.place(x=25, y=15, width=-50, height=-30,
                               relx=0, rely=0.91, relwidth=1, relheight=0.09)

    def fill_filePaths_Frame(self):
        """Fill Frame with necessary widgets"""
        # -Create Widgets-
        # Save To Option
        self.filePaths_saveTo_Button = ttk.Button(master=self.filePaths_Frame,
                                                  text='Save to',
                                                  command=self.open_export_filedialog)
        self.filePaths_saveTo_Entry = ttk.Entry(master=self.filePaths_Frame,
                                                textvariable=self.exportPath_var,
                                                state=tk.DISABLED
                                                )
        # Select Music Files Option
        self.filePaths_musicFile_Button = ttk.Button(master=self.filePaths_Frame,
                                                     text='Select Your Audio File(s)',
                                                     command=self.open_file_filedialog)
        # The entry is filled manually in open_file_filedialog, so it has
        # no text variable attached
        self.filePaths_musicFile_Entry = ttk.Entry(master=self.filePaths_Frame,
                                                   state=tk.DISABLED
                                                   )
        # -Place Widgets-
        # Save To Option
        self.filePaths_saveTo_Button.place(x=0, y=5, width=0, height=-10,
                                           relx=0, rely=0, relwidth=0.3, relheight=0.5)
        self.filePaths_saveTo_Entry.place(x=10, y=7, width=-20, height=-14,
                                          relx=0.3, rely=0, relwidth=0.7, relheight=0.5)
        # Select Music Files Option
        self.filePaths_musicFile_Button.place(x=0, y=5, width=0, height=-10,
                                              relx=0, rely=0.5, relwidth=0.4, relheight=0.5)
        self.filePaths_musicFile_Entry.place(x=10, y=7, width=-20, height=-14,
                                             relx=0.4, rely=0.5, relwidth=0.6, relheight=0.5)

    def fill_options_Frame(self):
        """Fill Frame with necessary widgets"""
        # -Create Widgets-
        # GPU Selection
        self.options_gpu_Checkbutton = ttk.Checkbutton(master=self.options_Frame,
                                                       text='GPU Conversion',
                                                       variable=self.gpuConversion_var,
                                                       )
        # Postprocessing
        self.options_post_Checkbutton = ttk.Checkbutton(master=self.options_Frame,
                                                        text='Post-Process (Dev Opt)',
                                                        variable=self.postprocessing_var,
                                                        )
        # Mask
        self.options_mask_Checkbutton = ttk.Checkbutton(master=self.options_Frame,
                                                        text='Save Mask PNG',
                                                        variable=self.mask_var,
                                                        )
        # SR
        self.options_sr_Entry = ttk.Entry(master=self.options_Frame,
                                          textvariable=self.srValue_var,)
        self.options_sr_Label = tk.Label(master=self.options_Frame,
                                         text='SR', anchor=tk.W,
                                         background='white')
        # HOP LENGTH
        self.options_hop_Entry = ttk.Entry(master=self.options_Frame,
                                           textvariable=self.hopValue_var,)
        self.options_hop_Label = tk.Label(master=self.options_Frame,
                                          text='HOP LENGTH', anchor=tk.W,
                                          background='white')
        # WINDOW SIZE
        self.options_winSize_Entry = ttk.Entry(master=self.options_Frame,
                                               textvariable=self.winSize_var,)
        self.options_winSize_Label = tk.Label(master=self.options_Frame,
                                              text='WINDOW SIZE', anchor=tk.W,
                                              background='white')
        # Stack Loops
        self.options_stack_Checkbutton = ttk.Checkbutton(master=self.options_Frame,
                                                         text='Stack Passes',
                                                         variable=self.stackLoops_var,
                                                         )
        self.options_stack_Entry = ttk.Entry(master=self.options_Frame,
                                             textvariable=self.stackLoopsNum_var,)
        self.options_stack_Checkbutton.configure(command=self.update_stack_state)  # nopep8
        # Choose Model
        self.options_model_Label = tk.Label(master=self.options_Frame,
                                            text='Choose Your Model',
                                            background='white')
        # Placeholder entries only; update_available_models replaces them
        self.options_model_Optionmenu = ttk.OptionMenu(self.options_Frame,
                                                       self.model_var,
                                                       1,
                                                       *[1, 2])
        self.options_model_Button = ttk.Button(master=self.options_Frame,
                                               text='Add Your Own Model',
                                               command=self.open_newModel_filedialog)
        # -Place Widgets-
        # GPU Selection
        self.options_gpu_Checkbutton.place(x=0, y=0, width=0, height=0,
                                           relx=0, rely=0, relwidth=1/3, relheight=1/4)
        self.options_post_Checkbutton.place(x=0, y=0, width=0, height=0,
                                            relx=0, rely=1/4, relwidth=1/3, relheight=1/4)
        self.options_mask_Checkbutton.place(x=0, y=0, width=0, height=0,
                                            relx=0, rely=2/4, relwidth=1/3, relheight=1/4)
        # Stack Loops
        self.options_stack_Checkbutton.place(x=0, y=0, width=0, height=0,
                                             relx=0, rely=3/4, relwidth=1/3/4*3, relheight=1/4)
        self.options_stack_Entry.place(x=0, y=4, width=0, height=-8,
                                       relx=1/3/4*2.4, rely=3/4, relwidth=1/3/4*0.9, relheight=1/4)
        # SR
        self.options_sr_Entry.place(x=-5, y=4, width=5, height=-8,
                                    relx=1/3, rely=0, relwidth=1/3/4, relheight=1/4)
        self.options_sr_Label.place(x=10, y=4, width=-10, height=-8,
                                    relx=1/3/4 + 1/3, rely=0, relwidth=1/3/4*3, relheight=1/4)
        # HOP LENGTH
        self.options_hop_Entry.place(x=-5, y=4, width=5, height=-8,
                                     relx=1/3, rely=1/4, relwidth=1/3/4, relheight=1/4)
        self.options_hop_Label.place(x=10, y=4, width=-10, height=-8,
                                     relx=1/3/4 + 1/3, rely=1/4, relwidth=1/3/4*3, relheight=1/4)
        # WINDOW SIZE
        self.options_winSize_Entry.place(x=-5, y=4, width=5, height=-8,
                                         relx=1/3, rely=2/4, relwidth=1/3/4, relheight=1/4)
        self.options_winSize_Label.place(x=10, y=4, width=-10, height=-8,
                                         relx=1/3/4 + 1/3, rely=2/4, relwidth=1/3/4*3, relheight=1/4)
        # Choose Model
        self.options_model_Label.place(x=0, y=0, width=0, height=-10,
                                       relx=2/3, rely=0, relwidth=1/3, relheight=1/3)
        self.options_model_Optionmenu.place(x=15, y=-2.5, width=-30, height=-10,
                                            relx=2/3, rely=1/3, relwidth=1/3, relheight=1/3)
        self.options_model_Button.place(x=15, y=0, width=-30, height=-5,
                                        relx=2/3, rely=2/3, relwidth=1/3, relheight=1/3)

    # Opening filedialogs
    def open_file_filedialog(self):
        """Make user select music files"""
        paths = tk.filedialog.askopenfilenames(
            parent=self,
            title='Select Music Files',
            initialdir='/',
            initialfile='',
            filetypes=[
                ('; '.join(AVAILABLE_FORMATS).replace('.', ''),
                 '*' + ' *'.join(AVAILABLE_FORMATS)),
            ])
        if not paths:  # Dialog was cancelled
            return
        # Reject the whole selection as soon as one file is unsupported
        for path in paths:
            if not self._has_extension(path, AVAILABLE_FORMATS):
                tk.messagebox.showerror(master=self,
                                        title='Invalid File',
                                        message='Please select a \"{}\" audio file!'.format('" or "'.join(AVAILABLE_FORMATS)),  # nopep8
                                        detail=f'File: {path}')
                return
        self.filePaths = paths
        # Change the entry text (the entry is read-only for the user)
        self.filePaths_musicFile_Entry.configure(state=tk.NORMAL)
        self.filePaths_musicFile_Entry.delete(0, tk.END)
        self.filePaths_musicFile_Entry.insert(0, self.filePaths)
        self.filePaths_musicFile_Entry.configure(state=tk.DISABLED)

    def open_export_filedialog(self):
        """Make user select a folder to export the converted files in"""
        path = tk.filedialog.askdirectory(
            parent=self,
            title='Select Folder',
            initialdir='/',)
        if path:  # Path selected
            self.exportPath_var.set(path)

    def open_newModel_filedialog(self):
        """Make user select a ".pth" model to use for the vocal removing"""
        path = tk.filedialog.askopenfilename(
            parent=self,
            title='Select Model File',
            initialdir='/',
            initialfile='',
            filetypes=[
                ('pth', '*.pth'),
            ])

        if not path:  # Dialog was cancelled
            return
        if self._has_extension(path, ('.pth',)):
            self.add_available_model(abs_path=path)
        else:
            tk.messagebox.showerror(master=self,
                                    title='Invalid File',
                                    message='Please select a PyTorch model file ".pth"!',
                                    detail=f'File: {path}')

    def start_conversion(self):
        """
        Validate the current inputs, save the settings and start the
        conversion of all selected audio files in a background thread
        """
        # -Get all variables-
        input_paths = self.filePaths
        export_path = self.exportPath_var.get()
        model_path = self.label_to_path[self.model_var.get()]
        try:
            sr = self.srValue_var.get()
            hop_length = self.hopValue_var.get()
            window_size = self.winSize_var.get()
            loops_num = self.stackLoopsNum_var.get()
        except tk.TclError:  # Non integer was put in entry box
            tk.messagebox.showwarning(master=self,
                                      title='Invalid Input',
                                      message='Please make sure you only input integer numbers!')
            return

        # -Check for invalid inputs-
        # Every selected file must still exist and have a supported
        # extension; an empty selection is invalid as well
        files_valid = bool(input_paths) and all(
            os.path.isfile(path) and self._has_extension(path, AVAILABLE_FORMATS)
            for path in input_paths)
        if not files_valid:
            tk.messagebox.showwarning(master=self,
                                      title='Invalid Music File',
                                      message='You have selected an invalid music file!\nPlease make sure that your files still exist and end with either ".mp3", ".mp4", ".m4a", ".flac", ".wav"')
            return
        if not os.path.isdir(export_path):
            tk.messagebox.showwarning(master=self,
                                      title='Invalid Export Directory',
                                      message='You have selected an invalid export directory!\nPlease make sure that your directory still exists!')
            return
        if not os.path.isfile(model_path):
            tk.messagebox.showwarning(master=self,
                                      title='Invalid Model File',
                                      message='You have selected an invalid model file!\nPlease make sure that your model file still exists!')
            return

        # -Save Data-
        save_data(data={
            'exportPath': export_path,
            'gpuConversion': self.gpuConversion_var.get(),
            'postprocessing': self.postprocessing_var.get(),
            'mask': self.mask_var.get(),
            'stackLoops': self.stackLoops_var.get(),
            'srValue': sr,
            'hopValue': hop_length,
            'winSize': window_size,
            'stackLoopsNum': loops_num,
        })

        # -Run the algorithm-
        # Runs in a daemon thread so the window stays responsive
        threading.Thread(target=inference.main,
                         kwargs={
                             'input_paths': input_paths,
                             # GPU index 0 when enabled, -1 selects the CPU
                             'gpu': 0 if self.gpuConversion_var.get() else -1,
                             'postprocess': self.postprocessing_var.get(),
                             'out_mask': self.mask_var.get(),
                             'model': model_path,
                             'sr': sr,
                             'hop_length': hop_length,
                             'window_size': window_size,
                             'export_path': export_path,
                             'loops': loops_num,
                             # Other Variables (Tkinter)
                             'window': self,
                             'command_widget': self.command_Text,
                             'button_widget': self.conversion_Button,
                             'progress_var': self.progress_var,
                         },
                         daemon=True
                         ).start()

    # Models
    def update_available_models(self):
        """
        Loop through every model (.pth) in the models directory
        and add to the select your model list
        """
        # Delete all previous options
        self.model_var.set('')
        self.options_model_Optionmenu['menu'].delete(0, 'end')

        # A missing models directory simply means there is nothing to list
        if not os.path.isdir(models_dir):
            return

        for file_name in sorted(os.listdir(models_dir)):
            if file_name.endswith('.pth'):
                # Add Radiobutton to the Options Menu
                self.options_model_Optionmenu['menu'].add_radiobutton(label=file_name,
                                                                      command=tk._setit(self.model_var, file_name))
                # Link the files name to its absolute path
                self.label_to_path[file_name] = os.path.join(models_dir, file_name)  # nopep8

    def add_available_model(self, abs_path: str):
        """
        Add the given absolute path of the file (.pth) to the available options
        and set the currently selected model to this one
        """
        # Case-insensitive, matching the check in open_newModel_filedialog
        if self._has_extension(abs_path, ('.pth',)):
            file_name = f'[CUSTOM] {os.path.basename(abs_path)}'
            # Add Radiobutton to the Options Menu
            self.options_model_Optionmenu['menu'].add_radiobutton(label=file_name,
                                                                  command=tk._setit(self.model_var, file_name))
            # Set selected model to the newly added one
            self.model_var.set(file_name)
            # Link the files name to its absolute path
            self.label_to_path[file_name] = abs_path  # nopep8
        else:
            tk.messagebox.showerror(master=self,
                                    title='Invalid File',
                                    message='Please select a model file with the ".pth" ending!',
                                    detail=f'File: {abs_path}')

    def update_stack_state(self):
        """
        Vary the stack Entry from disabled/enabled based on the
        stackLoops variable, which is connected to the checkbutton
        """
        if self.stackLoops_var.get():
            self.options_stack_Entry.configure(state=tk.NORMAL)
        else:
            self.options_stack_Entry.configure(state=tk.DISABLED)
            # Without stacking exactly one pass is made
            self.stackLoopsNum_var.set(1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Build the application window, then hand control to the Tk event loop
    root = MainWindow()

    root.mainloop()
|
||||||
76
augment.py
Normal file
76
augment.py
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
import librosa
|
||||||
|
import numpy as np
|
||||||
|
import soundfile as sf
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
from lib import spec_utils
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Pitch-shift every mixture/instrumental pair with the external
    # ``soundstretch`` tool and store the magnitude spectrograms of the
    # shifted audio as .npy files next to the input files.
    p = argparse.ArgumentParser()
    p.add_argument('--sr', '-r', type=int, default=44100)
    p.add_argument('--hop_length', '-l', type=int, default=1024)
    p.add_argument('--pitch', '-p', type=int, default=-2)
    p.add_argument('--mixture_dataset', '-m', required=True)
    p.add_argument('--instrumental_dataset', '-i', required=True)
    args = p.parse_args()

    input_exts = ['.wav', '.m4a', '.3gp', '.oma', '.mp3', '.mp4']
    # Files are paired purely by sorted order of the two directories
    X_list = sorted([
        os.path.join(args.mixture_dataset, fname)
        for fname in os.listdir(args.mixture_dataset)
        if os.path.splitext(fname)[1] in input_exts])
    y_list = sorted([
        os.path.join(args.instrumental_dataset, fname)
        for fname in os.listdir(args.instrumental_dataset)
        if os.path.splitext(fname)[1] in input_exts])

    # Temporary wave files exchanged with soundstretch (in the working dir)
    input_i = 'input_i_{}.wav'.format(args.pitch)
    input_v = 'input_v_{}.wav'.format(args.pitch)
    output_i = 'output_i_{}.wav'.format(args.pitch)
    output_v = 'output_v_{}.wav'.format(args.pitch)
    # Argument lists instead of one command string: without shell=True a
    # string is treated as the program name itself on POSIX systems
    cmd_i = ['soundstretch', input_i, output_i, '-pitch={}'.format(args.pitch)]
    cmd_v = ['soundstretch', input_v, output_v, '-pitch={}'.format(args.pitch)]
    suffix = '_pitch{}.npy'.format(args.pitch)

    filelist = list(zip(X_list, y_list))
    for mix_path, inst_path in tqdm(filelist):
        try:
            # sr/mono are passed by keyword so the call also works on
            # librosa releases where these parameters are keyword-only
            X, _ = librosa.load(
                mix_path, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
            y, _ = librosa.load(
                inst_path, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')

            X, _ = librosa.effects.trim(X)
            y, _ = librosa.effects.trim(y)
            X, y = spec_utils.align_wave_head_and_tail(X, y, args.sr)

            # Vocals are approximated as mixture minus instrumental
            v = X - y
            sf.write(input_i, y.T, args.sr)
            sf.write(input_v, v.T, args.sr)
            subprocess.call(cmd_i, stderr=subprocess.DEVNULL)
            subprocess.call(cmd_v, stderr=subprocess.DEVNULL)

            y, _ = librosa.load(
                output_i, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
            v, _ = librosa.load(
                output_v, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
            # Rebuild the mixture from the two pitch-shifted signals
            X = y + v

            spec = spec_utils.calc_spec(X, args.hop_length)
            basename, _ = os.path.splitext(os.path.basename(mix_path))
            outpath = os.path.join(args.mixture_dataset, basename + suffix)
            np.save(outpath, np.abs(spec))

            spec = spec_utils.calc_spec(y, args.hop_length)
            basename, _ = os.path.splitext(os.path.basename(inst_path))
            outpath = os.path.join(args.instrumental_dataset, basename + suffix)
            np.save(outpath, np.abs(spec))
        finally:
            # Remove the temporary files even when a step above failed
            for tmp_path in (input_i, input_v, output_i, output_v):
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
|
||||||
200
inference.py
Normal file
200
inference.py
Normal file
@@ -0,0 +1,200 @@
|
|||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
import librosa
|
||||||
|
import numpy as np
|
||||||
|
import soundfile as sf
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
from lib import dataset
|
||||||
|
from lib import nets
|
||||||
|
from lib import spec_utils
|
||||||
|
|
||||||
|
# Variable manipulation and command line text parsing
|
||||||
|
import torch
|
||||||
|
import tkinter as tk
|
||||||
|
import traceback # Error Message Recent Calls
|
||||||
|
|
||||||
|
|
||||||
|
class Namespace:
    """
    Simple attribute container: every keyword argument given to the
    constructor becomes an attribute of the instance
    (replaces ArgumentParser)
    """

    def __init__(self, **kwargs):
        for attr_name, attr_value in kwargs.items():
            setattr(self, attr_name, attr_value)
|
||||||
|
|
||||||
|
|
||||||
|
def main(window: tk.Wm, input_paths: list, gpu: bool = -1,
|
||||||
|
model: str = 'models/baseline.pth', sr: int = 44100, hop_length: int = 1024,
|
||||||
|
window_size: int = 512, out_mask: bool = False, postprocess: bool = False,
|
||||||
|
export_path: str = '', loops: int = 1,
|
||||||
|
# Other Variables (Tkinter)
|
||||||
|
progress_var: tk.Variable = None, button_widget: tk.Button = None, command_widget: tk.Text = None,
|
||||||
|
):
|
||||||
|
def load_model():
|
||||||
|
args.command_widget.write('Loading model...\n') # nopep8 Write Command Text
|
||||||
|
device = torch.device('cpu')
|
||||||
|
model = nets.CascadedASPPNet()
|
||||||
|
model.load_state_dict(torch.load(args.model, map_location=device))
|
||||||
|
if torch.cuda.is_available() and args.gpu >= 0:
|
||||||
|
device = torch.device('cuda:{}'.format(args.gpu))
|
||||||
|
model.to(device)
|
||||||
|
args.command_widget.write('Done!\n') # nopep8 Write Command Text
|
||||||
|
|
||||||
|
return model, device
|
||||||
|
|
||||||
|
def load_wave_source():
|
||||||
|
args.command_widget.write(base_text + 'Loading wave source...\n') # nopep8 Write Command Text
|
||||||
|
X, sr = librosa.load(music_file,
|
||||||
|
args.sr,
|
||||||
|
False,
|
||||||
|
dtype=np.float32,
|
||||||
|
res_type='kaiser_fast')
|
||||||
|
args.command_widget.write(base_text + 'Done!\n') # nopep8 Write Command Text
|
||||||
|
|
||||||
|
return X, sr
|
||||||
|
|
||||||
|
def stft_wave_source(X):
|
||||||
|
args.command_widget.write(base_text + 'Stft of wave source...\n') # nopep8 Write Command Text
|
||||||
|
X = spec_utils.calc_spec(X, args.hop_length)
|
||||||
|
X, phase = np.abs(X), np.exp(1.j * np.angle(X))
|
||||||
|
coeff = X.max()
|
||||||
|
X /= coeff
|
||||||
|
|
||||||
|
offset = model.offset
|
||||||
|
l, r, roi_size = dataset.make_padding(
|
||||||
|
X.shape[2], args.window_size, offset)
|
||||||
|
X_pad = np.pad(X, ((0, 0), (0, 0), (l, r)), mode='constant')
|
||||||
|
X_roll = np.roll(X_pad, roi_size // 2, axis=2)
|
||||||
|
|
||||||
|
model.eval()
|
||||||
|
with torch.no_grad():
|
||||||
|
masks = []
|
||||||
|
masks_roll = []
|
||||||
|
length = int(np.ceil(X.shape[2] / roi_size))
|
||||||
|
for i in tqdm(range(length)):
|
||||||
|
progress_var.set(base_progress + max_progress * (0.1 + (0.6/length * i))) # nopep8 Update Progress
|
||||||
|
start = i * roi_size
|
||||||
|
X_window = torch.from_numpy(np.asarray([
|
||||||
|
X_pad[:, :, start:start + args.window_size],
|
||||||
|
X_roll[:, :, start:start + args.window_size]
|
||||||
|
])).to(device)
|
||||||
|
pred = model.predict(X_window)
|
||||||
|
pred = pred.detach().cpu().numpy()
|
||||||
|
masks.append(pred[0])
|
||||||
|
masks_roll.append(pred[1])
|
||||||
|
|
||||||
|
mask = np.concatenate(masks, axis=2)[:, :, :X.shape[2]]
|
||||||
|
mask_roll = np.concatenate(masks_roll, axis=2)[
|
||||||
|
:, :, :X.shape[2]]
|
||||||
|
mask = (mask + np.roll(mask_roll, -roi_size // 2, axis=2)) / 2
|
||||||
|
|
||||||
|
if args.postprocess:
|
||||||
|
vocal = X * (1 - mask) * coeff
|
||||||
|
mask = spec_utils.mask_uninformative(mask, vocal)
|
||||||
|
args.command_widget.write(base_text + 'Done!\n') # nopep8 Write Command Text
|
||||||
|
|
||||||
|
inst = X * mask * coeff
|
||||||
|
vocal = X * (1 - mask) * coeff
|
||||||
|
|
||||||
|
return inst, vocal, phase, mask
|
||||||
|
|
||||||
|
def invert_instrum_vocal(inst, vocal, phase):
|
||||||
|
args.command_widget.write(base_text + 'Inverse stft of instruments and vocals...\n') # nopep8 Write Command Text
|
||||||
|
|
||||||
|
wav_instrument = spec_utils.spec_to_wav(inst, phase, args.hop_length) # nopep8
|
||||||
|
wav_vocals = spec_utils.spec_to_wav(vocal, phase, args.hop_length) # nopep8
|
||||||
|
|
||||||
|
args.command_widget.write(base_text + 'Done!\n') # nopep8 Write Command Text
|
||||||
|
|
||||||
|
return wav_instrument, wav_vocals
|
||||||
|
|
||||||
|
def save_files(wav_instrument, wav_vocals):
|
||||||
|
args.command_widget.write(base_text + 'Saving Files...\n') # nopep8 Write Command Text
|
||||||
|
sf.write(f'{export_path}/{base_name}_(Instrumental).wav',
|
||||||
|
wav_instrument.T, sr)
|
||||||
|
if cur_loop == 0:
|
||||||
|
sf.write(f'{export_path}/{base_name}_(Vocals).wav',
|
||||||
|
wav_vocals.T, sr)
|
||||||
|
if (cur_loop == (args.loops - 1) and
|
||||||
|
args.loops > 1):
|
||||||
|
sf.write(f'{export_path}/{base_name}_(Last_Vocals).wav',
|
||||||
|
wav_vocals.T, sr)
|
||||||
|
|
||||||
|
args.command_widget.write(base_text + 'Done!\n') # nopep8 Write Command Text
|
||||||
|
|
||||||
|
def create_mask():
    """Render the current ``mask`` as a PNG image and save it.

    The inverted mask ``(1 - mask)`` is scaled to 8-bit, its first axis is
    moved last, and the per-pixel maximum over that last axis is prepended
    as an extra channel.  The image is flipped along its first axis,
    PNG-encoded with OpenCV and written to
    ``{export_path}/{base_name}_(Mask).png``.

    Relies on ``args``, ``base_text``, ``mask``, ``export_path``,
    ``base_name``, ``np`` and ``cv2`` from the enclosing scope.
    """
    console = args.command_widget
    console.write(base_text + 'Creating Mask...\n')  # nopep8 Write Command Text

    inverted = np.uint8((1 - mask) * 255)
    channels_last = inverted.transpose(1, 2, 0)
    peak = np.max(channels_last, axis=2, keepdims=True)
    # [::-1] reverses the first axis of the stacked image.
    image = np.concatenate([peak, channels_last], axis=2)[::-1]
    _, encoded = cv2.imencode('.png', image)

    console.write(base_text + 'Saving Mask...\n')  # nopep8 Write Command Text
    with open(f'{export_path}/{base_name}_(Mask).png', mode='wb') as png_file:
        encoded.tofile(png_file)

    console.write(base_text + 'Done!\n')  # nopep8 Write Command Text
|
||||||
|
|
||||||
|
args = Namespace(input=input_paths, gpu=gpu, model=model,
|
||||||
|
sr=sr, hop_length=hop_length, window_size=window_size,
|
||||||
|
out_mask=out_mask, postprocess=postprocess, export=export_path,
|
||||||
|
loops=loops,
|
||||||
|
# Other Variables (Tkinter)
|
||||||
|
window=window, progress_var=progress_var,
|
||||||
|
button_widget=button_widget, command_widget=command_widget,
|
||||||
|
)
|
||||||
|
args.command_widget.clear() # Clear Command Text
|
||||||
|
args.button_widget.configure(state=tk.DISABLED) # Disable Button
|
||||||
|
total_files = len(args.input) # Used to calculate progress
|
||||||
|
|
||||||
|
model, device = load_model()
|
||||||
|
|
||||||
|
for file_num, music_file in enumerate(args.input, start=1):
|
||||||
|
try:
|
||||||
|
base_name = f'{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}'
|
||||||
|
for cur_loop in range(args.loops):
|
||||||
|
if cur_loop > 0:
|
||||||
|
args.command_widget.write(f'File {file_num}/{total_files}: ' + 'Next Pass!\n') # nopep8 Write Command Text
|
||||||
|
music_file = f'{export_path}/{base_name}_(Instrumental).wav'
|
||||||
|
base_progress = 100 / \
|
||||||
|
(total_files*args.loops) * \
|
||||||
|
((file_num*args.loops)-((args.loops-1) - cur_loop)-1)
|
||||||
|
base_text = 'File {file_num}/{total_files}:{loop} '.format(
|
||||||
|
file_num=file_num,
|
||||||
|
total_files=total_files,
|
||||||
|
loop='' if args.loops <= 1 else f' ({cur_loop+1}/{args.loops})')
|
||||||
|
max_progress = 100 / (total_files*args.loops)
|
||||||
|
progress_var.set(base_progress + max_progress * 0.05) # nopep8 Update Progress
|
||||||
|
|
||||||
|
X, sr = load_wave_source()
|
||||||
|
progress_var.set(base_progress + max_progress * 0.1) # nopep8 Update Progress
|
||||||
|
|
||||||
|
inst, vocal, phase, mask = stft_wave_source(X)
|
||||||
|
progress_var.set(base_progress + max_progress * 0.7) # nopep8 Update Progress
|
||||||
|
|
||||||
|
wav_instrument, wav_vocals = invert_instrum_vocal(inst, vocal, phase) # nopep8
|
||||||
|
progress_var.set(base_progress + max_progress * 0.8) # nopep8 Update Progress
|
||||||
|
|
||||||
|
save_files(wav_instrument, wav_vocals)
|
||||||
|
progress_var.set(base_progress + max_progress * 0.9) # nopep8 Update Progress
|
||||||
|
|
||||||
|
if args.out_mask:
|
||||||
|
create_mask()
|
||||||
|
progress_var.set(base_progress + max_progress * 1) # nopep8 Update Progress
|
||||||
|
|
||||||
|
args.command_widget.write(base_text + 'Completed Seperation!\n\n') # nopep8 Write Command Text
|
||||||
|
except Exception as e:
|
||||||
|
traceback_text = ''.join(traceback.format_tb(e.__traceback__))
|
||||||
|
print(traceback_text)
|
||||||
|
print(type(e).__name__, e)
|
||||||
|
tk.messagebox.showerror(master=args.window,
|
||||||
|
title='Untracked Error',
|
||||||
|
message=f'Traceback Error: "{traceback_text}"\n{type(e).__name__}: "{e}"\nFile: {music_file}\n\nPlease contact the creator and attach a screenshot of this error with the file which caused it!')
|
||||||
|
args.button_widget.configure(state=tk.NORMAL) # Enable Button
|
||||||
|
return
|
||||||
|
|
||||||
|
progress_var.set(100) # Update Progress
|
||||||
|
args.command_widget.write(f'Conversion(s) Completed and Saving all Files!') # nopep8 Write Command Text
|
||||||
|
args.button_widget.configure(state=tk.NORMAL) # Enable Button
|
||||||
223
train.py
Normal file
223
train.py
Normal file
@@ -0,0 +1,223 @@
|
|||||||
|
import argparse
|
||||||
|
from datetime import datetime as dt
|
||||||
|
import gc
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
|
||||||
|
from lib import dataset
|
||||||
|
from lib import nets
|
||||||
|
from lib import spec_utils
|
||||||
|
|
||||||
|
|
||||||
|
def train_val_split(mix_dir, inst_dir, val_rate, val_filelist_json):
    """Pair mixture/instrumental files and split them into train and val sets.

    Files in ``mix_dir`` and ``inst_dir`` with a supported audio extension
    are sorted by path and paired positionally, then the pairs are shuffled
    with the global ``random`` state.

    Args:
        mix_dir: Directory containing the mixture tracks.
        inst_dir: Directory containing the instrumental tracks.
        val_rate: Fraction of pairs to hold out for validation when no
            (non-empty) validation file list is supplied.
        val_filelist_json: Optional path to a JSON file holding a list of
            ``[mixture_path, instrumental_path]`` pairs to use as the
            validation set; ``None`` to split by ``val_rate``.

    Returns:
        tuple: ``(train_filelist, val_filelist)``.
    """
    input_exts = ['.wav', '.m4a', '.3gp', '.oma', '.mp3', '.mp4']

    def list_audio(directory):
        # Sorted so that mixture and instrumental files line up by name.
        return sorted([
            os.path.join(directory, fname)
            for fname in os.listdir(directory)
            if os.path.splitext(fname)[1] in input_exts])

    X_list = list_audio(mix_dir)
    y_list = list_audio(inst_dir)

    filelist = list(zip(X_list, y_list))
    random.shuffle(filelist)

    val_filelist = []
    if val_filelist_json is not None:
        with open(val_filelist_json, 'r', encoding='utf8') as f:
            val_filelist = json.load(f)

    if len(val_filelist) == 0:
        val_size = int(len(filelist) * val_rate)
        # Use an explicit split index: slicing with ``-val_size`` breaks when
        # val_size is 0 (``[:-0]`` is empty and ``[-0:]`` is everything),
        # which used to leave the training set empty for small datasets.
        val_size = min(max(val_size, 0), len(filelist))
        split = len(filelist) - val_size
        train_filelist = filelist[:split]
        val_filelist = filelist[split:]
    else:
        # JSON stores pairs as lists, so compare against list(pair).
        train_filelist = [
            pair for pair in filelist
            if list(pair) not in val_filelist]

    return train_filelist, val_filelist
|
||||||
|
|
||||||
|
|
||||||
|
def train_inner_epoch(X_train, y_train, model, optimizer, batchsize, instance_loss):
    """Run one shuffled pass over the training arrays and update the model.

    Each batch is moved to the device the model's parameters live on, so
    the function works whether or not the model has been moved to a GPU.
    The loss is ``0.9 * mean(L1(X * mask, y)) + 0.1 * L1(X * aux, y)``.

    Args:
        X_train: NumPy array of mixture inputs, indexed by sample.
        y_train: NumPy array of targets, aligned with ``X_train``.
        model: Network returning ``(mask, aux)`` when called on a batch.
        optimizer: Optimizer stepping the model's parameters.
        batchsize: Number of samples per batch.
        instance_loss: NumPy array with one slot per sample; the per-sample
            mean absolute error is accumulated into it in place.

    Returns:
        float: Batch-size-weighted mean loss over ``X_train``.
    """
    # Follow the model's device instead of forcing CPU; a CUDA model fed
    # CPU tensors raises a device-mismatch error.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    sum_loss = 0
    model.train()
    aux_crit = nn.L1Loss()
    criterion = nn.L1Loss(reduction='none')  # keep per-element loss for instance_loss
    perm = np.random.permutation(len(X_train))
    for i in range(0, len(X_train), batchsize):
        local_perm = perm[i: i + batchsize]
        X_batch = torch.from_numpy(X_train[local_perm]).to(device)
        y_batch = torch.from_numpy(y_train[local_perm]).to(device)

        model.zero_grad()
        mask, aux = model(X_batch)

        # Auxiliary loss uses the uncropped batch; the main loss uses the
        # batch cropped by spec_utils.crop_center against the mask.
        aux_loss = aux_crit(X_batch * aux, y_batch)
        X_batch = spec_utils.crop_center(mask, X_batch, False)
        y_batch = spec_utils.crop_center(mask, y_batch, False)
        abs_diff = criterion(X_batch * mask, y_batch)

        loss = abs_diff.mean() * 0.9 + aux_loss * 0.1
        loss.backward()
        optimizer.step()

        abs_diff_np = abs_diff.detach().cpu().numpy()
        instance_loss[local_perm] += abs_diff_np.mean(axis=(1, 2, 3))
        sum_loss += loss.item() * len(X_batch)

    return sum_loss / len(X_train)
|
||||||
|
|
||||||
|
|
||||||
|
def val_inner_epoch(dataloader, model):
    """Compute the mean L1 validation loss of ``model`` over ``dataloader``.

    The model is put in eval mode and gradients are disabled.  Each batch
    is moved to the device the model's parameters live on, so validation
    works for both CPU and GPU models.

    Args:
        dataloader: Iterable yielding ``(X_batch, y_batch)`` tensor pairs
            and exposing ``.dataset`` for the total sample count.
        model: Network exposing ``predict(X_batch)`` that returns a mask.

    Returns:
        float: Batch-size-weighted mean loss over ``dataloader.dataset``.
    """
    # Follow the model's device instead of forcing CPU; a CUDA model fed
    # CPU tensors raises a device-mismatch error.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    sum_loss = 0
    model.eval()
    criterion = nn.L1Loss()
    with torch.no_grad():
        for X_batch, y_batch in dataloader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            mask = model.predict(X_batch)
            # Crop inputs/targets against the predicted mask before comparing.
            X_batch = spec_utils.crop_center(mask, X_batch, False)
            y_batch = spec_utils.crop_center(mask, y_batch, False)

            loss = criterion(X_batch * mask, y_batch)
            sum_loss += loss.item() * len(X_batch)

    return sum_loss / len(dataloader.dataset)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: train a ``nets.CascadedASPPNet`` model.

    Steps, in order:
      1. Parse the command-line options and seed ``random``, NumPy and
         PyTorch with ``--seed``.
      2. Build the model, optionally load ``--pretrained_model`` weights,
         and move the model to the requested GPU when ``--gpu >= 0``.
      3. Split the dataset with ``train_val_split`` and dump the validation
         pairs to ``val_<timestamp>.json``.
      4. For every epoch, rebuild the training set, run ``--inner_epoch``
         train/validate passes, save the weights whenever the validation
         loss improves, and append the losses to ``log_<timestamp>.json``.
      5. When ``--oracle_rate > 0``, carry samples selected by
         ``dataset.get_oracle_data`` over into the next epoch.
    """
    p = argparse.ArgumentParser()
    p.add_argument('--gpu', '-g', type=int, default=-1)
    p.add_argument('--seed', '-s', type=int, default=2019)
    p.add_argument('--sr', '-r', type=int, default=44100)
    p.add_argument('--hop_length', '-l', type=int, default=1024)
    p.add_argument('--mixture_dataset', '-m', required=True)
    p.add_argument('--instrumental_dataset', '-i', required=True)
    p.add_argument('--learning_rate', type=float, default=0.001)
    p.add_argument('--lr_min', type=float, default=0.0001)
    p.add_argument('--lr_decay_factor', type=float, default=0.9)
    p.add_argument('--lr_decay_patience', type=int, default=6)
    p.add_argument('--batchsize', '-B', type=int, default=4)
    p.add_argument('--cropsize', '-c', type=int, default=256)
    p.add_argument('--val_rate', '-v', type=float, default=0.1)
    p.add_argument('--val_filelist', '-V', type=str, default=None)
    p.add_argument('--val_batchsize', '-b', type=int, default=4)
    p.add_argument('--val_cropsize', '-C', type=int, default=512)
    p.add_argument('--patches', '-p', type=int, default=16)
    p.add_argument('--epoch', '-E', type=int, default=100)
    p.add_argument('--inner_epoch', '-e', type=int, default=4)
    p.add_argument('--oracle_rate', '-O', type=float, default=0)
    p.add_argument('--oracle_drop_rate', '-o', type=float, default=0.5)
    p.add_argument('--mixup_rate', '-M', type=float, default=0.0)
    p.add_argument('--mixup_alpha', '-a', type=float, default=1.0)
    p.add_argument('--pretrained_model', '-P', type=str, default=None)
    p.add_argument('--debug', '-d', action='store_true')
    args = p.parse_args()

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    timestamp = dt.now().strftime('%Y%m%d%H%M%S')

    model = nets.CascadedASPPNet()
    if args.pretrained_model is not None:
        # Deserialize onto the CPU so a checkpoint saved from a GPU run can
        # still be loaded on a machine without CUDA; the model is moved to
        # the GPU afterwards if one was requested.
        model.load_state_dict(
            torch.load(args.pretrained_model, map_location='cpu'))
    if args.gpu >= 0:
        # Honor the requested device index rather than the default device.
        model.cuda(args.gpu)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        factor=args.lr_decay_factor,
        patience=args.lr_decay_patience,
        min_lr=args.lr_min,
        verbose=True)

    train_filelist, val_filelist = train_val_split(
        mix_dir=args.mixture_dataset,
        inst_dir=args.instrumental_dataset,
        val_rate=args.val_rate,
        val_filelist_json=args.val_filelist)

    if args.debug:
        print('### DEBUG MODE')
        train_filelist = train_filelist[:1]
        val_filelist = val_filelist[:1]

    # Persist the validation split so a later run can reuse it via -V.
    with open('val_{}.json'.format(timestamp), 'w', encoding='utf8') as f:
        json.dump(val_filelist, f, ensure_ascii=False)

    for i, (X_fname, y_fname) in enumerate(val_filelist):
        print(i + 1, os.path.basename(X_fname), os.path.basename(y_fname))

    val_dataset = dataset.make_validation_set(
        filelist=val_filelist,
        cropsize=args.val_cropsize,
        sr=args.sr,
        hop_length=args.hop_length,
        offset=model.offset)
    val_dataloader = torch.utils.data.DataLoader(
        dataset=val_dataset,
        batch_size=args.val_batchsize,
        shuffle=False,
        num_workers=4)

    log = []
    oracle_X = None
    oracle_y = None
    best_loss = np.inf
    for epoch in range(args.epoch):
        X_train, y_train = dataset.make_training_set(
            train_filelist, args.cropsize, args.patches, args.sr, args.hop_length, model.offset)

        X_train, y_train = dataset.mixup_generator(
            X_train, y_train, args.mixup_rate, args.mixup_alpha)

        if oracle_X is not None and oracle_y is not None:
            # Overwrite the first len(oracle_X) slots (in random order) with
            # the samples carried over from the previous epoch.
            perm = np.random.permutation(len(oracle_X))
            X_train[perm] = oracle_X
            y_train[perm] = oracle_y

        print('# epoch', epoch)
        instance_loss = np.zeros(len(X_train), dtype=np.float32)
        for inner_epoch in range(args.inner_epoch):
            print(' * inner epoch {}'.format(inner_epoch))
            train_loss = train_inner_epoch(
                X_train, y_train, model, optimizer, args.batchsize, instance_loss)
            val_loss = val_inner_epoch(val_dataloader, model)

            print(' * training loss = {:.6f}, validation loss = {:.6f}'
                  .format(train_loss * 1000, val_loss * 1000))

            scheduler.step(val_loss)

            if val_loss < best_loss:
                best_loss = val_loss
                print(' * best validation loss')
                model_path = 'models/model_iter{}.pth'.format(epoch)
                # torch.save does not create missing directories.
                os.makedirs(os.path.dirname(model_path), exist_ok=True)
                torch.save(model.state_dict(), model_path)

            # Rewrite the full log each time so it survives an interruption.
            log.append([train_loss, val_loss])
            with open('log_{}.json'.format(timestamp), 'w', encoding='utf8') as f:
                json.dump(log, f, ensure_ascii=False)

        if args.oracle_rate > 0:
            # instance_loss was accumulated once per inner epoch; average it.
            instance_loss /= args.inner_epoch
            oracle_X, oracle_y, idx = dataset.get_oracle_data(
                X_train, y_train, instance_loss, args.oracle_rate, args.oracle_drop_rate)
            print(' * oracle loss = {:.6f}'.format(instance_loss[idx].mean()))

        # Free the epoch's training arrays before the next set is built.
        del X_train, y_train
        gc.collect()
|
||||||
|
|
||||||
|
|
||||||
|
# Start training only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user