Add files via upload
This commit is contained in:

VocalRemover.py (228 lines changed)
@@ -22,14 +22,15 @@ from collections import defaultdict
 import queue
 import threading # Run the algorithm inside a thread


 from pathlib import Path


 import inference_v5
-import win32gui, win32con
+import inference_v5_ensemble
+# import win32gui, win32con

-the_program_to_hide = win32gui.GetForegroundWindow()
-win32gui.ShowWindow(the_program_to_hide , win32con.SW_HIDE)
+# the_program_to_hide = win32gui.GetForegroundWindow()
+# win32gui.ShowWindow(the_program_to_hide , win32con.SW_HIDE)

 # Change the current working directory to the directory
 # this file sits in
@@ -44,19 +45,21 @@ os.chdir(base_path) # Change the current working directory to the base path

 instrumentalModels_dir = os.path.join(base_path, 'models')
 banner_path = os.path.join(base_path, 'img', 'UVR-banner.png')
-refresh_path = os.path.join(base_path, 'img', 'refresh.png')
+efile_path = os.path.join(base_path, 'img', 'file.png')
 DEFAULT_DATA = {
 'exportPath': '',
 'inputPaths': [],
 'gpu': False,
 'postprocess': False,
 'tta': False,
+'save': True,
 'output_image': False,
 'window_size': '512',
 'agg': 10,
 'modelFolder': False,
 'modelInstrumentalLabel': '',
-#'aiModel': 'v5',
+'aiModel': 'Single Model',
+'ensChoose': 'HP1 Models',
 'useModel': 'instrumental',
 'lastDir': None,
 }
@@ -196,7 +199,7 @@ class MainWindow(TkinterDnD.Tk):
 PADDING = 10

 COL1_ROWS = 6
-COL2_ROWS = 5
+COL2_ROWS = 6
 COL3_ROWS = 6

 def __init__(self):
@@ -223,7 +226,7 @@ class MainWindow(TkinterDnD.Tk):
 # --Variables--
 self.logo_img = open_image(path=banner_path,
 size=(self.winfo_width(), 9999))
-self.refresh_img = open_image(path=refresh_path,
+self.efile_img = open_image(path=efile_path,
 size=(20, 20))
 self.instrumentalLabel_to_path = defaultdict(lambda: '')
 self.lastInstrumentalModels = []
@@ -236,6 +239,7 @@ class MainWindow(TkinterDnD.Tk):
 self.gpuConversion_var = tk.BooleanVar(value=data['gpu'])
 self.postprocessing_var = tk.BooleanVar(value=data['postprocess'])
 self.tta_var = tk.BooleanVar(value=data['tta'])
+self.save_var = tk.BooleanVar(value=data['save'])
 self.outputImage_var = tk.BooleanVar(value=data['output_image'])
 # Models
 self.instrumentalModel_var = tk.StringVar(value=data['modelInstrumentalLabel'])
@@ -245,8 +249,10 @@ class MainWindow(TkinterDnD.Tk):
 self.winSize_var = tk.StringVar(value=data['window_size'])
 self.agg_var = tk.StringVar(value=data['agg'])
 # AI model
-#self.aiModel_var = tk.StringVar(value=data['aiModel'])
-#self.last_aiModel = self.aiModel_var.get()
+self.aiModel_var = tk.StringVar(value=data['aiModel'])
+self.last_aiModel = self.aiModel_var.get()
+self.ensChoose_var = tk.StringVar(value=data['ensChoose'])
+self.last_ensChoose = self.ensChoose_var.get()
 # Other
 self.inputPathsEntry_var = tk.StringVar(value='')
 self.lastDir = data['lastDir'] # nopep8
@@ -277,9 +283,9 @@ class MainWindow(TkinterDnD.Tk):
 self.conversion_Button = ttk.Button(master=self,
 text='Start Conversion',
 command=self.start_conversion)
-self.refresh_Button = ttk.Button(master=self,
-image=self.refresh_img,
-command=self.restart)
+self.efile_Button = ttk.Button(master=self,
+image=self.efile_img,
+command=self.open_newModel_filedialog)

 self.progressbar = ttk.Progressbar(master=self,
 variable=self.progress_var)
@@ -288,7 +294,7 @@ class MainWindow(TkinterDnD.Tk):
 background='#a0a0a0',
 borderwidth=0,)
 self.command_Text.write(f'COMMAND LINE [{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}]') # nopep8

 def configure_widgets(self):
 """Change widget styling and appearance"""

@@ -322,7 +328,7 @@ class MainWindow(TkinterDnD.Tk):
 relx=0, rely=0, relwidth=1, relheight=0)
 self.conversion_Button.place(x=10, y=self.IMAGE_HEIGHT + self.FILEPATHS_HEIGHT + self.OPTIONS_HEIGHT + self.PADDING*2, width=-20 - 40, height=self.CONVERSIONBUTTON_HEIGHT,
 relx=0, rely=0, relwidth=1, relheight=0)
-self.refresh_Button.place(x=-10 - 35, y=self.IMAGE_HEIGHT + self.FILEPATHS_HEIGHT + self.OPTIONS_HEIGHT + self.PADDING*2, width=35, height=self.CONVERSIONBUTTON_HEIGHT,
+self.efile_Button.place(x=-10 - 35, y=self.IMAGE_HEIGHT + self.FILEPATHS_HEIGHT + self.OPTIONS_HEIGHT + self.PADDING*2, width=35, height=self.CONVERSIONBUTTON_HEIGHT,
 relx=1, rely=0, relwidth=0, relheight=0)
 self.command_Text.place(x=15, y=self.IMAGE_HEIGHT + self.FILEPATHS_HEIGHT + self.OPTIONS_HEIGHT + self.CONVERSIONBUTTON_HEIGHT + self.PADDING*3, width=-30, height=self.COMMAND_HEIGHT,
 relx=0, rely=0, relwidth=1, relheight=0)
@@ -380,11 +386,18 @@ class MainWindow(TkinterDnD.Tk):
 text='TTA',
 variable=self.tta_var,
 )
+# Save Ensemble Outputs
+self.options_save_Checkbutton = ttk.Checkbutton(master=self.options_Frame,
+text='Save All Outputs',
+variable=self.save_var,
+)
 # Save Image
 self.options_image_Checkbutton = ttk.Checkbutton(master=self.options_Frame,
 text='Output Image',
 variable=self.outputImage_var,
 )

+# Model Test Mode
 self.options_modelFolder_Checkbutton = ttk.Checkbutton(master=self.options_Frame,
 text='Model Test Mode',
 variable=self.modelFolder_var,
@@ -407,12 +420,20 @@ class MainWindow(TkinterDnD.Tk):
 background='#404040', font=self.font, foreground='white', relief="groove")

 # AI model
-# self.options_aiModel_Label = tk.Label(master=self.options_Frame,
-# text='Choose AI Engine', anchor=tk.CENTER,
-# background='#63605f', font=self.font, foreground='white', relief="sunken")
-# self.options_aiModel_Optionmenu = ttk.OptionMenu(self.options_Frame,
-# self.aiModel_var,
-# None, 'v5')
+self.options_aiModel_Label = tk.Label(master=self.options_Frame,
+text='Choose Conversion Method', anchor=tk.CENTER,
+background='#404040', font=self.font, foreground='white', relief="groove")
+self.options_aiModel_Optionmenu = ttk.OptionMenu(self.options_Frame,
+self.aiModel_var,
+None, 'Single Model', 'Ensemble Mode')
+# Ensemble Mode
+self.options_ensChoose_Label = tk.Label(master=self.options_Frame,
+text='Choose Ensemble', anchor=tk.CENTER,
+background='#404040', font=self.font, foreground='white', relief="groove")
+self.options_ensChoose_Optionmenu = ttk.OptionMenu(self.options_Frame,
+self.ensChoose_var,
+None, 'HP1 Models', 'HP2 Models', 'All HP Models', 'Vocal Models')



 # "Save to", "Select Your Audio File(s)"", and "Start Conversion" Button Style
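
For readers not familiar with the widget pair added in this hunk: ttk.OptionMenu takes the bound Tk variable first, then a default value, then the selectable choices, which is why the code passes None before the real options. Below is a minimal, self-contained sketch of the same pattern (not part of the commit; the variable and window names are placeholders):

    import tkinter as tk
    from tkinter import ttk

    root = tk.Tk()
    method_var = tk.StringVar(value='Single Model')
    # None fills the 'default' positional argument; leaving it unset keeps the
    # StringVar's initial value, exactly as the diff does above.
    menu = ttk.OptionMenu(root, method_var, None, 'Single Model', 'Ensemble Mode')
    menu.pack(padx=20, pady=20)
    # React to selection changes the same way the GUI later wires aiModel_var.
    method_var.trace_add('write', lambda *args: print('selected:', method_var.get()))
    root.mainloop()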
@@ -428,10 +449,10 @@ class MainWindow(TkinterDnD.Tk):
 self.instrumentalModel_var)

 # Add Open Export Directory Button
-self.options_export_Button = ttk.Button(master=self.options_Frame,
-text='Open Export Directory',
-style="Bold.TButton",
-command=self.open_newModel_filedialog)
+# self.options_export_Button = ttk.Button(master=self.options_Frame,
+# text='Open Export Directory',
+# style="Bold.TButton",
+# command=self.open_newModel_filedialog)
 # -Place Widgets-
 # -Column 1-
 self.options_gpu_Checkbutton.place(x=0, y=0, width=0, height=0,
@@ -440,6 +461,8 @@ class MainWindow(TkinterDnD.Tk):
 relx=0, rely=1/self.COL1_ROWS, relwidth=1/3, relheight=1/self.COL1_ROWS)
 self.options_tta_Checkbutton.place(x=0, y=0, width=0, height=0,
 relx=0, rely=2/self.COL1_ROWS, relwidth=1/3, relheight=1/self.COL1_ROWS)
+self.options_save_Checkbutton.place(x=0, y=0, width=0, height=0,
+relx=0, rely=4/self.COL1_ROWS, relwidth=1/3, relheight=1/self.COL1_ROWS)
 self.options_image_Checkbutton.place(x=0, y=0, width=0, height=0,
 relx=0, rely=3/self.COL1_ROWS, relwidth=1/3, relheight=1/self.COL1_ROWS)
 self.options_modelFolder_Checkbutton.place(x=0, y=0, width=0, height=0,
@@ -447,12 +470,22 @@

 # -Column 2-


 self.options_instrumentalModel_Label.place(x=-15, y=6, width=0, height=-10,
-relx=1/3, rely=0/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)
+relx=1/3, rely=2/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)
 self.options_instrumentalModel_Optionmenu.place(x=-15, y=6, width=0, height=-10,
-relx=1/3, rely=1/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)
-self.options_export_Button.place(x=0, y=0, width=-30, height=-8,
 relx=1/3, rely=3/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)


+self.options_ensChoose_Label.place(x=-15, y=6, width=0, height=-10,
+relx=1/3, rely=0/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)
+self.options_ensChoose_Optionmenu.place(x=-15, y=6, width=0, height=-10,
+relx=1/3, rely=1/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)



+# self.options_export_Button.place(x=0, y=0, width=-30, height=-8,
+# relx=2/3, rely=4/self.COL3_ROWS, relwidth=1/3, relheight=1/self.COL3_ROWS)

 # -Column 3-

@@ -469,15 +502,17 @@ class MainWindow(TkinterDnD.Tk):
 relx=2/3, rely=3/self.COL3_ROWS, relwidth=1/3, relheight=1/self.COL3_ROWS)


-# AI model
-# self.options_aiModel_Label.place(x=5, y=-5, width=-30, height=-8,
-# relx=1/3, rely=5/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)
-# self.options_aiModel_Optionmenu.place(x=5, y=-5, width=-30, height=-8,
-# relx=1/3, rely=6/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)
+# Conversion Method
+self.options_aiModel_Label.place(x=-15, y=6, width=0, height=-10,
+relx=1/3, rely=0/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)
+self.options_aiModel_Optionmenu.place(x=-15, y=4, width=0, height=-10,
+relx=1/3, rely=1/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)



 # Model deselect
-# self.aiModel_var.trace_add('write',
-# lambda *args: self.deselect_models())
+self.aiModel_var.trace_add('write',
+lambda *args: self.deselect_models())

 # Opening filedialogs
 def open_file_filedialog(self):
@@ -532,6 +567,7 @@ class MainWindow(TkinterDnD.Tk):
 else:
 window_size = int(self.winSize_var.get())
 agg = int(self.agg_var.get())
+ensChoose = str(self.ensChoose_var.get())
 except ValueError: # Non integer was put in entry box
 tk.messagebox.showwarning(master=self,
 title='Invalid Input',
@@ -551,25 +587,25 @@ class MainWindow(TkinterDnD.Tk):
 message='You have selected an invalid music file! Please make sure that the file still exists!',
 detail=f'File path: {path}')
 return
+if self.aiModel_var.get() == 'Single Model':
 if not os.path.isfile(instrumentalModel_path):
 tk.messagebox.showwarning(master=self,
 title='Invalid Main Model File',
 message='You have selected an invalid main model file!\nPlease make sure that your model file still exists!')
 return

 if not os.path.isdir(export_path):
 tk.messagebox.showwarning(master=self,
 title='Invalid Export Directory',
 message='You have selected an invalid export directory!\nPlease make sure that your directory still exists!')
 return

-# if self.aiModel_var.get() == 'v4':
-# inference = inference_v4
-# elif self.aiModel_var.get() == 'v5':
-# inference = inference_v5
-# else:
-# raise TypeError('This error should not occur.')
+if self.aiModel_var.get() == 'Single Model':
+inference = inference_v5
+elif self.aiModel_var.get() == 'Ensemble Mode':
+inference = inference_v5_ensemble
+else:
+raise TypeError('This error should not occur.')

-inference = inference_v5

 # -Run the algorithm-
 threading.Thread(target=inference.main,
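
The hunk above replaces the old v4/v5 engine switch with a dispatch on the new "Conversion Method" dropdown, and the chosen module's main() is then launched on a daemon thread so the Tk event loop stays responsive. Below is a condensed sketch of that flow; thread_kwargs stands in for the keyword dictionary assembled in the following hunks, so treat it as an illustration rather than the literal method body:

    import threading

    import inference_v5
    import inference_v5_ensemble

    def start_conversion_sketch(conversion_method: str, thread_kwargs: dict):
        # The dropdown text decides which back-end module runs.
        if conversion_method == 'Single Model':
            inference = inference_v5
        elif conversion_method == 'Ensemble Mode':
            inference = inference_v5_ensemble
        else:
            raise TypeError('This error should not occur.')
        # daemon=True mirrors the GUI: the worker dies with the main window.
        threading.Thread(target=inference.main, kwargs=thread_kwargs, daemon=True).start()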
@@ -580,7 +616,8 @@ class MainWindow(TkinterDnD.Tk):
 # Processing Options
 'gpu': 0 if self.gpuConversion_var.get() else -1,
 'postprocess': self.postprocessing_var.get(),
-'tta': self.tta_var.get(), # not needed for v2
+'tta': self.tta_var.get(),
+'save': self.save_var.get(),
 'output_image': self.outputImage_var.get(),
 # Models
 'instrumentalModel': instrumentalModel_path,
@@ -591,10 +628,12 @@ class MainWindow(TkinterDnD.Tk):
 # Constants
 'window_size': window_size,
 'agg': agg,
+'ensChoose': ensChoose,
 # Other Variables (Tkinter)
 'window': self,
 'text_widget': self.command_Text,
 'button_widget': self.conversion_Button,
+'inst_menu': self.options_instrumentalModel_Optionmenu,
 'progress_var': self.progress_var,
 },
 daemon=True
@@ -647,23 +686,55 @@ class MainWindow(TkinterDnD.Tk):
 on certain selections
 """

-# Models
-# self.options_instrumentalModel_Label.configure(foreground='#000')
-# self.options_instrumentalModel_Optionmenu.configure(state=tk.NORMAL) # nopep8
-# if self.aiModel_var.get() == 'v5':
-# self.options_tta_Checkbutton.configure(state=tk.NORMAL)
-# self.options_agg_Label.place(x=5, y=-5, width=-30, height=-8,
-# relx=1/3, rely=3/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)
-# self.options_agg_Entry.place(x=5, y=-4, width=-30, height=-8,
-# relx=1/3, rely=4/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)
+if self.aiModel_var.get() == 'Single Model':
+self.options_ensChoose_Label.place_forget()
+self.options_ensChoose_Optionmenu.place_forget()
+self.options_save_Checkbutton.configure(state=tk.DISABLED)
+self.options_save_Checkbutton.place_forget()
+self.options_modelFolder_Checkbutton.configure(state=tk.NORMAL)
+self.options_modelFolder_Checkbutton.place(x=0, y=0, width=0, height=0,
+relx=0, rely=4/self.COL1_ROWS, relwidth=1/3, relheight=1/self.COL1_ROWS)
+self.options_instrumentalModel_Label.place(x=-15, y=6, width=0, height=-10,
+relx=1/3, rely=3/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)
+self.options_instrumentalModel_Optionmenu.place(x=-15, y=6, width=0, height=-10,
+relx=1/3, rely=4/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)
+else:
+self.options_instrumentalModel_Label.place_forget()
+self.options_instrumentalModel_Optionmenu.place_forget()
+self.options_modelFolder_Checkbutton.place_forget()
+self.options_modelFolder_Checkbutton.configure(state=tk.DISABLED)
+self.options_save_Checkbutton.configure(state=tk.NORMAL)
+self.options_save_Checkbutton.place(x=0, y=0, width=0, height=0,
+relx=0, rely=4/self.COL1_ROWS, relwidth=1/3, relheight=1/self.COL1_ROWS)
+self.options_ensChoose_Label.place(x=-15, y=6, width=0, height=-10,
+relx=1/3, rely=3/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)
+self.options_ensChoose_Optionmenu.place(x=-15, y=6, width=0, height=-10,
+relx=1/3, rely=4/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)

-# else:
-# self.options_tta_Checkbutton.configure(state=tk.NORMAL)
-# self.options_agg_Label.place(x=5, y=-5, width=-30, height=-8,
-# relx=1/3, rely=3/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)
-# self.options_agg_Entry.place(x=5, y=-4, width=-30, height=-8,
-# relx=1/3, rely=4/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)
+if self.aiModel_var.get() == 'Ensemble Mode':
+self.options_instrumentalModel_Label.place_forget()
+self.options_instrumentalModel_Optionmenu.place_forget()
+self.options_modelFolder_Checkbutton.place_forget()
+self.options_modelFolder_Checkbutton.configure(state=tk.DISABLED)
+self.options_save_Checkbutton.configure(state=tk.NORMAL)
+self.options_save_Checkbutton.place(x=0, y=0, width=0, height=0,
+relx=0, rely=4/self.COL1_ROWS, relwidth=1/3, relheight=1/self.COL1_ROWS)
+self.options_ensChoose_Label.place(x=-15, y=6, width=0, height=-10,
+relx=1/3, rely=3/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)
+self.options_ensChoose_Optionmenu.place(x=-15, y=6, width=0, height=-10,
+relx=1/3, rely=4/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)
+else:
+self.options_ensChoose_Label.place_forget()
+self.options_ensChoose_Optionmenu.place_forget()
+self.options_save_Checkbutton.configure(state=tk.DISABLED)
+self.options_save_Checkbutton.place_forget()
+self.options_modelFolder_Checkbutton.configure(state=tk.NORMAL)
+self.options_modelFolder_Checkbutton.place(x=0, y=0, width=0, height=0,
+relx=0, rely=4/self.COL1_ROWS, relwidth=1/3, relheight=1/self.COL1_ROWS)
+self.options_instrumentalModel_Label.place(x=-15, y=6, width=0, height=-10,
+relx=1/3, rely=3/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)
+self.options_instrumentalModel_Optionmenu.place(x=-15, y=6, width=0, height=-10,
+relx=1/3, rely=4/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS)


 self.update_inputPaths()
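
The rewritten deselect_models above leans entirely on Tkinter's place()/place_forget() pair: hiding a widget with place_forget() does not destroy it, so it can be re-placed later with new coordinates when the dropdown flips between Single Model and Ensemble Mode. A small stand-alone sketch of that toggle pattern (the widget names here are placeholders, not the ones from the diff):

    import tkinter as tk
    from tkinter import ttk

    root = tk.Tk()
    root.geometry('480x200')
    mode = tk.StringVar(value='Single Model')
    single_only = ttk.Label(root, text='Choose Main Model')
    ensemble_only = ttk.Label(root, text='Choose Ensemble')

    def refresh_layout(*_):
        # Hide the widget that does not apply, then (re-)place the other one.
        if mode.get() == 'Single Model':
            ensemble_only.place_forget()
            single_only.place(relx=1/3, rely=0.4, relwidth=1/3, relheight=0.2)
        else:
            single_only.place_forget()
            ensemble_only.place(relx=1/3, rely=0.4, relwidth=1/3, relheight=0.2)

    mode.trace_add('write', refresh_layout)
    refresh_layout()
    ttk.OptionMenu(root, mode, None, 'Single Model', 'Ensemble Mode').pack()
    root.mainloop()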
@@ -678,23 +749,26 @@ class MainWindow(TkinterDnD.Tk):
 self.last_aiModel = self.aiModel_var.get()

 self.instrumentalModel_var.set('')
+self.ensChoose_var.set('HP1 Models')

 self.winSize_var.set(DEFAULT_DATA['window_size'])
 self.agg_var.set(DEFAULT_DATA['agg'])
+self.modelFolder_var.set(DEFAULT_DATA['modelFolder'])


 self.update_available_models()
 self.update_states()

-def restart(self):
-"""
-Restart the application after asking for confirmation
-"""
-save = tk.messagebox.askyesno(title='Confirmation',
-message='The application will restart. Do you want to save the data?')
-if save:
-self.save_values()
-subprocess.Popen(f'python "{__file__}"', shell=True)
-exit()
+# def restart(self):
+# """
+# Restart the application after asking for confirmation
+# """
+# save = tk.messagebox.askyesno(title='Confirmation',
+# message='The application will restart. Do you want to save the data?')
+# if save:
+# self.save_values()
+# subprocess.Popen(f'..App\Python\python.exe "{__file__}"')
+# exit()

 def save_values(self):
 """
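
The restart() helper is commented out above, including its replacement launch line that points at a bundled interpreter. If it were re-enabled, a portable variant could look like the hypothetical sketch below; using sys.executable instead of a hard-coded interpreter path is an assumption on my part, not something this commit does:

    import subprocess
    import sys

    def restart_app_sketch():
        # Relaunch this script with the interpreter that is currently running it,
        # then let the old process exit.
        subprocess.Popen([sys.executable, __file__])
        raise SystemExit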
@@ -716,6 +790,7 @@ class MainWindow(TkinterDnD.Tk):
 'gpu': self.gpuConversion_var.get(),
 'postprocess': self.postprocessing_var.get(),
 'tta': self.tta_var.get(),
+'save': self.save_var.get(),
 'output_image': self.outputImage_var.get(),
 'window_size': window_size,
 'agg': agg,
@@ -723,7 +798,8 @@ class MainWindow(TkinterDnD.Tk):
 'lastDir': self.lastDir,
 'modelFolder': self.modelFolder_var.get(),
 'modelInstrumentalLabel': self.instrumentalModel_var.get(),
-#'aiModel': self.aiModel_var.get(),
+'aiModel': self.aiModel_var.get(),
+'ensChoose': self.ensChoose_var.get(),
 })

 self.destroy()
inference_v5.py (481 lines changed)
@@ -30,70 +30,8 @@ class VocalRemover(object):
 self.text_widget = text_widget
 self.models = defaultdict(lambda: None)
 self.devices = defaultdict(lambda: None)
-self._load_models()
 # self.offset = model.offset

-def _load_models(self):
-self.text_widget.write('Loading models...\n') # nopep8 Write Command Text
-
-nn_arch_sizes = [
-31191, # default
-33966, 123821, 123812, 537238 # custom
-]
-
-global args
-global model_params_d
-
-p = argparse.ArgumentParser()
-p.add_argument('--paramone', type=str, default='lib_v5/modelparams/4band_44100.json')
-p.add_argument('--paramtwo', type=str, default='lib_v5/modelparams/4band_v2.json')
-p.add_argument('--paramthree', type=str, default='lib_v5/modelparams/3band_44100_msb2.json')
-p.add_argument('--paramfour', type=str, default='lib_v5/modelparams/4band_v2_sn.json')
-p.add_argument('--aggressiveness',type=float, default=data['agg']/100)
-p.add_argument('--nn_architecture', type=str, choices= ['auto'] + list('{}KB'.format(s) for s in nn_arch_sizes), default='auto')
-p.add_argument('--high_end_process', type=str, default='mirroring')
-args = p.parse_args()
-
-if 'auto' == args.nn_architecture:
-model_size = math.ceil(os.stat(data['instrumentalModel']).st_size / 1024)
-args.nn_architecture = '{}KB'.format(min(nn_arch_sizes, key=lambda x:abs(x-model_size)))
-
-nets = importlib.import_module('lib_v5.nets' + f'_{args.nn_architecture}'.replace('_{}KB'.format(nn_arch_sizes[0]), ''), package=None)
-
-ModelName=(data['instrumentalModel'])
-
-ModelParam1="4BAND_44100"
-ModelParam2="4BAND_44100_B"
-ModelParam3="MSB2"
-ModelParam4="4BAND_44100_SN"
-
-if ModelParam1 in ModelName:
-model_params_d=args.paramone
-if ModelParam2 in ModelName:
-model_params_d=args.paramtwo
-if ModelParam3 in ModelName:
-model_params_d=args.paramthree
-if ModelParam4 in ModelName:
-model_params_d=args.paramfour
-
-print(model_params_d)
-
-mp = ModelParameters(model_params_d)
-
-# -Instrumental-
-if os.path.isfile(data['instrumentalModel']):
-device = torch.device('cpu')
-model = nets.CascadedASPPNet(mp.param['bins'] * 2)
-model.load_state_dict(torch.load(self.data['instrumentalModel'],
-map_location=device))
-if torch.cuda.is_available() and self.data['gpu'] >= 0:
-device = torch.device('cuda:{}'.format(self.data['gpu']))
-model.to(device)
-
-self.models['instrumental'] = model
-self.devices['instrumental'] = device
-
-self.text_widget.write('Done!\n')

 data = {
 # Paths
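
The removed _load_models() carried the architecture auto-detection that the later hunks rebuild inside main(): when --nn_architecture is left at 'auto', the checkpoint's size on disk (in KB) is compared against a list of known sizes and the closest match selects the nets module variant. Condensed from the code above:

    import math
    import os

    nn_arch_sizes = [31191, 33966, 123821, 123812, 537238]  # known checkpoint sizes in KB

    def guess_architecture(model_path: str) -> str:
        """Return the '<size>KB' label whose nominal size is closest to the file."""
        model_size = math.ceil(os.stat(model_path).st_size / 1024)
        # The label is later appended to 'lib_v5.nets' when importing, except for
        # the default 31191KB size, whose suffix is stripped again.
        return '{}KB'.format(min(nn_arch_sizes, key=lambda x: abs(x - model_size)))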
@@ -152,6 +90,26 @@ def determineModelFolderName():

 def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress_var: tk.Variable,
 **kwargs: dict):

+global args
+global model_params_d
+global nn_arch_sizes
+
+nn_arch_sizes = [
+31191, # default
+33966, 123821, 123812, 537238 # custom
+]
+
+p = argparse.ArgumentParser()
+p.add_argument('--paramone', type=str, default='lib_v5/modelparams/4band_44100.json')
+p.add_argument('--paramtwo', type=str, default='lib_v5/modelparams/4band_v2.json')
+p.add_argument('--paramthree', type=str, default='lib_v5/modelparams/3band_44100_msb2.json')
+p.add_argument('--paramfour', type=str, default='lib_v5/modelparams/4band_v2_sn.json')
+p.add_argument('--aggressiveness',type=float, default=data['agg']/100)
+p.add_argument('--nn_architecture', type=str, choices= ['auto'] + list('{}KB'.format(s) for s in nn_arch_sizes), default='auto')
+p.add_argument('--high_end_process', type=str, default='mirroring')
+args = p.parse_args()
+

 def save_files(wav_instrument, wav_vocals):
 """Save output music files"""
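
The --paramone through --paramfour defaults registered above are paired with a checkpoint later in main() by substring-matching the model filename. The sketch below is roughly equivalent; the original uses a chain of plain if statements in which the last match wins, so the more specific markers are simply checked first here:

    def select_model_params(model_path: str) -> str:
        table = {
            '4BAND_44100_B': 'lib_v5/modelparams/4band_v2.json',
            '4BAND_44100_SN': 'lib_v5/modelparams/4band_v2_sn.json',
            'MSB2': 'lib_v5/modelparams/3band_44100_msb2.json',
            '4BAND_44100': 'lib_v5/modelparams/4band_44100.json',
        }
        for marker, params in table.items():  # dicts preserve insertion order
            if marker in model_path:
                return params
        raise ValueError(f'No model parameters known for {model_path}')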
@@ -215,210 +173,255 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
os.mkdir(folder_path)
|
os.mkdir(folder_path)
|
||||||
|
|
||||||
# Separation Preperation
|
# Separation Preperation
|
||||||
try:
|
try: #Load File(s)
|
||||||
for file_num, music_file in enumerate(data['input_paths'], start=1):
|
for file_num, music_file in enumerate(data['input_paths'], start=1):
|
||||||
# Determine File Name
|
# Determine File Name
|
||||||
base_name = f'{data["export_path"]}{modelFolderName}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}'
|
base_name = f'{data["export_path"]}{modelFolderName}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}'
|
||||||
# Start Separation
|
|
||||||
model_name = os.path.basename(data[f'{data["useModel"]}Model'])
|
|
||||||
model = vocal_remover.models[data['useModel']]
|
|
||||||
device = vocal_remover.devices[data['useModel']]
|
|
||||||
|
|
||||||
# -Get text and update progress-
|
|
||||||
base_text = get_baseText(total_files=len(data['input_paths']),
|
|
||||||
file_num=file_num)
|
|
||||||
progress_kwargs = {'progress_var': progress_var,
|
|
||||||
'total_files': len(data['input_paths']),
|
|
||||||
'file_num': file_num}
|
|
||||||
update_progress(**progress_kwargs,
|
|
||||||
step=0)
|
|
||||||
|
|
||||||
mp = ModelParameters(model_params_d)
|
|
||||||
|
|
||||||
# -Go through the different steps of seperation-
|
model_name = os.path.basename(data[f'{data["useModel"]}Model'])
|
||||||
# Wave source
|
model = vocal_remover.models[data['useModel']]
|
||||||
text_widget.write(base_text + 'Loading wave source...\n')
|
device = vocal_remover.devices[data['useModel']]
|
||||||
|
# -Get text and update progress-
|
||||||
X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {}
|
base_text = get_baseText(total_files=len(data['input_paths']),
|
||||||
|
file_num=file_num)
|
||||||
bands_n = len(mp.param['band'])
|
progress_kwargs = {'progress_var': progress_var,
|
||||||
|
'total_files': len(data['input_paths']),
|
||||||
for d in range(bands_n, 0, -1):
|
'file_num': file_num}
|
||||||
bp = mp.param['band'][d]
|
update_progress(**progress_kwargs,
|
||||||
|
step=0)
|
||||||
if d == bands_n: # high-end band
|
|
||||||
X_wave[d], _ = librosa.load(
|
|
||||||
music_file, bp['sr'], False, dtype=np.float32, res_type=bp['res_type'])
|
#Load Model(s)
|
||||||
|
text_widget.write(base_text + 'Loading models...')
|
||||||
if X_wave[d].ndim == 1:
|
|
||||||
X_wave[d] = np.asarray([X_wave[d], X_wave[d]])
|
|
||||||
else: # lower bands
|
if 'auto' == args.nn_architecture:
|
||||||
X_wave[d] = librosa.resample(X_wave[d+1], mp.param['band'][d+1]['sr'], bp['sr'], res_type=bp['res_type'])
|
model_size = math.ceil(os.stat(data['instrumentalModel']).st_size / 1024)
|
||||||
|
args.nn_architecture = '{}KB'.format(min(nn_arch_sizes, key=lambda x:abs(x-model_size)))
|
||||||
|
|
||||||
|
nets = importlib.import_module('lib_v5.nets' + f'_{args.nn_architecture}'.replace('_{}KB'.format(nn_arch_sizes[0]), ''), package=None)
|
||||||
|
|
||||||
|
ModelName=(data['instrumentalModel'])
|
||||||
|
|
||||||
|
ModelParam1="4BAND_44100"
|
||||||
|
ModelParam2="4BAND_44100_B"
|
||||||
|
ModelParam3="MSB2"
|
||||||
|
ModelParam4="4BAND_44100_SN"
|
||||||
|
|
||||||
|
if ModelParam1 in ModelName:
|
||||||
|
model_params_d=args.paramone
|
||||||
|
if ModelParam2 in ModelName:
|
||||||
|
model_params_d=args.paramtwo
|
||||||
|
if ModelParam3 in ModelName:
|
||||||
|
model_params_d=args.paramthree
|
||||||
|
if ModelParam4 in ModelName:
|
||||||
|
model_params_d=args.paramfour
|
||||||
|
|
||||||
# Stft of wave source
|
print('Model Parameters:', model_params_d)
|
||||||
|
|
||||||
X_spec_s[d] = spec_utils.wave_to_spectrogram_mt(X_wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'],
|
mp = ModelParameters(model_params_d)
|
||||||
mp.param['mid_side_b2'], mp.param['reverse'])
|
|
||||||
|
|
||||||
if d == bands_n and args.high_end_process != 'none':
|
# -Instrumental-
|
||||||
input_high_end_h = (bp['n_fft']//2 - bp['crop_stop']) + (mp.param['pre_filter_stop'] - mp.param['pre_filter_start'])
|
if os.path.isfile(data['instrumentalModel']):
|
||||||
input_high_end = X_spec_s[d][:, bp['n_fft']//2-input_high_end_h:bp['n_fft']//2, :]
|
device = torch.device('cpu')
|
||||||
|
model = nets.CascadedASPPNet(mp.param['bins'] * 2)
|
||||||
text_widget.write(base_text + 'Done!\n')
|
model.load_state_dict(torch.load(data['instrumentalModel'],
|
||||||
|
map_location=device))
|
||||||
update_progress(**progress_kwargs,
|
if torch.cuda.is_available() and data['gpu'] >= 0:
|
||||||
step=0.1)
|
device = torch.device('cuda:{}'.format(data['gpu']))
|
||||||
|
model.to(device)
|
||||||
text_widget.write(base_text + 'Stft of wave source...\n')
|
|
||||||
|
|
||||||
text_widget.write(base_text + 'Done!\n')
|
|
||||||
|
|
||||||
text_widget.write(base_text + "Please Wait..\n")
|
|
||||||
|
|
||||||
X_spec_m = spec_utils.combine_spectrograms(X_spec_s, mp)
|
|
||||||
|
|
||||||
del X_wave, X_spec_s
|
|
||||||
|
|
||||||
def inference(X_spec, device, model, aggressiveness):
|
|
||||||
|
|
||||||
def _execute(X_mag_pad, roi_size, n_window, device, model, aggressiveness):
|
|
||||||
model.eval()
|
|
||||||
|
|
||||||
with torch.no_grad():
|
vocal_remover.models['instrumental'] = model
|
||||||
preds = []
|
vocal_remover.devices['instrumental'] = device
|
||||||
|
|
||||||
iterations = [n_window]
|
|
||||||
|
|
||||||
total_iterations = sum(iterations)
|
text_widget.write(' Done!\n')
|
||||||
|
|
||||||
|
model_name = os.path.basename(data[f'{data["useModel"]}Model'])
|
||||||
|
|
||||||
|
mp = ModelParameters(model_params_d)
|
||||||
|
|
||||||
text_widget.write(base_text + "Length: "f"{total_iterations} Slices\n")
|
# -Go through the different steps of seperation-
|
||||||
|
# Wave source
|
||||||
for i in tqdm(range(n_window)):
|
text_widget.write(base_text + 'Loading wave source...')
|
||||||
update_progress(**progress_kwargs,
|
|
||||||
step=(0.1 + (0.8/n_window * i)))
|
X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {}
|
||||||
start = i * roi_size
|
|
||||||
X_mag_window = X_mag_pad[None, :, :, start:start + data['window_size']]
|
bands_n = len(mp.param['band'])
|
||||||
X_mag_window = torch.from_numpy(X_mag_window).to(device)
|
|
||||||
|
for d in range(bands_n, 0, -1):
|
||||||
pred = model.predict(X_mag_window, aggressiveness)
|
bp = mp.param['band'][d]
|
||||||
|
|
||||||
pred = pred.detach().cpu().numpy()
|
if d == bands_n: # high-end band
|
||||||
preds.append(pred[0])
|
X_wave[d], _ = librosa.load(
|
||||||
|
music_file, bp['sr'], False, dtype=np.float32, res_type=bp['res_type'])
|
||||||
|
|
||||||
pred = np.concatenate(preds, axis=2)
|
if X_wave[d].ndim == 1:
|
||||||
|
X_wave[d] = np.asarray([X_wave[d], X_wave[d]])
|
||||||
|
else: # lower bands
|
||||||
|
X_wave[d] = librosa.resample(X_wave[d+1], mp.param['band'][d+1]['sr'], bp['sr'], res_type=bp['res_type'])
|
||||||
|
|
||||||
|
# Stft of wave source
|
||||||
|
|
||||||
return pred
|
X_spec_s[d] = spec_utils.wave_to_spectrogram_mt(X_wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'],
|
||||||
|
mp.param['mid_side_b2'], mp.param['reverse'])
|
||||||
|
|
||||||
|
if d == bands_n and args.high_end_process != 'none':
|
||||||
|
input_high_end_h = (bp['n_fft']//2 - bp['crop_stop']) + (mp.param['pre_filter_stop'] - mp.param['pre_filter_start'])
|
||||||
|
input_high_end = X_spec_s[d][:, bp['n_fft']//2-input_high_end_h:bp['n_fft']//2, :]
|
||||||
|
|
||||||
def preprocess(X_spec):
|
text_widget.write('Done!\n')
|
||||||
X_mag = np.abs(X_spec)
|
|
||||||
X_phase = np.angle(X_spec)
|
|
||||||
|
|
||||||
return X_mag, X_phase
|
update_progress(**progress_kwargs,
|
||||||
|
step=0.1)
|
||||||
|
|
||||||
|
text_widget.write(base_text + 'Stft of wave source...')
|
||||||
|
|
||||||
X_mag, X_phase = preprocess(X_spec)
|
text_widget.write(' Done!\n')
|
||||||
|
|
||||||
coef = X_mag.max()
|
|
||||||
X_mag_pre = X_mag / coef
|
|
||||||
|
|
||||||
n_frame = X_mag_pre.shape[2]
|
|
||||||
pad_l, pad_r, roi_size = dataset.make_padding(n_frame,
|
|
||||||
data['window_size'], model.offset)
|
|
||||||
n_window = int(np.ceil(n_frame / roi_size))
|
|
||||||
|
|
||||||
X_mag_pad = np.pad(
|
|
||||||
X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant')
|
|
||||||
|
|
||||||
pred = _execute(X_mag_pad, roi_size, n_window,
|
text_widget.write(base_text + "Please Wait...\n")
|
||||||
device, model, aggressiveness)
|
|
||||||
pred = pred[:, :, :n_frame]
|
X_spec_m = spec_utils.combine_spectrograms(X_spec_s, mp)
|
||||||
|
|
||||||
if data['tta']:
|
del X_wave, X_spec_s
|
||||||
pad_l += roi_size // 2
|
|
||||||
pad_r += roi_size // 2
|
def inference(X_spec, device, model, aggressiveness):
|
||||||
n_window += 1
|
|
||||||
|
def _execute(X_mag_pad, roi_size, n_window, device, model, aggressiveness):
|
||||||
|
model.eval()
|
||||||
|
|
||||||
|
with torch.no_grad():
|
||||||
|
preds = []
|
||||||
|
|
||||||
|
iterations = [n_window]
|
||||||
|
|
||||||
|
total_iterations = sum(iterations)
|
||||||
|
|
||||||
|
text_widget.write(base_text + "Processing "f"{total_iterations} Slices... ")
|
||||||
|
|
||||||
|
for i in tqdm(range(n_window)):
|
||||||
|
update_progress(**progress_kwargs,
|
||||||
|
step=(0.1 + (0.8/n_window * i)))
|
||||||
|
start = i * roi_size
|
||||||
|
X_mag_window = X_mag_pad[None, :, :, start:start + data['window_size']]
|
||||||
|
X_mag_window = torch.from_numpy(X_mag_window).to(device)
|
||||||
|
|
||||||
|
pred = model.predict(X_mag_window, aggressiveness)
|
||||||
|
|
||||||
|
pred = pred.detach().cpu().numpy()
|
||||||
|
preds.append(pred[0])
|
||||||
|
|
||||||
|
pred = np.concatenate(preds, axis=2)
|
||||||
|
text_widget.write('Done!\n')
|
||||||
|
return pred
|
||||||
|
|
||||||
|
def preprocess(X_spec):
|
||||||
|
X_mag = np.abs(X_spec)
|
||||||
|
X_phase = np.angle(X_spec)
|
||||||
|
|
||||||
|
return X_mag, X_phase
|
||||||
|
|
||||||
|
X_mag, X_phase = preprocess(X_spec)
|
||||||
|
|
||||||
|
coef = X_mag.max()
|
||||||
|
X_mag_pre = X_mag / coef
|
||||||
|
|
||||||
|
n_frame = X_mag_pre.shape[2]
|
||||||
|
pad_l, pad_r, roi_size = dataset.make_padding(n_frame,
|
||||||
|
data['window_size'], model.offset)
|
||||||
|
n_window = int(np.ceil(n_frame / roi_size))
|
||||||
|
|
||||||
X_mag_pad = np.pad(
|
X_mag_pad = np.pad(
|
||||||
X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant')
|
X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant')
|
||||||
|
|
||||||
|
pred = _execute(X_mag_pad, roi_size, n_window,
|
||||||
|
device, model, aggressiveness)
|
||||||
|
pred = pred[:, :, :n_frame]
|
||||||
|
|
||||||
|
if data['tta']:
|
||||||
|
pad_l += roi_size // 2
|
||||||
|
pad_r += roi_size // 2
|
||||||
|
n_window += 1
|
||||||
|
|
||||||
pred_tta = _execute(X_mag_pad, roi_size, n_window,
|
X_mag_pad = np.pad(
|
||||||
device, model, aggressiveness)
|
X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant')
|
||||||
pred_tta = pred_tta[:, :, roi_size // 2:]
|
|
||||||
pred_tta = pred_tta[:, :, :n_frame]
|
|
||||||
|
|
||||||
return (pred + pred_tta) * 0.5 * coef, X_mag, np.exp(1.j * X_phase)
|
pred_tta = _execute(X_mag_pad, roi_size, n_window,
|
||||||
|
device, model, aggressiveness)
|
||||||
|
pred_tta = pred_tta[:, :, roi_size // 2:]
|
||||||
|
pred_tta = pred_tta[:, :, :n_frame]
|
||||||
|
|
||||||
|
return (pred + pred_tta) * 0.5 * coef, X_mag, np.exp(1.j * X_phase)
|
||||||
|
else:
|
||||||
|
return pred * coef, X_mag, np.exp(1.j * X_phase)
|
||||||
|
|
||||||
|
|
||||||
|
aggressiveness = {'value': args.aggressiveness, 'split_bin': mp.param['band'][1]['crop_stop']}
|
||||||
|
|
||||||
|
|
||||||
|
if data['tta']:
|
||||||
|
text_widget.write(base_text + "Running Inferences (TTA)...\n")
|
||||||
else:
|
else:
|
||||||
return pred * coef, X_mag, np.exp(1.j * X_phase)
|
text_widget.write(base_text + "Running Inference...\n")
|
||||||
|
|
||||||
|
pred, X_mag, X_phase = inference(X_spec_m,
|
||||||
aggressiveness = {'value': args.aggressiveness, 'split_bin': mp.param['band'][1]['crop_stop']}
|
device,
|
||||||
|
model, aggressiveness)
|
||||||
|
|
||||||
if data['tta']:
|
|
||||||
text_widget.write(base_text + "Running Inferences (TTA)...\n")
|
|
||||||
else:
|
|
||||||
text_widget.write(base_text + "Running Inference...\n")
|
|
||||||
|
|
||||||
pred, X_mag, X_phase = inference(X_spec_m,
|
|
||||||
device,
|
|
||||||
model, aggressiveness)
|
|
||||||
|
|
||||||
text_widget.write(base_text + 'Done!\n')
|
|
||||||
|
|
||||||
update_progress(**progress_kwargs,
|
|
||||||
step=0.9)
|
|
||||||
# Postprocess
|
|
||||||
if data['postprocess']:
|
|
||||||
text_widget.write(base_text + 'Post processing...\n')
|
|
||||||
pred_inv = np.clip(X_mag - pred, 0, np.inf)
|
|
||||||
pred = spec_utils.mask_silence(pred, pred_inv)
|
|
||||||
text_widget.write(base_text + 'Done!\n')
|
|
||||||
|
|
||||||
update_progress(**progress_kwargs,
|
update_progress(**progress_kwargs,
|
||||||
step=0.95)
|
step=0.9)
|
||||||
|
# Postprocess
|
||||||
|
if data['postprocess']:
|
||||||
|
text_widget.write(base_text + 'Post processing...')
|
||||||
|
pred_inv = np.clip(X_mag - pred, 0, np.inf)
|
||||||
|
pred = spec_utils.mask_silence(pred, pred_inv)
|
||||||
|
text_widget.write(' Done!\n')
|
||||||
|
|
||||||
# Inverse stft
|
update_progress(**progress_kwargs,
|
||||||
text_widget.write(base_text + 'Inverse stft of instruments and vocals...\n') # nopep8
|
step=0.95)
|
||||||
y_spec_m = pred * X_phase
|
|
||||||
v_spec_m = X_spec_m - y_spec_m
|
|
||||||
|
|
||||||
if args.high_end_process.startswith('mirroring'):
|
|
||||||
input_high_end_ = spec_utils.mirroring(args.high_end_process, y_spec_m, input_high_end, mp)
|
|
||||||
|
|
||||||
wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, mp, input_high_end_h, input_high_end_)
|
|
||||||
else:
|
|
||||||
wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, mp)
|
|
||||||
|
|
||||||
if args.high_end_process.startswith('mirroring'):
|
# Inverse stft
|
||||||
input_high_end_ = spec_utils.mirroring(args.high_end_process, v_spec_m, input_high_end, mp)
|
text_widget.write(base_text + 'Inverse stft of instruments and vocals...') # nopep8
|
||||||
|
y_spec_m = pred * X_phase
|
||||||
|
v_spec_m = X_spec_m - y_spec_m
|
||||||
|
|
||||||
|
if args.high_end_process.startswith('mirroring'):
|
||||||
|
input_high_end_ = spec_utils.mirroring(args.high_end_process, y_spec_m, input_high_end, mp)
|
||||||
|
|
||||||
wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, mp, input_high_end_h, input_high_end_)
|
wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, mp, input_high_end_h, input_high_end_)
|
||||||
else:
|
else:
|
||||||
wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, mp)
|
wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, mp)
|
||||||
|
|
||||||
text_widget.write(base_text + 'Done!\n')
|
|
||||||
|
|
||||||
update_progress(**progress_kwargs,
|
if args.high_end_process.startswith('mirroring'):
|
||||||
step=1)
|
input_high_end_ = spec_utils.mirroring(args.high_end_process, v_spec_m, input_high_end, mp)
|
||||||
# Save output music files
|
|
||||||
text_widget.write(base_text + 'Saving Files...\n')
|
|
||||||
save_files(wav_instrument, wav_vocals)
|
|
||||||
text_widget.write(base_text + 'Done!\n')
|
|
||||||
|
|
||||||
update_progress(**progress_kwargs,
|
wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, mp, input_high_end_h, input_high_end_)
|
||||||
step=1)
|
else:
|
||||||
|
wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, mp)
|
||||||
|
|
||||||
|
text_widget.write('Done!\n')
|
||||||
|
|
||||||
# Save output image
|
update_progress(**progress_kwargs,
|
||||||
if data['output_image']:
|
step=1)
|
||||||
with open('{}_Instruments.jpg'.format(base_name), mode='wb') as f:
|
# Save output music files
|
||||||
image = spec_utils.spectrogram_to_image(y_spec_m)
|
text_widget.write(base_text + 'Saving Files...')
|
||||||
_, bin_image = cv2.imencode('.jpg', image)
|
save_files(wav_instrument, wav_vocals)
|
||||||
bin_image.tofile(f)
|
text_widget.write(' Done!\n')
|
||||||
with open('{}_Vocals.jpg'.format(base_name), mode='wb') as f:
|
|
||||||
image = spec_utils.spectrogram_to_image(v_spec_m)
|
|
||||||
_, bin_image = cv2.imencode('.jpg', image)
|
|
||||||
bin_image.tofile(f)
|
|
||||||
|
|
||||||
text_widget.write(base_text + 'Completed Seperation!\n\n')
|
update_progress(**progress_kwargs,
|
||||||
|
step=1)
|
||||||
|
|
||||||
|
# Save output image
|
||||||
|
if data['output_image']:
|
||||||
|
with open('{}_Instruments.jpg'.format(base_name), mode='wb') as f:
|
||||||
|
image = spec_utils.spectrogram_to_image(y_spec_m)
|
||||||
|
_, bin_image = cv2.imencode('.jpg', image)
|
||||||
|
bin_image.tofile(f)
|
||||||
|
with open('{}_Vocals.jpg'.format(base_name), mode='wb') as f:
|
||||||
|
image = spec_utils.spectrogram_to_image(v_spec_m)
|
||||||
|
_, bin_image = cv2.imencode('.jpg', image)
|
||||||
|
bin_image.tofile(f)
|
||||||
|
|
||||||
|
text_widget.write(base_text + 'Completed Seperation!\n\n')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
traceback_text = ''.join(traceback.format_tb(e.__traceback__))
|
traceback_text = ''.join(traceback.format_tb(e.__traceback__))
|
||||||
message = f'Traceback Error: "{traceback_text}"\n{type(e).__name__}: "{e}"\nFile: {music_file}\nPlease contact the creator and attach a screenshot of this error with the file and settings that caused it!'
|
message = f'Traceback Error: "{traceback_text}"\n{type(e).__name__}: "{e}"\nFile: {music_file}\nPlease contact the creator and attach a screenshot of this error with the file and settings that caused it!'
|
||||||
@@ -433,9 +436,9 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
 return

 os.remove('temp.wav')

 progress_var.set(0)
-text_widget.write(f'Conversion(s) Completed and Saving all Files!\n')
+text_widget.write(f'\nConversion(s) Completed!\n')
 text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}') # nopep8
 torch.cuda.empty_cache()
 button_widget.configure(state=tk.NORMAL) # Enable Button
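
The closing status line keeps the existing trick of rendering the perf_counter delta with strftime over gmtime, which yields an HH:MM:SS string (and silently wraps past 24 hours). A minimal reproduction:

    import time

    stime = time.perf_counter()
    # ... conversion work happens here ...
    elapsed = int(time.perf_counter() - stime)
    print('Time Elapsed:', time.strftime('%H:%M:%S', time.gmtime(elapsed)))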
inference_v5_ensemble.py (623 lines added, new file)
@@ -0,0 +1,623 @@
+from functools import total_ordering
+import pprint
+import argparse
+import os
+from statistics import mode
+
+import cv2
+import librosa
+import numpy as np
+import soundfile as sf
+import shutil
+from tqdm import tqdm
+
+from lib_v5 import dataset
+from lib_v5 import spec_utils
+from lib_v5.model_param_init import ModelParameters
+import torch
+
+# Command line text parsing and widget manipulation
+from collections import defaultdict
+import tkinter as tk
+import traceback # Error Message Recent Calls
+import time # Timer
+
+class VocalRemover(object):
+
+def __init__(self, data, text_widget: tk.Text):
+self.data = data
+self.text_widget = text_widget
+# self.offset = model.offset
+
+
+data = {
+# Paths
+'input_paths': None,
+'export_path': None,
+# Processing Options
+'gpu': -1,
+'postprocess': True,
+'tta': True,
+'save': True,
+'output_image': True,
+# Models
+'instrumentalModel': None,
+'useModel': None,
+# Constants
+'window_size': 512,
+'agg': 10,
+'ensChoose': 'HP1 Models'
+}
+
+default_window_size = data['window_size']
+default_agg = data['agg']
+
+def update_progress(progress_var, total_files, file_num, step: float = 1):
+"""Calculate the progress for the progress widget in the GUI"""
+base = (100 / total_files)
+progress = base * (file_num - 1)
+progress += base * step
+
+progress_var.set(progress)
+
+
+def get_baseText(total_files, file_num):
+"""Create the base text for the command widget"""
+text = 'File {file_num}/{total_files} '.format(file_num=file_num,
+total_files=total_files)
+return text
+
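
update_progress above splits the progress bar evenly across the input files and then interpolates inside the current file's slice using step (0.0 to 1.0). A small worked example of the same arithmetic:

    def expected_progress(total_files, file_num, step):
        base = 100 / total_files
        return base * (file_num - 1) + base * step

    assert expected_progress(4, 1, 0.0) == 0.0    # about to start file 1 of 4
    assert expected_progress(4, 2, 0.5) == 37.5   # halfway through file 2 of 4
    assert expected_progress(4, 4, 1.0) == 100.0  # finished file 4 of 4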
|
def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress_var: tk.Variable,
|
||||||
|
**kwargs: dict):
|
||||||
|
|
||||||
|
global args
|
||||||
|
global nn_arch_sizes
|
||||||
|
|
||||||
|
nn_arch_sizes = [
|
||||||
|
31191, # default
|
||||||
|
33966, 123821, 123812, 537238 # custom
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
p = argparse.ArgumentParser()
|
||||||
|
p.add_argument('--aggressiveness',type=float, default=data['agg']/100)
|
||||||
|
p.add_argument('--high_end_process', type=str, default='mirroring')
|
||||||
|
args = p.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def save_files(wav_instrument, wav_vocals):
|
||||||
|
"""Save output music files"""
|
||||||
|
vocal_name = '(Vocals)'
|
||||||
|
instrumental_name = '(Instrumental)'
|
||||||
|
save_path = os.path.dirname(base_name)
|
||||||
|
|
||||||
|
# Swap names if vocal model
|
||||||
|
|
||||||
|
VModel="Vocal"
|
||||||
|
|
||||||
|
if VModel in model_name:
|
||||||
|
# Reverse names
|
||||||
|
vocal_name, instrumental_name = instrumental_name, vocal_name
|
||||||
|
|
||||||
|
# Save Temp File
|
||||||
|
# For instrumental the instrumental is the temp file
|
||||||
|
# and for vocal the instrumental is the temp file due
|
||||||
|
# to reversement
|
||||||
|
sf.write(f'temp.wav',
|
||||||
|
wav_instrument, mp.param['sr'])
|
||||||
|
|
||||||
|
# -Save files-
|
||||||
|
# Instrumental
|
||||||
|
if instrumental_name is not None:
|
||||||
|
instrumental_path = '{save_path}/{file_name}.wav'.format(
|
||||||
|
save_path=save_path,
|
||||||
|
file_name = f'{os.path.basename(base_name)}_{ModelName_1}_{instrumental_name}',
|
||||||
|
)
|
||||||
|
|
||||||
|
sf.write(instrumental_path,
|
||||||
|
wav_instrument, mp.param['sr'])
|
||||||
|
# Vocal
|
||||||
|
if vocal_name is not None:
|
||||||
|
vocal_path = '{save_path}/{file_name}.wav'.format(
|
||||||
|
save_path=save_path,
|
||||||
|
file_name=f'{os.path.basename(base_name)}_{ModelName_1}_{vocal_name}',
|
||||||
|
)
|
||||||
|
sf.write(vocal_path,
|
||||||
|
wav_vocals, mp.param['sr'])
|
||||||
|
|
||||||
|
data.update(kwargs)
|
||||||
|
|
||||||
|
# Update default settings
|
||||||
|
global default_window_size
|
||||||
|
global default_agg
|
||||||
|
default_window_size = data['window_size']
|
||||||
|
default_agg = data['agg']
|
||||||
|
|
||||||
|
stime = time.perf_counter()
|
||||||
|
progress_var.set(0)
|
||||||
|
text_widget.clear()
|
||||||
|
button_widget.configure(state=tk.DISABLED) # Disable Button
|
||||||
|
|
||||||
|
# Separation Preperation
|
||||||
|
try: #Load File(s)
|
||||||
|
|
||||||
|
|
||||||
|
HP1_Models = [
|
||||||
|
{
|
||||||
|
'model_name':'HP_4BAND_44100_A',
|
||||||
|
'model_params':'lib_v5/modelparams/4band_44100.json',
|
||||||
|
'model_location':'models/Main Models/HP_4BAND_44100_A.pth',
|
||||||
|
'using_archtecture': '123821KB',
|
||||||
|
'loop_name': 'Ensemble Mode - Model 1/2'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'model_name':'HP_4BAND_44100_B',
|
||||||
|
'model_params':'lib_v5/modelparams/4band_v2.json',
|
||||||
|
'model_location':'models/Main Models/HP_4BAND_44100_B.pth',
|
||||||
|
'using_archtecture': '123821KB',
|
||||||
|
'loop_name': 'Ensemble Mode - Model 2/2'
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
HP2_Models = [
|
||||||
|
{
|
||||||
|
'model_name':'HP2_4BAND_44100_1',
|
||||||
|
'model_params':'lib_v5/modelparams/4band_44100.json',
|
||||||
|
'model_location':'models/Main Models/HP2_4BAND_44100_1.pth',
|
||||||
|
'using_archtecture': '537238KB',
|
||||||
|
'loop_name': 'Ensemble Mode - Model 1/3'
|
||||||
|
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'model_name':'HP2_4BAND_44100_2',
|
||||||
|
'model_params':'lib_v5/modelparams/4band_44100.json',
|
||||||
|
'model_location':'models/Main Models/HP2_4BAND_44100_2.pth',
|
||||||
|
'using_archtecture': '537238KB',
|
||||||
|
'loop_name': 'Ensemble Mode - Model 2/3'
|
||||||
|
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'model_name':'HP2_3BAND_44100_MSB2',
|
||||||
|
'model_params':'lib_v5/modelparams/3band_44100_msb2.json',
|
||||||
|
'model_location':'models/Main Models/HP2_3BAND_44100_MSB2.pth',
|
||||||
|
'using_archtecture': '537227KB',
|
||||||
|
'loop_name': 'Ensemble Mode - Model 3/3'
|
||||||
|
}
|
||||||
|
]

        All_HP_Models = [
            {
                'model_name': 'HP_4BAND_44100_A',
                'model_params': 'lib_v5/modelparams/4band_44100.json',
                'model_location': 'models/Main Models/HP_4BAND_44100_A.pth',
                'using_archtecture': '123821KB',
                'loop_name': 'Ensemble Mode - Model 1/5'
            },
            {
                'model_name': 'HP_4BAND_44100_B',
                'model_params': 'lib_v5/modelparams/4band_v2.json',
                'model_location': 'models/Main Models/HP_4BAND_44100_B.pth',
                'using_archtecture': '123821KB',
                'loop_name': 'Ensemble Mode - Model 2/5'
            },
            {
                'model_name': 'HP2_4BAND_44100_1',
                'model_params': 'lib_v5/modelparams/4band_44100.json',
                'model_location': 'models/Main Models/HP2_4BAND_44100_1.pth',
                'using_archtecture': '537238KB',
                'loop_name': 'Ensemble Mode - Model 3/5'
            },
            {
                'model_name': 'HP2_4BAND_44100_2',
                'model_params': 'lib_v5/modelparams/4band_44100.json',
                'model_location': 'models/Main Models/HP2_4BAND_44100_2.pth',
                'using_archtecture': '537238KB',
                'loop_name': 'Ensemble Mode - Model 4/5'
            },
            {
                'model_name': 'HP2_3BAND_44100_MSB2',
                'model_params': 'lib_v5/modelparams/3band_44100_msb2.json',
                'model_location': 'models/Main Models/HP2_3BAND_44100_MSB2.pth',
                'using_archtecture': '537227KB',
                'loop_name': 'Ensemble Mode - Model 5/5'
            }
        ]

        Vocal_Models = [
            {
                'model_name': 'HP_Vocal_4BAND_44100',
                'model_params': 'lib_v5/modelparams/4band_44100.json',
                'model_location': 'models/Main Models/HP_Vocal_4BAND_44100.pth',
                'using_archtecture': '123821KB',
                'loop_name': 'Ensemble Mode - Model 1/2'
            },
            {
                'model_name': 'HP_Vocal_AGG_4BAND_44100',
                'model_params': 'lib_v5/modelparams/4band_44100.json',
                'model_location': 'models/Main Models/HP_Vocal_AGG_4BAND_44100.pth',
                'using_archtecture': '123821KB',
                'loop_name': 'Ensemble Mode - Model 2/2'
            }
        ]
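
        # Each ensemble entry describes one model pass: the checkpoint to load
        # ('model_location'), its band-split configuration ('model_params'), the
        # network variant to import ('using_archtecture'), and the progress label
        # shown in the GUI ('loop_name').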

        if data['ensChoose'] == 'HP1 Models':
            loops = HP1_Models
            ensefolder = 'HP_Models_Saved_Outputs'
            ensemode = 'HP_Models'
        if data['ensChoose'] == 'HP2 Models':
            loops = HP2_Models
            ensefolder = 'HP2_Models_Saved_Outputs'
            ensemode = 'HP2_Models'
        if data['ensChoose'] == 'All HP Models':
            loops = All_HP_Models
            ensefolder = 'All_HP_Models_Saved_Outputs'
            ensemode = 'All_HP_Models'
        if data['ensChoose'] == 'Vocal Models':
            loops = Vocal_Models
            ensefolder = 'Vocal_Models_Saved_Outputs'
            ensemode = 'Vocal_Models'
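
        # The GUI's 'ensChoose' value selects which model list runs, the sub-folder
        # the per-model outputs are saved to, and the tag embedded in the final
        # ensembled file names.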

        for file_num, music_file in enumerate(data['input_paths'], start=1):

            # -Get text and update progress-
            base_text = get_baseText(total_files=len(data['input_paths']),
                                     file_num=file_num)
            progress_kwargs = {'progress_var': progress_var,
                               'total_files': len(data['input_paths']),
                               'file_num': file_num}
            update_progress(**progress_kwargs,
                            step=0)

            # Load Model(s)
            # text_widget.write(base_text + 'Loading models...')

            for i, c in tqdm(enumerate(loops), disable=True, desc='Iterations..'):

                text_widget.write(c['loop_name'] + '\n\n')

                text_widget.write(base_text + 'Loading ' + c['model_name'] + '... ')

                arch_now = c['using_archtecture']

                if arch_now == '123821KB':
                    from lib_v5 import nets_123821KB as nets
                elif arch_now == '537238KB':
                    from lib_v5 import nets_537238KB as nets
                elif arch_now == '537227KB':
                    from lib_v5 import nets_537227KB as nets
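
                # The nets_* modules appear to be the same network definition built
                # at different capacities; 'using_archtecture' (named after the
                # approximate checkpoint size) picks the variant that matches the
                # weights about to be loaded.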

                def determineenseFolderName():
                    """
                    Determine the name used for the output folder and
                    appended to the music file names.
                    """
                    enseFolderName = ''

                    # -Instrumental-
                    if str(ensefolder):
                        enseFolderName += os.path.splitext(os.path.basename(ensefolder))[0]

                    if enseFolderName:
                        enseFolderName = '/' + enseFolderName

                    return enseFolderName

                enseFolderName = determineenseFolderName()
                if enseFolderName:
                    folder_path = f'{data["export_path"]}{enseFolderName}'
                    if not os.path.isdir(folder_path):
                        os.mkdir(folder_path)

                # Determine File Name
                base_name = f'{data["export_path"]}{enseFolderName}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}'
                enseExport = f'{data["export_path"]}{enseFolderName}/'
                trackname = f'{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}'

                ModelName_1 = c['model_name']

                print('Model Parameters:', c['model_params'])

                mp = ModelParameters(c['model_params'])

                # -Instrumental-
                if os.path.isfile(c['model_location']):
                    device = torch.device('cpu')
                    model = nets.CascadedASPPNet(mp.param['bins'] * 2)
                    model.load_state_dict(torch.load(c['model_location'],
                                                     map_location=device))
                    if torch.cuda.is_available() and data['gpu'] >= 0:
                        device = torch.device('cuda:{}'.format(data['gpu']))
                        model.to(device)

                text_widget.write('Done!\n')

                model_name = os.path.basename(c["model_name"])
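
                # Weights are loaded onto the CPU first (map_location=device) and the
                # model is only moved to CUDA when a GPU is available and requested,
                # so the same code path works on CPU-only machines.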

                # -Go through the different steps of separation-
                # Wave source
                text_widget.write(base_text + 'Loading wave source... ')

                X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {}

                bands_n = len(mp.param['band'])

                for d in range(bands_n, 0, -1):
                    bp = mp.param['band'][d]

                    if d == bands_n:  # high-end band
                        X_wave[d], _ = librosa.load(
                            music_file, bp['sr'], False, dtype=np.float32, res_type=bp['res_type'])

                        if X_wave[d].ndim == 1:
                            X_wave[d] = np.asarray([X_wave[d], X_wave[d]])
                    else:  # lower bands
                        X_wave[d] = librosa.resample(X_wave[d+1], mp.param['band'][d+1]['sr'], bp['sr'], res_type=bp['res_type'])

                    # Stft of wave source
                    X_spec_s[d] = spec_utils.wave_to_spectrogram_mt(X_wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'],
                                                                    mp.param['mid_side_b2'], mp.param['reverse'])

                    if d == bands_n and args.high_end_process != 'none':
                        input_high_end_h = (bp['n_fft']//2 - bp['crop_stop']) + (mp.param['pre_filter_stop'] - mp.param['pre_filter_start'])
                        input_high_end = X_spec_s[d][:, bp['n_fft']//2-input_high_end_h:bp['n_fft']//2, :]
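
                # Multiband loading: the highest band is read from disk at its own
                # sample rate, each lower band is derived by resampling the band
                # above it, and every band is converted to a spectrogram before the
                # bands are merged by combine_spectrograms below. When high-end
                # processing is enabled, the frequency bins above the crop point are
                # kept aside so they can be restored after separation.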

                text_widget.write('Done!\n')

                update_progress(**progress_kwargs,
                                step=0.1)

                text_widget.write(base_text + 'Stft of wave source... ')
                text_widget.write('Done!\n')
                text_widget.write(base_text + "Please Wait...\n")

                X_spec_m = spec_utils.combine_spectrograms(X_spec_s, mp)

                del X_wave, X_spec_s

                def inference(X_spec, device, model, aggressiveness):

                    def _execute(X_mag_pad, roi_size, n_window, device, model, aggressiveness):
                        model.eval()

                        with torch.no_grad():
                            preds = []

                            iterations = [n_window]

                            total_iterations = sum(iterations)

                            text_widget.write(base_text + f"Processing {total_iterations} Slices... ")

                            for i in tqdm(range(n_window)):
                                update_progress(**progress_kwargs,
                                                step=(0.1 + (0.8/n_window * i)))
                                start = i * roi_size
                                X_mag_window = X_mag_pad[None, :, :, start:start + data['window_size']]
                                X_mag_window = torch.from_numpy(X_mag_window).to(device)

                                pred = model.predict(X_mag_window, aggressiveness)

                                pred = pred.detach().cpu().numpy()
                                preds.append(pred[0])

                            pred = np.concatenate(preds, axis=2)

                        text_widget.write('Done!\n')
                        return pred

                    def preprocess(X_spec):
                        X_mag = np.abs(X_spec)
                        X_phase = np.angle(X_spec)

                        return X_mag, X_phase

                    X_mag, X_phase = preprocess(X_spec)

                    coef = X_mag.max()
                    X_mag_pre = X_mag / coef

                    n_frame = X_mag_pre.shape[2]
                    pad_l, pad_r, roi_size = dataset.make_padding(n_frame,
                                                                  data['window_size'], model.offset)
                    n_window = int(np.ceil(n_frame / roi_size))

                    X_mag_pad = np.pad(
                        X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant')

                    pred = _execute(X_mag_pad, roi_size, n_window,
                                    device, model, aggressiveness)
                    pred = pred[:, :, :n_frame]

                    if data['tta']:
                        pad_l += roi_size // 2
                        pad_r += roi_size // 2
                        n_window += 1

                        X_mag_pad = np.pad(
                            X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant')

                        pred_tta = _execute(X_mag_pad, roi_size, n_window,
                                            device, model, aggressiveness)
                        pred_tta = pred_tta[:, :, roi_size // 2:]
                        pred_tta = pred_tta[:, :, :n_frame]

                        return (pred + pred_tta) * 0.5 * coef, X_mag, np.exp(1.j * X_phase)
                    else:
                        return pred * coef, X_mag, np.exp(1.j * X_phase)
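
                # TTA (test-time augmentation): a second pass is run with the
                # analysis windows shifted by half a window, and the two predictions
                # are averaged, which is intended to reduce window-boundary artifacts
                # at roughly double the inference time.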

                aggressiveness = {'value': args.aggressiveness, 'split_bin': mp.param['band'][1]['crop_stop']}

                if data['tta']:
                    text_widget.write(base_text + "Running Inferences (TTA)... \n")
                else:
                    text_widget.write(base_text + "Running Inference... \n")

                pred, X_mag, X_phase = inference(X_spec_m,
                                                 device,
                                                 model, aggressiveness)

                update_progress(**progress_kwargs,
                                step=0.85)

                # Postprocess
                if data['postprocess']:
                    text_widget.write(base_text + 'Post processing... ')
                    pred_inv = np.clip(X_mag - pred, 0, np.inf)
                    pred = spec_utils.mask_silence(pred, pred_inv)
                    text_widget.write('Done!\n')

                    update_progress(**progress_kwargs,
                                    step=0.85)

                # Inverse stft
                text_widget.write(base_text + 'Inverse stft of instruments and vocals... ')  # nopep8
                y_spec_m = pred * X_phase
                v_spec_m = X_spec_m - y_spec_m
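
                # The network predicts an instrumental magnitude; recombining it with
                # the mixture phase gives the instrumental spectrogram (y_spec_m),
                # and the vocal spectrogram (v_spec_m) is taken as the residual of
                # the original mixture.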

                if args.high_end_process.startswith('mirroring'):
                    input_high_end_ = spec_utils.mirroring(args.high_end_process, y_spec_m, input_high_end, mp)

                    wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, mp, input_high_end_h, input_high_end_)
                else:
                    wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, mp)

                if args.high_end_process.startswith('mirroring'):
                    input_high_end_ = spec_utils.mirroring(args.high_end_process, v_spec_m, input_high_end, mp)

                    wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, mp, input_high_end_h, input_high_end_)
                else:
                    wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, mp)

                text_widget.write('Done!\n')

                update_progress(**progress_kwargs,
                                step=0.9)
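
                # 'mirroring' appears to reconstruct the frequency content above the
                # model's crop band from the stored input high end, so the saved wavs
                # keep the source's full bandwidth instead of being low-passed.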

                # Save output music files
                text_widget.write(base_text + 'Saving Files... ')
                save_files(wav_instrument, wav_vocals)
                text_widget.write('Done!\n')

                # Save output image
                if data['output_image']:
                    with open('{}_Instruments.jpg'.format(base_name), mode='wb') as f:
                        image = spec_utils.spectrogram_to_image(y_spec_m)
                        _, bin_image = cv2.imencode('.jpg', image)
                        bin_image.tofile(f)
                    with open('{}_Vocals.jpg'.format(base_name), mode='wb') as f:
                        image = spec_utils.spectrogram_to_image(v_spec_m)
                        _, bin_image = cv2.imencode('.jpg', image)
                        bin_image.tofile(f)

                text_widget.write(base_text + 'Completed Separation!\n\n')

            # Ensembling Outputs
            def get_files(folder="", prefix="", suffix=""):
                return [f"{folder}{i}" for i in os.listdir(folder) if i.startswith(prefix) if i.endswith(suffix)]

            ensambles = [
                {
                    'algorithm': 'min_mag',
                    'model_params': 'lib_v5/modelparams/1band_sr44100_hl512.json',
                    'files': get_files(folder=enseExport, prefix=trackname, suffix="_(Instrumental).wav"),
                    'output': '{}_Ensembled_{}_Instrumentals'.format(trackname, ensemode),
                    'type': 'Instrumentals'
                },
                {
                    'algorithm': 'max_mag',
                    'model_params': 'lib_v5/modelparams/1band_sr44100_hl512.json',
                    'files': get_files(folder=enseExport, prefix=trackname, suffix="_(Vocals).wav"),
                    'output': '{}_Ensembled_{}_Vocals'.format(trackname, ensemode),
                    'type': 'Vocals'
                }
            ]
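
            # The instrumental ensemble uses 'min_mag' and the vocal ensemble
            # 'max_mag'. A conceptual sketch only (not the spec_utils
            # implementation): for two aligned spectrograms a and b,
            #     min_mag: np.where(np.abs(a) <= np.abs(b), a, b)
            #     max_mag: np.where(np.abs(a) >= np.abs(b), a, b)
            # i.e. keep the quieter bin for instrumentals (suppressing vocal bleed
            # any single model lets through) and the louder bin for vocals.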

            for i, e in tqdm(enumerate(ensambles), desc="Ensembling..."):

                text_widget.write(base_text + "Ensembling " + e['type'] + "... ")

                wave, specs = {}, {}

                mp = ModelParameters(e['model_params'])

                for i in range(len(e['files'])):
                    spec = {}

                    for d in range(len(mp.param['band']), 0, -1):
                        bp = mp.param['band'][d]

                        if d == len(mp.param['band']):  # high-end band
                            wave[d], _ = librosa.load(
                                e['files'][i], bp['sr'], False, dtype=np.float32, res_type=bp['res_type'])

                            if len(wave[d].shape) == 1:  # mono to stereo
                                wave[d] = np.array([wave[d], wave[d]])
                        else:  # lower bands
                            wave[d] = librosa.resample(wave[d+1], mp.param['band'][d+1]['sr'], bp['sr'], res_type=bp['res_type'])

                        spec[d] = spec_utils.wave_to_spectrogram(wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'], mp.param['mid_side_b2'], mp.param['reverse'])

                    specs[i] = spec_utils.combine_spectrograms(spec, mp)

                del wave

                sf.write(os.path.join('{}'.format(data['export_path']), '{}.wav'.format(e['output'])),
                         spec_utils.cmb_spectrogram_to_wave(spec_utils.ensembling(e['algorithm'],
                                                                                  specs), mp), mp.param['sr'])

                if not data['save']:  # Delete the individual model outputs unless 'Save All Outputs' is checked
                    files = e['files']
                    for file in files:
                        os.remove(file)

                text_widget.write("Done!\n")

            update_progress(**progress_kwargs,
                            step=0.95)
            text_widget.write("\n")

    except Exception as e:
        traceback_text = ''.join(traceback.format_tb(e.__traceback__))
        message = f'Traceback Error: "{traceback_text}"\n{type(e).__name__}: "{e}"\nFile: {music_file}\nPlease contact the creator and attach a screenshot of this error with the file and settings that caused it!'
        tk.messagebox.showerror(master=window,
                                title='Untracked Error',
                                message=message)
        print(traceback_text)
        print(type(e).__name__, e)
        print(message)
        progress_var.set(0)
        button_widget.configure(state=tk.NORMAL)  # Enable Button
        return

    if len(os.listdir(enseExport)) == 0:  # Check if the folder is empty
        shutil.rmtree(folder_path)

    update_progress(**progress_kwargs,
                    step=1)

    print('Done!')

    os.remove('temp.wav')

    progress_var.set(0)
    text_widget.write(f'Conversions Completed!\n')
    text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}')  # nopep8
    torch.cuda.empty_cache()
    button_widget.configure(state=tk.NORMAL)  # Enable Button