diff --git a/VocalRemover.py b/VocalRemover.py index 984df7a..1b0b4da 100644 --- a/VocalRemover.py +++ b/VocalRemover.py @@ -22,14 +22,15 @@ from collections import defaultdict import queue import threading # Run the algorithm inside a thread + from pathlib import Path - import inference_v5 -import win32gui, win32con +import inference_v5_ensemble +# import win32gui, win32con -the_program_to_hide = win32gui.GetForegroundWindow() -win32gui.ShowWindow(the_program_to_hide , win32con.SW_HIDE) +# the_program_to_hide = win32gui.GetForegroundWindow() +# win32gui.ShowWindow(the_program_to_hide , win32con.SW_HIDE) # Change the current working directory to the directory # this file sits in @@ -44,19 +45,21 @@ os.chdir(base_path) # Change the current working directory to the base path instrumentalModels_dir = os.path.join(base_path, 'models') banner_path = os.path.join(base_path, 'img', 'UVR-banner.png') -refresh_path = os.path.join(base_path, 'img', 'refresh.png') +efile_path = os.path.join(base_path, 'img', 'file.png') DEFAULT_DATA = { 'exportPath': '', 'inputPaths': [], 'gpu': False, 'postprocess': False, 'tta': False, + 'save': True, 'output_image': False, 'window_size': '512', 'agg': 10, 'modelFolder': False, 'modelInstrumentalLabel': '', - #'aiModel': 'v5', + 'aiModel': 'Single Model', + 'ensChoose': 'HP1 Models', 'useModel': 'instrumental', 'lastDir': None, } @@ -196,7 +199,7 @@ class MainWindow(TkinterDnD.Tk): PADDING = 10 COL1_ROWS = 6 - COL2_ROWS = 5 + COL2_ROWS = 6 COL3_ROWS = 6 def __init__(self): @@ -223,7 +226,7 @@ class MainWindow(TkinterDnD.Tk): # --Variables-- self.logo_img = open_image(path=banner_path, size=(self.winfo_width(), 9999)) - self.refresh_img = open_image(path=refresh_path, + self.efile_img = open_image(path=efile_path, size=(20, 20)) self.instrumentalLabel_to_path = defaultdict(lambda: '') self.lastInstrumentalModels = [] @@ -236,6 +239,7 @@ class MainWindow(TkinterDnD.Tk): self.gpuConversion_var = tk.BooleanVar(value=data['gpu']) self.postprocessing_var = tk.BooleanVar(value=data['postprocess']) self.tta_var = tk.BooleanVar(value=data['tta']) + self.save_var = tk.BooleanVar(value=data['save']) self.outputImage_var = tk.BooleanVar(value=data['output_image']) # Models self.instrumentalModel_var = tk.StringVar(value=data['modelInstrumentalLabel']) @@ -245,8 +249,10 @@ class MainWindow(TkinterDnD.Tk): self.winSize_var = tk.StringVar(value=data['window_size']) self.agg_var = tk.StringVar(value=data['agg']) # AI model - #self.aiModel_var = tk.StringVar(value=data['aiModel']) - #self.last_aiModel = self.aiModel_var.get() + self.aiModel_var = tk.StringVar(value=data['aiModel']) + self.last_aiModel = self.aiModel_var.get() + self.ensChoose_var = tk.StringVar(value=data['ensChoose']) + self.last_ensChoose = self.ensChoose_var.get() # Other self.inputPathsEntry_var = tk.StringVar(value='') self.lastDir = data['lastDir'] # nopep8 @@ -277,9 +283,9 @@ class MainWindow(TkinterDnD.Tk): self.conversion_Button = ttk.Button(master=self, text='Start Conversion', command=self.start_conversion) - self.refresh_Button = ttk.Button(master=self, - image=self.refresh_img, - command=self.restart) + self.efile_Button = ttk.Button(master=self, + image=self.efile_img, + command=self.open_newModel_filedialog) self.progressbar = ttk.Progressbar(master=self, variable=self.progress_var) @@ -288,7 +294,7 @@ class MainWindow(TkinterDnD.Tk): background='#a0a0a0', borderwidth=0,) self.command_Text.write(f'COMMAND LINE [{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}]') # nopep8 - + def configure_widgets(self): """Change widget styling and appearance""" @@ -322,7 +328,7 @@ class MainWindow(TkinterDnD.Tk): relx=0, rely=0, relwidth=1, relheight=0) self.conversion_Button.place(x=10, y=self.IMAGE_HEIGHT + self.FILEPATHS_HEIGHT + self.OPTIONS_HEIGHT + self.PADDING*2, width=-20 - 40, height=self.CONVERSIONBUTTON_HEIGHT, relx=0, rely=0, relwidth=1, relheight=0) - self.refresh_Button.place(x=-10 - 35, y=self.IMAGE_HEIGHT + self.FILEPATHS_HEIGHT + self.OPTIONS_HEIGHT + self.PADDING*2, width=35, height=self.CONVERSIONBUTTON_HEIGHT, + self.efile_Button.place(x=-10 - 35, y=self.IMAGE_HEIGHT + self.FILEPATHS_HEIGHT + self.OPTIONS_HEIGHT + self.PADDING*2, width=35, height=self.CONVERSIONBUTTON_HEIGHT, relx=1, rely=0, relwidth=0, relheight=0) self.command_Text.place(x=15, y=self.IMAGE_HEIGHT + self.FILEPATHS_HEIGHT + self.OPTIONS_HEIGHT + self.CONVERSIONBUTTON_HEIGHT + self.PADDING*3, width=-30, height=self.COMMAND_HEIGHT, relx=0, rely=0, relwidth=1, relheight=0) @@ -380,11 +386,18 @@ class MainWindow(TkinterDnD.Tk): text='TTA', variable=self.tta_var, ) + # Save Ensemble Outputs + self.options_save_Checkbutton = ttk.Checkbutton(master=self.options_Frame, + text='Save All Outputs', + variable=self.save_var, + ) # Save Image self.options_image_Checkbutton = ttk.Checkbutton(master=self.options_Frame, text='Output Image', variable=self.outputImage_var, ) + + # Model Test Mode self.options_modelFolder_Checkbutton = ttk.Checkbutton(master=self.options_Frame, text='Model Test Mode', variable=self.modelFolder_var, @@ -407,12 +420,20 @@ class MainWindow(TkinterDnD.Tk): background='#404040', font=self.font, foreground='white', relief="groove") # AI model - # self.options_aiModel_Label = tk.Label(master=self.options_Frame, - # text='Choose AI Engine', anchor=tk.CENTER, - # background='#63605f', font=self.font, foreground='white', relief="sunken") - # self.options_aiModel_Optionmenu = ttk.OptionMenu(self.options_Frame, - # self.aiModel_var, - # None, 'v5') + self.options_aiModel_Label = tk.Label(master=self.options_Frame, + text='Choose Conversion Method', anchor=tk.CENTER, + background='#404040', font=self.font, foreground='white', relief="groove") + self.options_aiModel_Optionmenu = ttk.OptionMenu(self.options_Frame, + self.aiModel_var, + None, 'Single Model', 'Ensemble Mode') + # Ensemble Mode + self.options_ensChoose_Label = tk.Label(master=self.options_Frame, + text='Choose Ensemble', anchor=tk.CENTER, + background='#404040', font=self.font, foreground='white', relief="groove") + self.options_ensChoose_Optionmenu = ttk.OptionMenu(self.options_Frame, + self.ensChoose_var, + None, 'HP1 Models', 'HP2 Models', 'All HP Models', 'Vocal Models') + # "Save to", "Select Your Audio File(s)"", and "Start Conversion" Button Style @@ -428,10 +449,10 @@ class MainWindow(TkinterDnD.Tk): self.instrumentalModel_var) # Add Open Export Directory Button - self.options_export_Button = ttk.Button(master=self.options_Frame, - text='Open Export Directory', - style="Bold.TButton", - command=self.open_newModel_filedialog) + # self.options_export_Button = ttk.Button(master=self.options_Frame, + # text='Open Export Directory', + # style="Bold.TButton", + # command=self.open_newModel_filedialog) # -Place Widgets- # -Column 1- self.options_gpu_Checkbutton.place(x=0, y=0, width=0, height=0, @@ -440,6 +461,8 @@ class MainWindow(TkinterDnD.Tk): relx=0, rely=1/self.COL1_ROWS, relwidth=1/3, relheight=1/self.COL1_ROWS) self.options_tta_Checkbutton.place(x=0, y=0, width=0, height=0, relx=0, rely=2/self.COL1_ROWS, relwidth=1/3, relheight=1/self.COL1_ROWS) + self.options_save_Checkbutton.place(x=0, y=0, width=0, height=0, + relx=0, rely=4/self.COL1_ROWS, relwidth=1/3, relheight=1/self.COL1_ROWS) self.options_image_Checkbutton.place(x=0, y=0, width=0, height=0, relx=0, rely=3/self.COL1_ROWS, relwidth=1/3, relheight=1/self.COL1_ROWS) self.options_modelFolder_Checkbutton.place(x=0, y=0, width=0, height=0, @@ -447,12 +470,22 @@ class MainWindow(TkinterDnD.Tk): # -Column 2- + self.options_instrumentalModel_Label.place(x=-15, y=6, width=0, height=-10, - relx=1/3, rely=0/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) + relx=1/3, rely=2/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) self.options_instrumentalModel_Optionmenu.place(x=-15, y=6, width=0, height=-10, - relx=1/3, rely=1/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) - self.options_export_Button.place(x=0, y=0, width=-30, height=-8, relx=1/3, rely=3/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) + + + self.options_ensChoose_Label.place(x=-15, y=6, width=0, height=-10, + relx=1/3, rely=0/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) + self.options_ensChoose_Optionmenu.place(x=-15, y=6, width=0, height=-10, + relx=1/3, rely=1/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) + + + + # self.options_export_Button.place(x=0, y=0, width=-30, height=-8, + # relx=2/3, rely=4/self.COL3_ROWS, relwidth=1/3, relheight=1/self.COL3_ROWS) # -Column 3- @@ -469,15 +502,17 @@ class MainWindow(TkinterDnD.Tk): relx=2/3, rely=3/self.COL3_ROWS, relwidth=1/3, relheight=1/self.COL3_ROWS) - # AI model - # self.options_aiModel_Label.place(x=5, y=-5, width=-30, height=-8, - # relx=1/3, rely=5/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) - # self.options_aiModel_Optionmenu.place(x=5, y=-5, width=-30, height=-8, - # relx=1/3, rely=6/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) + # Conversion Method + self.options_aiModel_Label.place(x=-15, y=6, width=0, height=-10, + relx=1/3, rely=0/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) + self.options_aiModel_Optionmenu.place(x=-15, y=4, width=0, height=-10, + relx=1/3, rely=1/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) + + # Model deselect - # self.aiModel_var.trace_add('write', - # lambda *args: self.deselect_models()) + self.aiModel_var.trace_add('write', + lambda *args: self.deselect_models()) # Opening filedialogs def open_file_filedialog(self): @@ -532,6 +567,7 @@ class MainWindow(TkinterDnD.Tk): else: window_size = int(self.winSize_var.get()) agg = int(self.agg_var.get()) + ensChoose = str(self.ensChoose_var.get()) except ValueError: # Non integer was put in entry box tk.messagebox.showwarning(master=self, title='Invalid Input', @@ -551,25 +587,25 @@ class MainWindow(TkinterDnD.Tk): message='You have selected an invalid music file! Please make sure that the file still exists!', detail=f'File path: {path}') return - if not os.path.isfile(instrumentalModel_path): - tk.messagebox.showwarning(master=self, - title='Invalid Main Model File', - message='You have selected an invalid main model file!\nPlease make sure that your model file still exists!') - return + if self.aiModel_var.get() == 'Single Model': + if not os.path.isfile(instrumentalModel_path): + tk.messagebox.showwarning(master=self, + title='Invalid Main Model File', + message='You have selected an invalid main model file!\nPlease make sure that your model file still exists!') + return + if not os.path.isdir(export_path): tk.messagebox.showwarning(master=self, title='Invalid Export Directory', message='You have selected an invalid export directory!\nPlease make sure that your directory still exists!') return - # if self.aiModel_var.get() == 'v4': - # inference = inference_v4 - # elif self.aiModel_var.get() == 'v5': - # inference = inference_v5 - # else: - # raise TypeError('This error should not occur.') - - inference = inference_v5 + if self.aiModel_var.get() == 'Single Model': + inference = inference_v5 + elif self.aiModel_var.get() == 'Ensemble Mode': + inference = inference_v5_ensemble + else: + raise TypeError('This error should not occur.') # -Run the algorithm- threading.Thread(target=inference.main, @@ -580,7 +616,8 @@ class MainWindow(TkinterDnD.Tk): # Processing Options 'gpu': 0 if self.gpuConversion_var.get() else -1, 'postprocess': self.postprocessing_var.get(), - 'tta': self.tta_var.get(), # not needed for v2 + 'tta': self.tta_var.get(), + 'save': self.save_var.get(), 'output_image': self.outputImage_var.get(), # Models 'instrumentalModel': instrumentalModel_path, @@ -591,10 +628,12 @@ class MainWindow(TkinterDnD.Tk): # Constants 'window_size': window_size, 'agg': agg, + 'ensChoose': ensChoose, # Other Variables (Tkinter) 'window': self, 'text_widget': self.command_Text, 'button_widget': self.conversion_Button, + 'inst_menu': self.options_instrumentalModel_Optionmenu, 'progress_var': self.progress_var, }, daemon=True @@ -647,23 +686,55 @@ class MainWindow(TkinterDnD.Tk): on certain selections """ - # Models - # self.options_instrumentalModel_Label.configure(foreground='#000') - # self.options_instrumentalModel_Optionmenu.configure(state=tk.NORMAL) # nopep8 - - # if self.aiModel_var.get() == 'v5': - # self.options_tta_Checkbutton.configure(state=tk.NORMAL) - # self.options_agg_Label.place(x=5, y=-5, width=-30, height=-8, - # relx=1/3, rely=3/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) - # self.options_agg_Entry.place(x=5, y=-4, width=-30, height=-8, - # relx=1/3, rely=4/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) + if self.aiModel_var.get() == 'Single Model': + self.options_ensChoose_Label.place_forget() + self.options_ensChoose_Optionmenu.place_forget() + self.options_save_Checkbutton.configure(state=tk.DISABLED) + self.options_save_Checkbutton.place_forget() + self.options_modelFolder_Checkbutton.configure(state=tk.NORMAL) + self.options_modelFolder_Checkbutton.place(x=0, y=0, width=0, height=0, + relx=0, rely=4/self.COL1_ROWS, relwidth=1/3, relheight=1/self.COL1_ROWS) + self.options_instrumentalModel_Label.place(x=-15, y=6, width=0, height=-10, + relx=1/3, rely=3/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) + self.options_instrumentalModel_Optionmenu.place(x=-15, y=6, width=0, height=-10, + relx=1/3, rely=4/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) + else: + self.options_instrumentalModel_Label.place_forget() + self.options_instrumentalModel_Optionmenu.place_forget() + self.options_modelFolder_Checkbutton.place_forget() + self.options_modelFolder_Checkbutton.configure(state=tk.DISABLED) + self.options_save_Checkbutton.configure(state=tk.NORMAL) + self.options_save_Checkbutton.place(x=0, y=0, width=0, height=0, + relx=0, rely=4/self.COL1_ROWS, relwidth=1/3, relheight=1/self.COL1_ROWS) + self.options_ensChoose_Label.place(x=-15, y=6, width=0, height=-10, + relx=1/3, rely=3/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) + self.options_ensChoose_Optionmenu.place(x=-15, y=6, width=0, height=-10, + relx=1/3, rely=4/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) - # else: - # self.options_tta_Checkbutton.configure(state=tk.NORMAL) - # self.options_agg_Label.place(x=5, y=-5, width=-30, height=-8, - # relx=1/3, rely=3/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) - # self.options_agg_Entry.place(x=5, y=-4, width=-30, height=-8, - # relx=1/3, rely=4/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) + if self.aiModel_var.get() == 'Ensemble Mode': + self.options_instrumentalModel_Label.place_forget() + self.options_instrumentalModel_Optionmenu.place_forget() + self.options_modelFolder_Checkbutton.place_forget() + self.options_modelFolder_Checkbutton.configure(state=tk.DISABLED) + self.options_save_Checkbutton.configure(state=tk.NORMAL) + self.options_save_Checkbutton.place(x=0, y=0, width=0, height=0, + relx=0, rely=4/self.COL1_ROWS, relwidth=1/3, relheight=1/self.COL1_ROWS) + self.options_ensChoose_Label.place(x=-15, y=6, width=0, height=-10, + relx=1/3, rely=3/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) + self.options_ensChoose_Optionmenu.place(x=-15, y=6, width=0, height=-10, + relx=1/3, rely=4/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) + else: + self.options_ensChoose_Label.place_forget() + self.options_ensChoose_Optionmenu.place_forget() + self.options_save_Checkbutton.configure(state=tk.DISABLED) + self.options_save_Checkbutton.place_forget() + self.options_modelFolder_Checkbutton.configure(state=tk.NORMAL) + self.options_modelFolder_Checkbutton.place(x=0, y=0, width=0, height=0, + relx=0, rely=4/self.COL1_ROWS, relwidth=1/3, relheight=1/self.COL1_ROWS) + self.options_instrumentalModel_Label.place(x=-15, y=6, width=0, height=-10, + relx=1/3, rely=3/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) + self.options_instrumentalModel_Optionmenu.place(x=-15, y=6, width=0, height=-10, + relx=1/3, rely=4/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) self.update_inputPaths() @@ -678,23 +749,26 @@ class MainWindow(TkinterDnD.Tk): self.last_aiModel = self.aiModel_var.get() self.instrumentalModel_var.set('') + self.ensChoose_var.set('HP1 Models') self.winSize_var.set(DEFAULT_DATA['window_size']) self.agg_var.set(DEFAULT_DATA['agg']) + self.modelFolder_var.set(DEFAULT_DATA['modelFolder']) + self.update_available_models() self.update_states() - def restart(self): - """ - Restart the application after asking for confirmation - """ - save = tk.messagebox.askyesno(title='Confirmation', - message='The application will restart. Do you want to save the data?') - if save: - self.save_values() - subprocess.Popen(f'python "{__file__}"', shell=True) - exit() + # def restart(self): + # """ + # Restart the application after asking for confirmation + # """ + # save = tk.messagebox.askyesno(title='Confirmation', + # message='The application will restart. Do you want to save the data?') + # if save: + # self.save_values() + # subprocess.Popen(f'..App\Python\python.exe "{__file__}"') + # exit() def save_values(self): """ @@ -716,6 +790,7 @@ class MainWindow(TkinterDnD.Tk): 'gpu': self.gpuConversion_var.get(), 'postprocess': self.postprocessing_var.get(), 'tta': self.tta_var.get(), + 'save': self.save_var.get(), 'output_image': self.outputImage_var.get(), 'window_size': window_size, 'agg': agg, @@ -723,7 +798,8 @@ class MainWindow(TkinterDnD.Tk): 'lastDir': self.lastDir, 'modelFolder': self.modelFolder_var.get(), 'modelInstrumentalLabel': self.instrumentalModel_var.get(), - #'aiModel': self.aiModel_var.get(), + 'aiModel': self.aiModel_var.get(), + 'ensChoose': self.ensChoose_var.get(), }) self.destroy() diff --git a/inference_v5.py b/inference_v5.py index 423b4d4..d27c871 100644 --- a/inference_v5.py +++ b/inference_v5.py @@ -30,70 +30,8 @@ class VocalRemover(object): self.text_widget = text_widget self.models = defaultdict(lambda: None) self.devices = defaultdict(lambda: None) - self._load_models() # self.offset = model.offset - - def _load_models(self): - self.text_widget.write('Loading models...\n') # nopep8 Write Command Text - - nn_arch_sizes = [ - 31191, # default - 33966, 123821, 123812, 537238 # custom - ] - global args - global model_params_d - - p = argparse.ArgumentParser() - p.add_argument('--paramone', type=str, default='lib_v5/modelparams/4band_44100.json') - p.add_argument('--paramtwo', type=str, default='lib_v5/modelparams/4band_v2.json') - p.add_argument('--paramthree', type=str, default='lib_v5/modelparams/3band_44100_msb2.json') - p.add_argument('--paramfour', type=str, default='lib_v5/modelparams/4band_v2_sn.json') - p.add_argument('--aggressiveness',type=float, default=data['agg']/100) - p.add_argument('--nn_architecture', type=str, choices= ['auto'] + list('{}KB'.format(s) for s in nn_arch_sizes), default='auto') - p.add_argument('--high_end_process', type=str, default='mirroring') - args = p.parse_args() - - if 'auto' == args.nn_architecture: - model_size = math.ceil(os.stat(data['instrumentalModel']).st_size / 1024) - args.nn_architecture = '{}KB'.format(min(nn_arch_sizes, key=lambda x:abs(x-model_size))) - - nets = importlib.import_module('lib_v5.nets' + f'_{args.nn_architecture}'.replace('_{}KB'.format(nn_arch_sizes[0]), ''), package=None) - - ModelName=(data['instrumentalModel']) - - ModelParam1="4BAND_44100" - ModelParam2="4BAND_44100_B" - ModelParam3="MSB2" - ModelParam4="4BAND_44100_SN" - - if ModelParam1 in ModelName: - model_params_d=args.paramone - if ModelParam2 in ModelName: - model_params_d=args.paramtwo - if ModelParam3 in ModelName: - model_params_d=args.paramthree - if ModelParam4 in ModelName: - model_params_d=args.paramfour - - print(model_params_d) - - mp = ModelParameters(model_params_d) - - # -Instrumental- - if os.path.isfile(data['instrumentalModel']): - device = torch.device('cpu') - model = nets.CascadedASPPNet(mp.param['bins'] * 2) - model.load_state_dict(torch.load(self.data['instrumentalModel'], - map_location=device)) - if torch.cuda.is_available() and self.data['gpu'] >= 0: - device = torch.device('cuda:{}'.format(self.data['gpu'])) - model.to(device) - - self.models['instrumental'] = model - self.devices['instrumental'] = device - - self.text_widget.write('Done!\n') data = { # Paths @@ -152,6 +90,26 @@ def determineModelFolderName(): def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress_var: tk.Variable, **kwargs: dict): + + global args + global model_params_d + global nn_arch_sizes + + nn_arch_sizes = [ + 31191, # default + 33966, 123821, 123812, 537238 # custom + ] + + p = argparse.ArgumentParser() + p.add_argument('--paramone', type=str, default='lib_v5/modelparams/4band_44100.json') + p.add_argument('--paramtwo', type=str, default='lib_v5/modelparams/4band_v2.json') + p.add_argument('--paramthree', type=str, default='lib_v5/modelparams/3band_44100_msb2.json') + p.add_argument('--paramfour', type=str, default='lib_v5/modelparams/4band_v2_sn.json') + p.add_argument('--aggressiveness',type=float, default=data['agg']/100) + p.add_argument('--nn_architecture', type=str, choices= ['auto'] + list('{}KB'.format(s) for s in nn_arch_sizes), default='auto') + p.add_argument('--high_end_process', type=str, default='mirroring') + args = p.parse_args() + def save_files(wav_instrument, wav_vocals): """Save output music files""" @@ -215,210 +173,255 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress os.mkdir(folder_path) # Separation Preperation - try: - for file_num, music_file in enumerate(data['input_paths'], start=1): - # Determine File Name - base_name = f'{data["export_path"]}{modelFolderName}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' - # Start Separation - model_name = os.path.basename(data[f'{data["useModel"]}Model']) - model = vocal_remover.models[data['useModel']] - device = vocal_remover.devices[data['useModel']] - - # -Get text and update progress- - base_text = get_baseText(total_files=len(data['input_paths']), - file_num=file_num) - progress_kwargs = {'progress_var': progress_var, - 'total_files': len(data['input_paths']), - 'file_num': file_num} - update_progress(**progress_kwargs, - step=0) - - mp = ModelParameters(model_params_d) + try: #Load File(s) + for file_num, music_file in enumerate(data['input_paths'], start=1): + # Determine File Name + base_name = f'{data["export_path"]}{modelFolderName}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' - # -Go through the different steps of seperation- - # Wave source - text_widget.write(base_text + 'Loading wave source...\n') - - X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {} - - bands_n = len(mp.param['band']) - - for d in range(bands_n, 0, -1): - bp = mp.param['band'][d] - - if d == bands_n: # high-end band - X_wave[d], _ = librosa.load( - music_file, bp['sr'], False, dtype=np.float32, res_type=bp['res_type']) - - if X_wave[d].ndim == 1: - X_wave[d] = np.asarray([X_wave[d], X_wave[d]]) - else: # lower bands - X_wave[d] = librosa.resample(X_wave[d+1], mp.param['band'][d+1]['sr'], bp['sr'], res_type=bp['res_type']) + model_name = os.path.basename(data[f'{data["useModel"]}Model']) + model = vocal_remover.models[data['useModel']] + device = vocal_remover.devices[data['useModel']] + # -Get text and update progress- + base_text = get_baseText(total_files=len(data['input_paths']), + file_num=file_num) + progress_kwargs = {'progress_var': progress_var, + 'total_files': len(data['input_paths']), + 'file_num': file_num} + update_progress(**progress_kwargs, + step=0) + + + #Load Model(s) + text_widget.write(base_text + 'Loading models...') + + + if 'auto' == args.nn_architecture: + model_size = math.ceil(os.stat(data['instrumentalModel']).st_size / 1024) + args.nn_architecture = '{}KB'.format(min(nn_arch_sizes, key=lambda x:abs(x-model_size))) + + nets = importlib.import_module('lib_v5.nets' + f'_{args.nn_architecture}'.replace('_{}KB'.format(nn_arch_sizes[0]), ''), package=None) + + ModelName=(data['instrumentalModel']) + + ModelParam1="4BAND_44100" + ModelParam2="4BAND_44100_B" + ModelParam3="MSB2" + ModelParam4="4BAND_44100_SN" + + if ModelParam1 in ModelName: + model_params_d=args.paramone + if ModelParam2 in ModelName: + model_params_d=args.paramtwo + if ModelParam3 in ModelName: + model_params_d=args.paramthree + if ModelParam4 in ModelName: + model_params_d=args.paramfour - # Stft of wave source + print('Model Parameters:', model_params_d) - X_spec_s[d] = spec_utils.wave_to_spectrogram_mt(X_wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'], - mp.param['mid_side_b2'], mp.param['reverse']) + mp = ModelParameters(model_params_d) - if d == bands_n and args.high_end_process != 'none': - input_high_end_h = (bp['n_fft']//2 - bp['crop_stop']) + (mp.param['pre_filter_stop'] - mp.param['pre_filter_start']) - input_high_end = X_spec_s[d][:, bp['n_fft']//2-input_high_end_h:bp['n_fft']//2, :] - - text_widget.write(base_text + 'Done!\n') - - update_progress(**progress_kwargs, - step=0.1) - - text_widget.write(base_text + 'Stft of wave source...\n') - - text_widget.write(base_text + 'Done!\n') - - text_widget.write(base_text + "Please Wait..\n") - - X_spec_m = spec_utils.combine_spectrograms(X_spec_s, mp) - - del X_wave, X_spec_s - - def inference(X_spec, device, model, aggressiveness): - - def _execute(X_mag_pad, roi_size, n_window, device, model, aggressiveness): - model.eval() + # -Instrumental- + if os.path.isfile(data['instrumentalModel']): + device = torch.device('cpu') + model = nets.CascadedASPPNet(mp.param['bins'] * 2) + model.load_state_dict(torch.load(data['instrumentalModel'], + map_location=device)) + if torch.cuda.is_available() and data['gpu'] >= 0: + device = torch.device('cuda:{}'.format(data['gpu'])) + model.to(device) - with torch.no_grad(): - preds = [] - - iterations = [n_window] + vocal_remover.models['instrumental'] = model + vocal_remover.devices['instrumental'] = device - total_iterations = sum(iterations) + text_widget.write(' Done!\n') + + model_name = os.path.basename(data[f'{data["useModel"]}Model']) + + mp = ModelParameters(model_params_d) - text_widget.write(base_text + "Length: "f"{total_iterations} Slices\n") - - for i in tqdm(range(n_window)): - update_progress(**progress_kwargs, - step=(0.1 + (0.8/n_window * i))) - start = i * roi_size - X_mag_window = X_mag_pad[None, :, :, start:start + data['window_size']] - X_mag_window = torch.from_numpy(X_mag_window).to(device) - - pred = model.predict(X_mag_window, aggressiveness) - - pred = pred.detach().cpu().numpy() - preds.append(pred[0]) + # -Go through the different steps of seperation- + # Wave source + text_widget.write(base_text + 'Loading wave source...') + + X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {} + + bands_n = len(mp.param['band']) + + for d in range(bands_n, 0, -1): + bp = mp.param['band'][d] + + if d == bands_n: # high-end band + X_wave[d], _ = librosa.load( + music_file, bp['sr'], False, dtype=np.float32, res_type=bp['res_type']) - pred = np.concatenate(preds, axis=2) + if X_wave[d].ndim == 1: + X_wave[d] = np.asarray([X_wave[d], X_wave[d]]) + else: # lower bands + X_wave[d] = librosa.resample(X_wave[d+1], mp.param['band'][d+1]['sr'], bp['sr'], res_type=bp['res_type']) + + # Stft of wave source - return pred + X_spec_s[d] = spec_utils.wave_to_spectrogram_mt(X_wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'], + mp.param['mid_side_b2'], mp.param['reverse']) + + if d == bands_n and args.high_end_process != 'none': + input_high_end_h = (bp['n_fft']//2 - bp['crop_stop']) + (mp.param['pre_filter_stop'] - mp.param['pre_filter_start']) + input_high_end = X_spec_s[d][:, bp['n_fft']//2-input_high_end_h:bp['n_fft']//2, :] - def preprocess(X_spec): - X_mag = np.abs(X_spec) - X_phase = np.angle(X_spec) + text_widget.write('Done!\n') - return X_mag, X_phase + update_progress(**progress_kwargs, + step=0.1) + + text_widget.write(base_text + 'Stft of wave source...') - X_mag, X_phase = preprocess(X_spec) - - coef = X_mag.max() - X_mag_pre = X_mag / coef - - n_frame = X_mag_pre.shape[2] - pad_l, pad_r, roi_size = dataset.make_padding(n_frame, - data['window_size'], model.offset) - n_window = int(np.ceil(n_frame / roi_size)) - - X_mag_pad = np.pad( - X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant') + text_widget.write(' Done!\n') - pred = _execute(X_mag_pad, roi_size, n_window, - device, model, aggressiveness) - pred = pred[:, :, :n_frame] + text_widget.write(base_text + "Please Wait...\n") + + X_spec_m = spec_utils.combine_spectrograms(X_spec_s, mp) - if data['tta']: - pad_l += roi_size // 2 - pad_r += roi_size // 2 - n_window += 1 + del X_wave, X_spec_s + + def inference(X_spec, device, model, aggressiveness): + + def _execute(X_mag_pad, roi_size, n_window, device, model, aggressiveness): + model.eval() + + with torch.no_grad(): + preds = [] + + iterations = [n_window] + + total_iterations = sum(iterations) + + text_widget.write(base_text + "Processing "f"{total_iterations} Slices... ") + + for i in tqdm(range(n_window)): + update_progress(**progress_kwargs, + step=(0.1 + (0.8/n_window * i))) + start = i * roi_size + X_mag_window = X_mag_pad[None, :, :, start:start + data['window_size']] + X_mag_window = torch.from_numpy(X_mag_window).to(device) + + pred = model.predict(X_mag_window, aggressiveness) + + pred = pred.detach().cpu().numpy() + preds.append(pred[0]) + + pred = np.concatenate(preds, axis=2) + text_widget.write('Done!\n') + return pred + + def preprocess(X_spec): + X_mag = np.abs(X_spec) + X_phase = np.angle(X_spec) + + return X_mag, X_phase + + X_mag, X_phase = preprocess(X_spec) + + coef = X_mag.max() + X_mag_pre = X_mag / coef + + n_frame = X_mag_pre.shape[2] + pad_l, pad_r, roi_size = dataset.make_padding(n_frame, + data['window_size'], model.offset) + n_window = int(np.ceil(n_frame / roi_size)) X_mag_pad = np.pad( X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant') + + pred = _execute(X_mag_pad, roi_size, n_window, + device, model, aggressiveness) + pred = pred[:, :, :n_frame] + + if data['tta']: + pad_l += roi_size // 2 + pad_r += roi_size // 2 + n_window += 1 - pred_tta = _execute(X_mag_pad, roi_size, n_window, - device, model, aggressiveness) - pred_tta = pred_tta[:, :, roi_size // 2:] - pred_tta = pred_tta[:, :, :n_frame] + X_mag_pad = np.pad( + X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant') - return (pred + pred_tta) * 0.5 * coef, X_mag, np.exp(1.j * X_phase) + pred_tta = _execute(X_mag_pad, roi_size, n_window, + device, model, aggressiveness) + pred_tta = pred_tta[:, :, roi_size // 2:] + pred_tta = pred_tta[:, :, :n_frame] + + return (pred + pred_tta) * 0.5 * coef, X_mag, np.exp(1.j * X_phase) + else: + return pred * coef, X_mag, np.exp(1.j * X_phase) + + + aggressiveness = {'value': args.aggressiveness, 'split_bin': mp.param['band'][1]['crop_stop']} + + + if data['tta']: + text_widget.write(base_text + "Running Inferences (TTA)...\n") else: - return pred * coef, X_mag, np.exp(1.j * X_phase) - - - aggressiveness = {'value': args.aggressiveness, 'split_bin': mp.param['band'][1]['crop_stop']} - - - if data['tta']: - text_widget.write(base_text + "Running Inferences (TTA)...\n") - else: - text_widget.write(base_text + "Running Inference...\n") - - pred, X_mag, X_phase = inference(X_spec_m, - device, - model, aggressiveness) - - text_widget.write(base_text + 'Done!\n') - - update_progress(**progress_kwargs, - step=0.9) - # Postprocess - if data['postprocess']: - text_widget.write(base_text + 'Post processing...\n') - pred_inv = np.clip(X_mag - pred, 0, np.inf) - pred = spec_utils.mask_silence(pred, pred_inv) - text_widget.write(base_text + 'Done!\n') + text_widget.write(base_text + "Running Inference...\n") + + pred, X_mag, X_phase = inference(X_spec_m, + device, + model, aggressiveness) update_progress(**progress_kwargs, - step=0.95) + step=0.9) + # Postprocess + if data['postprocess']: + text_widget.write(base_text + 'Post processing...') + pred_inv = np.clip(X_mag - pred, 0, np.inf) + pred = spec_utils.mask_silence(pred, pred_inv) + text_widget.write(' Done!\n') - # Inverse stft - text_widget.write(base_text + 'Inverse stft of instruments and vocals...\n') # nopep8 - y_spec_m = pred * X_phase - v_spec_m = X_spec_m - y_spec_m - - if args.high_end_process.startswith('mirroring'): - input_high_end_ = spec_utils.mirroring(args.high_end_process, y_spec_m, input_high_end, mp) - - wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, mp, input_high_end_h, input_high_end_) - else: - wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, mp) + update_progress(**progress_kwargs, + step=0.95) - if args.high_end_process.startswith('mirroring'): - input_high_end_ = spec_utils.mirroring(args.high_end_process, v_spec_m, input_high_end, mp) + # Inverse stft + text_widget.write(base_text + 'Inverse stft of instruments and vocals...') # nopep8 + y_spec_m = pred * X_phase + v_spec_m = X_spec_m - y_spec_m + + if args.high_end_process.startswith('mirroring'): + input_high_end_ = spec_utils.mirroring(args.high_end_process, y_spec_m, input_high_end, mp) - wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, mp, input_high_end_h, input_high_end_) - else: - wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, mp) - - text_widget.write(base_text + 'Done!\n') + wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, mp, input_high_end_h, input_high_end_) + else: + wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, mp) - update_progress(**progress_kwargs, - step=1) - # Save output music files - text_widget.write(base_text + 'Saving Files...\n') - save_files(wav_instrument, wav_vocals) - text_widget.write(base_text + 'Done!\n') + if args.high_end_process.startswith('mirroring'): + input_high_end_ = spec_utils.mirroring(args.high_end_process, v_spec_m, input_high_end, mp) - update_progress(**progress_kwargs, - step=1) + wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, mp, input_high_end_h, input_high_end_) + else: + wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, mp) + + text_widget.write('Done!\n') - # Save output image - if data['output_image']: - with open('{}_Instruments.jpg'.format(base_name), mode='wb') as f: - image = spec_utils.spectrogram_to_image(y_spec_m) - _, bin_image = cv2.imencode('.jpg', image) - bin_image.tofile(f) - with open('{}_Vocals.jpg'.format(base_name), mode='wb') as f: - image = spec_utils.spectrogram_to_image(v_spec_m) - _, bin_image = cv2.imencode('.jpg', image) - bin_image.tofile(f) + update_progress(**progress_kwargs, + step=1) + # Save output music files + text_widget.write(base_text + 'Saving Files...') + save_files(wav_instrument, wav_vocals) + text_widget.write(' Done!\n') - text_widget.write(base_text + 'Completed Seperation!\n\n') + update_progress(**progress_kwargs, + step=1) + + # Save output image + if data['output_image']: + with open('{}_Instruments.jpg'.format(base_name), mode='wb') as f: + image = spec_utils.spectrogram_to_image(y_spec_m) + _, bin_image = cv2.imencode('.jpg', image) + bin_image.tofile(f) + with open('{}_Vocals.jpg'.format(base_name), mode='wb') as f: + image = spec_utils.spectrogram_to_image(v_spec_m) + _, bin_image = cv2.imencode('.jpg', image) + bin_image.tofile(f) + + text_widget.write(base_text + 'Completed Seperation!\n\n') except Exception as e: traceback_text = ''.join(traceback.format_tb(e.__traceback__)) message = f'Traceback Error: "{traceback_text}"\n{type(e).__name__}: "{e}"\nFile: {music_file}\nPlease contact the creator and attach a screenshot of this error with the file and settings that caused it!' @@ -433,9 +436,9 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress return os.remove('temp.wav') - + progress_var.set(0) - text_widget.write(f'Conversion(s) Completed and Saving all Files!\n') + text_widget.write(f'\nConversion(s) Completed!\n') text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}') # nopep8 torch.cuda.empty_cache() button_widget.configure(state=tk.NORMAL) # Enable Button \ No newline at end of file diff --git a/inference_v5_ensemble.py b/inference_v5_ensemble.py new file mode 100644 index 0000000..f43460a --- /dev/null +++ b/inference_v5_ensemble.py @@ -0,0 +1,623 @@ +from functools import total_ordering +import pprint +import argparse +import os +from statistics import mode + +import cv2 +import librosa +import numpy as np +import soundfile as sf +import shutil +from tqdm import tqdm + +from lib_v5 import dataset +from lib_v5 import spec_utils +from lib_v5.model_param_init import ModelParameters +import torch + +# Command line text parsing and widget manipulation +from collections import defaultdict +import tkinter as tk +import traceback # Error Message Recent Calls +import time # Timer + +class VocalRemover(object): + + def __init__(self, data, text_widget: tk.Text): + self.data = data + self.text_widget = text_widget + # self.offset = model.offset + + +data = { + # Paths + 'input_paths': None, + 'export_path': None, + # Processing Options + 'gpu': -1, + 'postprocess': True, + 'tta': True, + 'save': True, + 'output_image': True, + # Models + 'instrumentalModel': None, + 'useModel': None, + # Constants + 'window_size': 512, + 'agg': 10, + 'ensChoose': 'HP1 Models' +} + +default_window_size = data['window_size'] +default_agg = data['agg'] + +def update_progress(progress_var, total_files, file_num, step: float = 1): + """Calculate the progress for the progress widget in the GUI""" + base = (100 / total_files) + progress = base * (file_num - 1) + progress += base * step + + progress_var.set(progress) + + +def get_baseText(total_files, file_num): + """Create the base text for the command widget""" + text = 'File {file_num}/{total_files} '.format(file_num=file_num, + total_files=total_files) + return text + +def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress_var: tk.Variable, + **kwargs: dict): + + global args + global nn_arch_sizes + + nn_arch_sizes = [ + 31191, # default + 33966, 123821, 123812, 537238 # custom + ] + + + p = argparse.ArgumentParser() + p.add_argument('--aggressiveness',type=float, default=data['agg']/100) + p.add_argument('--high_end_process', type=str, default='mirroring') + args = p.parse_args() + + + def save_files(wav_instrument, wav_vocals): + """Save output music files""" + vocal_name = '(Vocals)' + instrumental_name = '(Instrumental)' + save_path = os.path.dirname(base_name) + + # Swap names if vocal model + + VModel="Vocal" + + if VModel in model_name: + # Reverse names + vocal_name, instrumental_name = instrumental_name, vocal_name + + # Save Temp File + # For instrumental the instrumental is the temp file + # and for vocal the instrumental is the temp file due + # to reversement + sf.write(f'temp.wav', + wav_instrument, mp.param['sr']) + + # -Save files- + # Instrumental + if instrumental_name is not None: + instrumental_path = '{save_path}/{file_name}.wav'.format( + save_path=save_path, + file_name = f'{os.path.basename(base_name)}_{ModelName_1}_{instrumental_name}', + ) + + sf.write(instrumental_path, + wav_instrument, mp.param['sr']) + # Vocal + if vocal_name is not None: + vocal_path = '{save_path}/{file_name}.wav'.format( + save_path=save_path, + file_name=f'{os.path.basename(base_name)}_{ModelName_1}_{vocal_name}', + ) + sf.write(vocal_path, + wav_vocals, mp.param['sr']) + + data.update(kwargs) + + # Update default settings + global default_window_size + global default_agg + default_window_size = data['window_size'] + default_agg = data['agg'] + + stime = time.perf_counter() + progress_var.set(0) + text_widget.clear() + button_widget.configure(state=tk.DISABLED) # Disable Button + + # Separation Preperation + try: #Load File(s) + + + HP1_Models = [ + { + 'model_name':'HP_4BAND_44100_A', + 'model_params':'lib_v5/modelparams/4band_44100.json', + 'model_location':'models/Main Models/HP_4BAND_44100_A.pth', + 'using_archtecture': '123821KB', + 'loop_name': 'Ensemble Mode - Model 1/2' + }, + { + 'model_name':'HP_4BAND_44100_B', + 'model_params':'lib_v5/modelparams/4band_v2.json', + 'model_location':'models/Main Models/HP_4BAND_44100_B.pth', + 'using_archtecture': '123821KB', + 'loop_name': 'Ensemble Mode - Model 2/2' + } + ] + + HP2_Models = [ + { + 'model_name':'HP2_4BAND_44100_1', + 'model_params':'lib_v5/modelparams/4band_44100.json', + 'model_location':'models/Main Models/HP2_4BAND_44100_1.pth', + 'using_archtecture': '537238KB', + 'loop_name': 'Ensemble Mode - Model 1/3' + + }, + { + 'model_name':'HP2_4BAND_44100_2', + 'model_params':'lib_v5/modelparams/4band_44100.json', + 'model_location':'models/Main Models/HP2_4BAND_44100_2.pth', + 'using_archtecture': '537238KB', + 'loop_name': 'Ensemble Mode - Model 2/3' + + }, + { + 'model_name':'HP2_3BAND_44100_MSB2', + 'model_params':'lib_v5/modelparams/3band_44100_msb2.json', + 'model_location':'models/Main Models/HP2_3BAND_44100_MSB2.pth', + 'using_archtecture': '537227KB', + 'loop_name': 'Ensemble Mode - Model 3/3' + } + ] + + All_HP_Models = [ + { + 'model_name':'HP_4BAND_44100_A', + 'model_params':'lib_v5/modelparams/4band_44100.json', + 'model_location':'models/Main Models/HP_4BAND_44100_A.pth', + 'using_archtecture': '123821KB', + 'loop_name': 'Ensemble Mode - Model 1/5' + }, + { + 'model_name':'HP_4BAND_44100_B', + 'model_params':'lib_v5/modelparams/4band_v2.json', + 'model_location':'models/Main Models/HP_4BAND_44100_B.pth', + 'using_archtecture': '123821KB', + 'loop_name': 'Ensemble Mode - Model 2/5' + }, + { + 'model_name':'HP2_4BAND_44100_1', + 'model_params':'lib_v5/modelparams/4band_44100.json', + 'model_location':'models/Main Models/HP2_4BAND_44100_1.pth', + 'using_archtecture': '537238KB', + 'loop_name': 'Ensemble Mode - Model 3/5' + + }, + { + 'model_name':'HP2_4BAND_44100_2', + 'model_params':'lib_v5/modelparams/4band_44100.json', + 'model_location':'models/Main Models/HP2_4BAND_44100_2.pth', + 'using_archtecture': '537238KB', + 'loop_name': 'Ensemble Mode - Model 4/5' + + }, + { + 'model_name':'HP2_3BAND_44100_MSB2', + 'model_params':'lib_v5/modelparams/3band_44100_msb2.json', + 'model_location':'models/Main Models/HP2_3BAND_44100_MSB2.pth', + 'using_archtecture': '537227KB', + 'loop_name': 'Ensemble Mode - Model 5/5' + } + ] + + + Vocal_Models = [ + { + 'model_name':'HP_Vocal_4BAND_44100', + 'model_params':'lib_v5/modelparams/4band_44100.json', + 'model_location':'models/Main Models/HP_Vocal_4BAND_44100.pth', + 'using_archtecture': '123821KB', + 'loop_name': 'Ensemble Mode - Model 1/2' + }, + { + 'model_name':'HP_Vocal_AGG_4BAND_44100', + 'model_params':'lib_v5/modelparams/4band_44100.json', + 'model_location':'models/Main Models/HP_Vocal_AGG_4BAND_44100.pth', + 'using_archtecture': '123821KB', + 'loop_name': 'Ensemble Mode - Model 2/2' + } + ] + + if data['ensChoose'] == 'HP1 Models': + loops = HP1_Models + ensefolder = 'HP_Models_Saved_Outputs' + ensemode = 'HP_Models' + if data['ensChoose'] == 'HP2 Models': + loops = HP2_Models + ensefolder = 'HP2_Models_Saved_Outputs' + ensemode = 'HP2_Models' + if data['ensChoose'] == 'All HP Models': + loops = All_HP_Models + ensefolder = 'All_HP_Models_Saved_Outputs' + ensemode = 'All_HP_Models' + if data['ensChoose'] == 'Vocal Models': + loops = Vocal_Models + ensefolder = 'Vocal_Models_Saved_Outputs' + ensemode = 'Vocal_Models' + + + + + for file_num, music_file in enumerate(data['input_paths'], start=1): + + # -Get text and update progress- + base_text = get_baseText(total_files=len(data['input_paths']), + file_num=file_num) + progress_kwargs = {'progress_var': progress_var, + 'total_files': len(data['input_paths']), + 'file_num': file_num} + update_progress(**progress_kwargs, + step=0) + + + #Load Model(s) + #text_widget.write(base_text + 'Loading models...') + + for i, c in tqdm(enumerate(loops), disable=True, desc='Iterations..'): + + text_widget.write(c['loop_name'] + '\n\n') + + text_widget.write(base_text + 'Loading ' + c['model_name'] + '... ') + + arch_now = c['using_archtecture'] + + if arch_now == '123821KB': + from lib_v5 import nets_123821KB as nets + elif arch_now == '537238KB': + from lib_v5 import nets_537238KB as nets + elif arch_now == '537227KB': + from lib_v5 import nets_537227KB as nets + + def determineenseFolderName(): + """ + Determine the name that is used for the folder and appended + to the back of the music files + """ + enseFolderName = '' + + # -Instrumental- + if str(ensefolder): + enseFolderName += os.path.splitext(os.path.basename(ensefolder))[0] + + if enseFolderName: + enseFolderName = '/' + enseFolderName + + return enseFolderName + + enseFolderName = determineenseFolderName() + if enseFolderName: + folder_path = f'{data["export_path"]}{enseFolderName}' + if not os.path.isdir(folder_path): + os.mkdir(folder_path) + + # Determine File Name + base_name = f'{data["export_path"]}{enseFolderName}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' + enseExport = f'{data["export_path"]}{enseFolderName}/' + trackname = f'{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' + + ModelName_1=(c['model_name']) + + print('Model Parameters:', c['model_params']) + + mp = ModelParameters(c['model_params']) + + # -Instrumental- + if os.path.isfile(c['model_location']): + device = torch.device('cpu') + model = nets.CascadedASPPNet(mp.param['bins'] * 2) + model.load_state_dict(torch.load(c['model_location'], + map_location=device)) + if torch.cuda.is_available() and data['gpu'] >= 0: + device = torch.device('cuda:{}'.format(data['gpu'])) + model.to(device) + + text_widget.write('Done!\n') + + model_name = os.path.basename(c["model_name"]) + + + # -Go through the different steps of seperation- + # Wave source + text_widget.write(base_text + 'Loading wave source... ') + + X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {} + + bands_n = len(mp.param['band']) + + for d in range(bands_n, 0, -1): + bp = mp.param['band'][d] + + if d == bands_n: # high-end band + X_wave[d], _ = librosa.load( + music_file, bp['sr'], False, dtype=np.float32, res_type=bp['res_type']) + + if X_wave[d].ndim == 1: + X_wave[d] = np.asarray([X_wave[d], X_wave[d]]) + else: # lower bands + X_wave[d] = librosa.resample(X_wave[d+1], mp.param['band'][d+1]['sr'], bp['sr'], res_type=bp['res_type']) + + # Stft of wave source + + X_spec_s[d] = spec_utils.wave_to_spectrogram_mt(X_wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'], + mp.param['mid_side_b2'], mp.param['reverse']) + + if d == bands_n and args.high_end_process != 'none': + input_high_end_h = (bp['n_fft']//2 - bp['crop_stop']) + (mp.param['pre_filter_stop'] - mp.param['pre_filter_start']) + input_high_end = X_spec_s[d][:, bp['n_fft']//2-input_high_end_h:bp['n_fft']//2, :] + + text_widget.write('Done!\n') + + update_progress(**progress_kwargs, + step=0.1) + + text_widget.write(base_text + 'Stft of wave source... ') + + text_widget.write('Done!\n') + + text_widget.write(base_text + "Please Wait...\n") + + X_spec_m = spec_utils.combine_spectrograms(X_spec_s, mp) + + del X_wave, X_spec_s + + def inference(X_spec, device, model, aggressiveness): + + def _execute(X_mag_pad, roi_size, n_window, device, model, aggressiveness): + model.eval() + + with torch.no_grad(): + preds = [] + + iterations = [n_window] + + total_iterations = sum(iterations) + + text_widget.write(base_text + "Processing "f"{total_iterations} Slices... ") + + for i in tqdm(range(n_window)): + update_progress(**progress_kwargs, + step=(0.1 + (0.8/n_window * i))) + start = i * roi_size + X_mag_window = X_mag_pad[None, :, :, start:start + data['window_size']] + X_mag_window = torch.from_numpy(X_mag_window).to(device) + + pred = model.predict(X_mag_window, aggressiveness) + + pred = pred.detach().cpu().numpy() + preds.append(pred[0]) + + pred = np.concatenate(preds, axis=2) + + text_widget.write('Done!\n') + return pred + + def preprocess(X_spec): + X_mag = np.abs(X_spec) + X_phase = np.angle(X_spec) + + return X_mag, X_phase + + X_mag, X_phase = preprocess(X_spec) + + coef = X_mag.max() + X_mag_pre = X_mag / coef + + n_frame = X_mag_pre.shape[2] + pad_l, pad_r, roi_size = dataset.make_padding(n_frame, + data['window_size'], model.offset) + n_window = int(np.ceil(n_frame / roi_size)) + + X_mag_pad = np.pad( + X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant') + + pred = _execute(X_mag_pad, roi_size, n_window, + device, model, aggressiveness) + pred = pred[:, :, :n_frame] + + if data['tta']: + pad_l += roi_size // 2 + pad_r += roi_size // 2 + n_window += 1 + + X_mag_pad = np.pad( + X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant') + + pred_tta = _execute(X_mag_pad, roi_size, n_window, + device, model, aggressiveness) + pred_tta = pred_tta[:, :, roi_size // 2:] + pred_tta = pred_tta[:, :, :n_frame] + + return (pred + pred_tta) * 0.5 * coef, X_mag, np.exp(1.j * X_phase) + else: + return pred * coef, X_mag, np.exp(1.j * X_phase) + + aggressiveness = {'value': args.aggressiveness, 'split_bin': mp.param['band'][1]['crop_stop']} + + + if data['tta']: + text_widget.write(base_text + "Running Inferences (TTA)... \n") + else: + text_widget.write(base_text + "Running Inference... \n") + + pred, X_mag, X_phase = inference(X_spec_m, + device, + model, aggressiveness) + + + + update_progress(**progress_kwargs, + step=0.85) + # Postprocess + if data['postprocess']: + text_widget.write(base_text + 'Post processing... ') + pred_inv = np.clip(X_mag - pred, 0, np.inf) + pred = spec_utils.mask_silence(pred, pred_inv) + text_widget.write('Done!\n') + + update_progress(**progress_kwargs, + step=0.85) + + # Inverse stft + text_widget.write(base_text + 'Inverse stft of instruments and vocals... ') # nopep8 + y_spec_m = pred * X_phase + v_spec_m = X_spec_m - y_spec_m + + if args.high_end_process.startswith('mirroring'): + input_high_end_ = spec_utils.mirroring(args.high_end_process, y_spec_m, input_high_end, mp) + + wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, mp, input_high_end_h, input_high_end_) + else: + wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, mp) + + if args.high_end_process.startswith('mirroring'): + input_high_end_ = spec_utils.mirroring(args.high_end_process, v_spec_m, input_high_end, mp) + + wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, mp, input_high_end_h, input_high_end_) + else: + wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, mp) + + text_widget.write('Done!\n') + + update_progress(**progress_kwargs, + step=0.9) + # Save output music files + text_widget.write(base_text + 'Saving Files... ') + save_files(wav_instrument, wav_vocals) + text_widget.write('Done!\n') + + + + # Save output image + if data['output_image']: + with open('{}_Instruments.jpg'.format(base_name), mode='wb') as f: + image = spec_utils.spectrogram_to_image(y_spec_m) + _, bin_image = cv2.imencode('.jpg', image) + bin_image.tofile(f) + with open('{}_Vocals.jpg'.format(base_name), mode='wb') as f: + image = spec_utils.spectrogram_to_image(v_spec_m) + _, bin_image = cv2.imencode('.jpg', image) + bin_image.tofile(f) + + text_widget.write(base_text + 'Completed Seperation!\n\n') + + # Emsembling Outputs + + def get_files(folder="", prefix="", suffix=""): + return [f"{folder}{i}" for i in os.listdir(folder) if i.startswith(prefix) if i.endswith(suffix)] + + ensambles = [ + { + 'algorithm':'min_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Instrumental).wav"), + 'output':'{}_Ensembled_{}_Instrumentals'.format(trackname, ensemode), + 'type': 'Instrumentals' + }, + { + 'algorithm':'max_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Vocals).wav"), + 'output': '{}_Ensembled_{}_Vocals'.format(trackname, ensemode), + 'type': 'Vocals' + } + ] + + for i, e in tqdm(enumerate(ensambles), desc="Ensembling..."): + + text_widget.write(base_text + "Ensembling " + e['type'] + "... ") + + wave, specs = {}, {} + + mp = ModelParameters(e['model_params']) + + for i in range(len(e['files'])): + spec = {} + + for d in range(len(mp.param['band']), 0, -1): + bp = mp.param['band'][d] + + if d == len(mp.param['band']): # high-end band + wave[d], _ = librosa.load( + e['files'][i], bp['sr'], False, dtype=np.float32, res_type=bp['res_type']) + + if len(wave[d].shape) == 1: # mono to stereo + wave[d] = np.array([wave[d], wave[d]]) + else: # lower bands + wave[d] = librosa.resample(wave[d+1], mp.param['band'][d+1]['sr'], bp['sr'], res_type=bp['res_type']) + + spec[d] = spec_utils.wave_to_spectrogram(wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'], mp.param['mid_side_b2'], mp.param['reverse']) + + specs[i] = spec_utils.combine_spectrograms(spec, mp) + + del wave + + sf.write(os.path.join('{}'.format(data['export_path']),'{}.wav'.format(e['output'])), + spec_utils.cmb_spectrogram_to_wave(spec_utils.ensembling(e['algorithm'], + specs), mp), mp.param['sr']) + + if not data['save']: # Deletes all outputs if Save All Outputs: is checked + files = e['files'] + for file in files: + os.remove(file) + + text_widget.write("Done!\n") + + + update_progress(**progress_kwargs, + step=0.95) + text_widget.write("\n") + + except Exception as e: + traceback_text = ''.join(traceback.format_tb(e.__traceback__)) + message = f'Traceback Error: "{traceback_text}"\n{type(e).__name__}: "{e}"\nFile: {music_file}\nPlease contact the creator and attach a screenshot of this error with the file and settings that caused it!' + tk.messagebox.showerror(master=window, + title='Untracked Error', + message=message) + print(traceback_text) + print(type(e).__name__, e) + print(message) + progress_var.set(0) + button_widget.configure(state=tk.NORMAL) # Enable Button + return + + + if len(os.listdir(enseExport)) == 0: # Check if the folder is empty + shutil.rmtree(folder_path) + + update_progress(**progress_kwargs, + step=1) + + print('Done!') + + os.remove('temp.wav') + + progress_var.set(0) + text_widget.write(f'Conversions Completed!\n') + text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}') # nopep8 + torch.cuda.empty_cache() + button_widget.configure(state=tk.NORMAL) # Enable Button