diff --git a/UVR.py b/UVR.py index c9b16c4..eaa5382 100644 --- a/UVR.py +++ b/UVR.py @@ -104,6 +104,7 @@ DEFAULT_DATA = { 'break': False, #Advanced Options 'appendensem': False, + 'demucs_only': False, #MDX-Net 'demucsmodel': True, 'non_red': False, @@ -113,9 +114,17 @@ DEFAULT_DATA = { 'chunks': 'Auto', 'n_fft_scale': 6144, 'dim_f': 2048, + 'overlap': 0.5, + 'shifts': 0, + 'margin': 44100, + 'channel': 64, + 'compensate': 1.03597672895, + 'mdxnetModeltype': 'Vocals (Custom)', 'noisereduc_s': '3', - 'mixing': 'default', - 'mdxnetModel': 'UVR-MDX-NET-1', + 'mixing': 'Default', + 'mdxnetModel': 'UVR-MDX-NET 1', + 'DemucsModel': 'demucs_extra-3646af93_org.th', + 'ModelParams': 'Auto', } def open_image(path: str, size: tuple = None, keep_aspect: bool = True, rotate: int = 0) -> ImageTk.PhotoImage: @@ -351,6 +360,9 @@ class MainWindow(TkinterDnD.Tk): self.lastInstrumentalModels = [] self.MDXLabel_to_path = defaultdict(lambda: '') self.lastMDXModels = [] + self.DemucsLabel_to_path = defaultdict(lambda: '') + self.ModelParamsLabel_to_path = defaultdict(lambda: '') + self.lastModelParams = [] # -Tkinter Value Holders- data = load_data() @@ -373,6 +385,7 @@ class MainWindow(TkinterDnD.Tk): self.mdxensemchoose_b_var = tk.StringVar(value=data['mdx_ensem_b']) #Advanced Options self.appendensem_var = tk.BooleanVar(value=data['appendensem']) + self.demucs_only_var = tk.BooleanVar(value=data['demucs_only']) # Processing Options self.gpuConversion_var = tk.BooleanVar(value=data['gpu']) self.postprocessing_var = tk.BooleanVar(value=data['postprocess']) @@ -395,6 +408,12 @@ class MainWindow(TkinterDnD.Tk): self.agg_var = tk.StringVar(value=data['agg']) self.n_fft_scale_var = tk.StringVar(value=data['n_fft_scale']) self.dim_f_var = tk.StringVar(value=data['dim_f']) + self.overlap_var = tk.StringVar(value=data['overlap']) + self.shifts_var = tk.StringVar(value=data['shifts']) + self.channel_var = tk.StringVar(value=data['channel']) + self.margin_var = tk.StringVar(value=data['margin']) + self.compensate_var = tk.StringVar(value=data['compensate']) + self.mdxnetModeltype_var = tk.StringVar(value=data['mdxnetModeltype']) # Instrumental or Vocal Only self.voc_only_var = tk.BooleanVar(value=data['voc_only']) self.inst_only_var = tk.BooleanVar(value=data['inst_only']) @@ -409,6 +428,8 @@ class MainWindow(TkinterDnD.Tk): self.last_ensChoose = self.ensChoose_var.get() # Choose MDX-NET Model self.mdxnetModel_var = tk.StringVar(value=data['mdxnetModel']) + self.DemucsModel_var = tk.StringVar(value=data['DemucsModel']) + self.ModelParams_var = tk.StringVar(value=data['ModelParams']) self.last_mdxnetModel = self.mdxnetModel_var.get() # Other self.inputPathsEntry_var = tk.StringVar(value='') @@ -432,8 +453,8 @@ class MainWindow(TkinterDnD.Tk): def create_widgets(self): """Create window widgets""" - self.title_Label = tk.Button(master=self, - image=self.logo_img, compound=tk.TOP, borderwidth=0, command=self.open_appdir_filedialog) + self.title_Label = tk.Label(master=self, + image=self.logo_img, compound=tk.TOP) self.filePaths_Frame = ttk.Frame(master=self) self.fill_filePaths_Frame() @@ -463,8 +484,7 @@ class MainWindow(TkinterDnD.Tk): self.progressbar = ttk.Progressbar(master=self, variable=self.progress_var) self.command_Text = ThreadSafeConsole(master=self, - background='#0e0e0f',fg='#898b8e', font=('Century Gothic', 11), - borderwidth=0,) + background='#0e0e0f',fg='#898b8e', font=('Century Gothic', 11),borderwidth=0) self.command_Text.write(f'Ultimate Vocal Remover [{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}]\n') @@ -588,12 +608,13 @@ class MainWindow(TkinterDnD.Tk): # -Column 1- # Choose Conversion Method - self.options_aiModel_Label = tk.Label(master=self.options_Frame, + self.options_aiModel_Label = tk.Button(master=self.options_Frame, text='Choose Process Method', anchor=tk.CENTER, - background='#0e0e0f', font=self.font, foreground='#13a4c9') + background='#0e0e0f', font=self.font, foreground='#13a4c9', borderwidth=0, command=self.open_appdir_filedialog) self.options_aiModel_Optionmenu = ttk.OptionMenu(self.options_Frame, self.aiModel_var, None, 'VR Architecture', 'MDX-Net', 'Ensemble Mode') + # Choose Instrumental Model self.options_instrumentalModel_Label = tk.Button(master=self.options_Frame, text='Choose Main Model', @@ -628,16 +649,18 @@ class MainWindow(TkinterDnD.Tk): # -Column 2- # WINDOW SIZE - self.options_winSize_Label = tk.Label(master=self.options_Frame, + self.options_winSize_Label = tk.Button(master=self.options_Frame, text='Window Size', anchor=tk.CENTER, - background='#0e0e0f', font=self.font, foreground='#13a4c9') + background='#0e0e0f', font=self.font, foreground='#13a4c9', + borderwidth=0, command=self.advanced_vr_options) self.options_winSize_Optionmenu = ttk.OptionMenu(self.options_Frame, self.winSize_var, None, '320', '512','1024') # MDX-chunks - self.options_chunks_Label = tk.Label(master=self.options_Frame, + self.options_chunks_Label = tk.Button(master=self.options_Frame, text='Chunks', - background='#0e0e0f', font=self.font, foreground='#13a4c9') + background='#0e0e0f', font=self.font, foreground='#13a4c9', + borderwidth=0, command=self.advanced_mdx_options) self.options_chunks_Optionmenu = ttk.OptionMenu(self.options_Frame, self.chunks_var, None, 'Auto', '1', '5', '10', '15', '20', @@ -683,9 +706,10 @@ class MainWindow(TkinterDnD.Tk): # -Column 3- # AGG - self.options_agg_Label = tk.Label(master=self.options_Frame, + self.options_agg_Label = tk.Button(master=self.options_Frame, text='Aggression Setting', - background='#0e0e0f', font=self.font, foreground='#13a4c9') + background='#0e0e0f', font=self.font, foreground='#13a4c9', + borderwidth=0, command=self.advanced_vr_options) self.options_agg_Optionmenu = ttk.OptionMenu(self.options_Frame, self.agg_var, None, '1', '2', '3', '4', '5', @@ -694,9 +718,10 @@ class MainWindow(TkinterDnD.Tk): '18', '19', '20') # MDX-noisereduc_s - self.options_noisereduc_s_Label = tk.Label(master=self.options_Frame, + self.options_noisereduc_s_Label = tk.Button(master=self.options_Frame, text='Noise Reduction', - background='#0e0e0f', font=self.font, foreground='#13a4c9') + background='#0e0e0f', font=self.font, foreground='#13a4c9', + borderwidth=0, command=self.advanced_mdx_options) self.options_noisereduc_s_Optionmenu = ttk.OptionMenu(self.options_Frame, self.noisereduc_s_var, None, 'None', '0', '1', '2', '3', '4', '5', @@ -848,7 +873,7 @@ class MainWindow(TkinterDnD.Tk): self.inst_only_var.trace_add('write', lambda *args: self.update_states()) - + self.voc_only_var.trace_add('write', lambda *args: self.update_states()) self.noisereduc_s_var.trace_add('write', @@ -856,6 +881,21 @@ class MainWindow(TkinterDnD.Tk): self.non_red_var.trace_add('write', lambda *args: self.update_states()) + self.mdxnetModeltype_var.trace_add('write', + lambda *args: self.update_states()) + + self.n_fft_scale_var.trace_add('write', + lambda *args: self.update_states()) + + self.dim_f_var.trace_add('write', + lambda *args: self.update_states()) + + self.demucsmodel_var.trace_add('write', + lambda *args: self.update_states()) + + self.demucs_only_var.trace_add('write', + lambda *args: self.update_states()) + # Opening filedialogs def open_file_filedialog(self): """Make user select music files""" @@ -1007,6 +1047,7 @@ class MainWindow(TkinterDnD.Tk): 'gpu': 0 if self.gpuConversion_var.get() else -1, 'postprocess': self.postprocessing_var.get(), 'appendensem': self.appendensem_var.get(), + 'demucs_only': self.demucs_only_var.get(), 'tta': self.tta_var.get(), 'save': self.save_var.get(), 'output_image': self.outputImage_var.get(), @@ -1023,6 +1064,8 @@ class MainWindow(TkinterDnD.Tk): 'break': False, 'ensChoose': ensChoose, 'mdxnetModel': mdxnetModel, + 'DemucsModel': self.DemucsModel_var.get(), + 'ModelParams': self.ModelParams_var.get(), # Other Variables (Tkinter) 'window': self, 'text_widget': self.command_Text, @@ -1040,6 +1083,12 @@ class MainWindow(TkinterDnD.Tk): 'mixing': mixing, 'n_fft_scale': self.n_fft_scale_var.get(), 'dim_f': self.dim_f_var.get(), + 'overlap': self.overlap_var.get(), + 'shifts': self.shifts_var.get(), + 'margin': self.margin_var.get(), + 'channel': self.channel_var.get(), + 'compensate': self.compensate_var.get(), + 'mdxnetModeltype': self.mdxnetModeltype_var.get(), }, daemon=True ).start() @@ -1061,11 +1110,11 @@ class MainWindow(TkinterDnD.Tk): """Update the dropdown menu""" self.update_available_models() - self.after(3000, self.update_loop) + self.after(1000, self.update_loop) def update_available_models(self): """ - Loop through every model (.pth) in the models directory + Loop through every VR model (.pth) in the models directory and add to the select your model list """ temp_instrumentalModels_dir = os.path.join(instrumentalModels_dir, 'Main_Models') # nopep8 @@ -1086,16 +1135,17 @@ class MainWindow(TkinterDnD.Tk): self.lastInstrumentalModels = new_InstrumentalModels #print(self.instrumentalLabel_to_path) - - - + """ + Loop through every MDX-Net model (.onnx) in the models directory + and add to the select your model list + """ + temp_MDXModels_dir = os.path.join(instrumentalModels_dir, 'MDX_Net_Models') # nopep8 # MDX-Net new_MDXModels = os.listdir(temp_MDXModels_dir) if new_MDXModels != self.lastMDXModels: - #print(new_MDXModels) self.MDXLabel_to_path.clear() self.options_mdxnetModel_Optionmenu['menu'].delete(0, 'end') for file_name_1 in natsort.natsorted(new_MDXModels): @@ -1104,30 +1154,72 @@ class MainWindow(TkinterDnD.Tk): for char in b: file_name_1 = file_name_1.replace(char, "") - c = ["UVR_MDXNET_9662"] + c = ["UVR_MDXNET_3_9662"] for char in c: file_name_1 = file_name_1.replace(char, "UVR-MDX-NET 3") - d = ["UVR_MDXNET_9682"] + d = ["UVR_MDXNET_2_9682"] for char in d: file_name_1 = file_name_1.replace(char, "UVR-MDX-NET 2") - e = ["UVR_MDXNET_9703"] + e = ["UVR_MDXNET_1_9703"] for char in e: file_name_1 = file_name_1.replace(char, "UVR-MDX-NET 1") f = ["UVR_MDXNET_KARA"] for char in f: file_name_1 = file_name_1.replace(char, "UVR-MDX-NET Karaoke") - - #file_name = f'{os.path.basename(path)}' - - print(file_name_1) self.options_mdxnetModel_Optionmenu['menu'].add_radiobutton(label=file_name_1, command=tk._setit(self.mdxnetModel_var, file_name_1)) self.lastMDXModels = new_MDXModels + + """ + Loop through every Demucs model (.th, .pth) in the models directory + and add to the select your model list + """ + + try: + temp_DemucsModels_dir = os.path.join(instrumentalModels_dir, 'Demucs_Model') # nopep8 + new_DemucsModels = os.listdir(temp_DemucsModels_dir) + + if new_DemucsModels != self.lastDemucsModels: + #print(new_MDXModels) + self.DemucsLabel_to_path.clear() + self.options_DemucsModel_Optionmenu['menu'].delete(0, 'end') + for file_name_2 in natsort.natsorted(new_DemucsModels): + if file_name_2.endswith(('.th', '.pth')): + + self.options_DemucsModel_Optionmenu['menu'].add_radiobutton(label=file_name_2, + command=tk._setit(self.DemucsModel_var, file_name_2)) + self.lastDemucsModels = new_DemucsModels + except: + pass + + + """ + Loop through every model param (.json) in the models directory + and add to the select your model list + """ + + try: + temp_ModelParams_dir = 'lib_v5\modelparams' # nopep8 + new_ModelParams = os.listdir(temp_ModelParams_dir) + + if new_ModelParams != self.lastModelParams: + #print(new_MDXModels) + self.ModelParamsLabel_to_path.clear() + self.options_ModelParams_Optionmenu['menu'].delete(0, 'end') + for file_name_3 in natsort.natsorted(new_ModelParams): + if file_name_3.endswith(('.json', 'Auto')): + + self.options_ModelParams_Optionmenu['menu'].add_radiobutton(label=file_name_3, + command=tk._setit(self.ModelParams_var, file_name_3)) + self.lastModelParams = new_ModelParams + except: + pass + def update_states(self): """ @@ -1460,7 +1552,91 @@ class MainWindow(TkinterDnD.Tk): self.non_red_var.set(False) if not self.noisereduc_s_var.get() == 'None': self.options_non_red_Checkbutton.configure(state=tk.NORMAL) - + + + if self.mdxnetModeltype_var.get() == 'Vocals (Default)': + self.n_fft_scale_var.set('6144') + self.dim_f_var.set('2048') + try: + self.options_n_fft_scale_Entry.configure(state=tk.DISABLED) + self.options_dim_f_Entry.configure(state=tk.DISABLED) + self.options_n_fft_scale_Opt.configure(state=tk.DISABLED) + self.options_dim_f_Opt.configure(state=tk.DISABLED) + except: + pass + + if self.mdxnetModeltype_var.get() == 'Other (Default)': + self.n_fft_scale_var.set('8192') + self.dim_f_var.set('2048') + try: + self.options_n_fft_scale_Entry.configure(state=tk.DISABLED) + self.options_dim_f_Entry.configure(state=tk.DISABLED) + self.options_n_fft_scale_Opt.configure(state=tk.DISABLED) + self.options_dim_f_Opt.configure(state=tk.DISABLED) + except: + pass + + if self.mdxnetModeltype_var.get() == 'Drums (Default)': + self.n_fft_scale_var.set('4096') + self.dim_f_var.set('2048') + try: + self.options_n_fft_scale_Entry.configure(state=tk.DISABLED) + self.options_dim_f_Entry.configure(state=tk.DISABLED) + self.options_n_fft_scale_Opt.configure(state=tk.DISABLED) + self.options_dim_f_Opt.configure(state=tk.DISABLED) + except: + pass + + if self.mdxnetModeltype_var.get() == 'Bass (Default)': + self.n_fft_scale_var.set('16384') + self.dim_f_var.set('2048') + try: + self.options_n_fft_scale_Entry.configure(state=tk.DISABLED) + self.options_dim_f_Entry.configure(state=tk.DISABLED) + self.options_n_fft_scale_Opt.configure(state=tk.DISABLED) + self.options_dim_f_Opt.configure(state=tk.DISABLED) + except: + pass + + if self.mdxnetModeltype_var.get() == 'Vocals (Custom)': + try: + self.options_n_fft_scale_Entry.configure(state=tk.NORMAL) + self.options_dim_f_Entry.configure(state=tk.NORMAL) + self.options_n_fft_scale_Opt.configure(state=tk.NORMAL) + self.options_dim_f_Opt.configure(state=tk.NORMAL) + except: + pass + + if self.mdxnetModeltype_var.get() == 'Other (Custom)': + try: + self.options_n_fft_scale_Entry.configure(state=tk.NORMAL) + self.options_dim_f_Entry.configure(state=tk.NORMAL) + self.options_n_fft_scale_Opt.configure(state=tk.NORMAL) + self.options_dim_f_Opt.configure(state=tk.NORMAL) + except: + pass + + if self.mdxnetModeltype_var.get() == 'Drums (Custom)': + try: + self.options_n_fft_scale_Entry.configure(state=tk.NORMAL) + self.options_dim_f_Entry.configure(state=tk.NORMAL) + self.options_n_fft_scale_Opt.configure(state=tk.NORMAL) + self.options_dim_f_Opt.configure(state=tk.NORMAL) + except: + pass + + if self.mdxnetModeltype_var.get() == 'Bass (Custom)': + try: + self.options_n_fft_scale_Entry.configure(state=tk.NORMAL) + self.options_dim_f_Entry.configure(state=tk.NORMAL) + self.options_n_fft_scale_Opt.configure(state=tk.NORMAL) + self.options_dim_f_Opt.configure(state=tk.NORMAL) + except: + pass + + if self.demucs_only_var.get() == True: + self.demucsmodel_var.set(True) + self.update_inputPaths() @@ -1523,6 +1699,232 @@ class MainWindow(TkinterDnD.Tk): else: opener = "open" if sys.platform == "darwin" else "xdg-open" subprocess.call([opener, filename]) + + def advanced_vr_options(self): + """ + Open Help Guide + """ + top= Toplevel(self) + + top.geometry("600x500") + window_height = 600 + window_width = 500 + + top.title("Advanced VR Options") + + top.resizable(False, False) # This code helps to disable windows from resizing + + screen_width = top.winfo_screenwidth() + screen_height = top.winfo_screenheight() + + x_cordinate = int((screen_width/2) - (window_width/2)) + y_cordinate = int((screen_height/2) - (window_height/2)) + + top.geometry("{}x{}+{}+{}".format(window_width, window_height, x_cordinate, y_cordinate)) + + # change title bar icon + top.iconbitmap('img\\UVR-Icon-v2.ico') + + tabControl = ttk.Notebook(top) + + tab1 = ttk.Frame(tabControl) + + tabControl.add(tab1, text ='Advanced Settings') + + tabControl.pack(expand = 1, fill ="both") + + tab1.grid_rowconfigure(0, weight=1) + tab1.grid_columnconfigure(0, weight=1) + + frame0=Frame(tab1, highlightbackground='red',highlightthicknes=0) + frame0.grid(row=0,column=0,padx=0,pady=30) + + l0=tk.Label(frame0, text='Window Size (Set Manually)', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=1,column=0,padx=0,pady=10) + + l0=ttk.Entry(frame0, textvariable=self.winSize_var, justify="center") + l0.grid(row=2,column=0,padx=0,pady=0) + + l0=tk.Label(frame0, text='Aggression Setting (Set Manually)', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=3,column=0,padx=0,pady=10) + + l0=ttk.Entry(frame0, textvariable=self.agg_var, justify="center") + l0.grid(row=4,column=0,padx=0,pady=0) + + l0=tk.Label(frame0, text='Select Model Param\n(Can\'t change Model Params in Ensemble Mode)', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=5,column=0,padx=0,pady=10) + + self.options_ModelParams_Optionmenu = l0=ttk.OptionMenu(frame0, self.ModelParams_var) + + self.options_ModelParams_Optionmenu + l0.grid(row=7,column=0,padx=0,pady=0) + + l0=ttk.Checkbutton(frame0, text='Save Output Image(s) of Spectrogram(s)', variable=self.outputImage_var) + l0.grid(row=8,column=0,padx=0,pady=10) + + self.ModelParamsLabel_to_path = defaultdict(lambda: '') + self.lastModelParams = [] + + self.update_states() + + def advanced_mdx_options(self): + """ + Open Help Guide + """ + top= Toplevel(self) + + top.geometry("600x550") + window_height = 600 + window_width = 550 + + top.title("Advanced MDX-Net Options") + + top.resizable(False, False) # This code helps to disable windows from resizing + + screen_width = top.winfo_screenwidth() + screen_height = top.winfo_screenheight() + + x_cordinate = int((screen_width/2) - (window_width/2)) + y_cordinate = int((screen_height/2) - (window_height/2)) + + top.geometry("{}x{}+{}+{}".format(window_width, window_height, x_cordinate, y_cordinate)) + + # change title bar icon + top.iconbitmap('img\\UVR-Icon-v2.ico') + + tabControl = ttk.Notebook(top) + + tab1 = ttk.Frame(tabControl) + tab2 = ttk.Frame(tabControl) + tab3 = ttk.Frame(tabControl) + + tabControl.add(tab1, text ='Advanced Settings') + tabControl.add(tab2, text ='Demucs Settings') + tabControl.add(tab3, text ='Advanced ONNX Model Settings') + + tabControl.pack(expand = 1, fill ="both") + + tab1.grid_rowconfigure(0, weight=1) + tab1.grid_columnconfigure(0, weight=1) + tab2.grid_rowconfigure(0, weight=1) + tab2.grid_columnconfigure(0, weight=1) + tab3.grid_rowconfigure(0, weight=1) + tab3.grid_columnconfigure(0, weight=1) + + frame0=Frame(tab1, highlightbackground='red',highlightthicknes=0) + frame0.grid(row=0,column=0,padx=0,pady=30) + + l0=tk.Label(frame0, text='Chunks (Set Manually)', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=0,column=0,padx=0,pady=10) + + l0=ttk.Entry(frame0, textvariable=self.chunks_var, justify='center') + l0.grid(row=1,column=0,padx=0,pady=0) + + l0=tk.Label(frame0, text='Noise Reduction (Set Manually)', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=2,column=0,padx=0,pady=10) + + l0=ttk.Entry(frame0, textvariable=self.noisereduc_s_var, justify='center') + l0.grid(row=3,column=0,padx=0,pady=0) + + l0=tk.Label(frame0, text='Chunk Margin', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=4,column=0,padx=0,pady=10) + + l0=ttk.Entry(frame0, textvariable=self.margin_var, justify='center') + l0.grid(row=5,column=0,padx=0,pady=0) + + l0=tk.Label(frame0, text='Volume Compensation', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=6,column=0,padx=0,pady=10) + + l0=ttk.Entry(frame0, textvariable=self.compensate_var, justify='center') + l0.grid(row=7,column=0,padx=0,pady=0) + + frame0=Frame(tab2, highlightbackground='red',highlightthicknes=0) + frame0.grid(row=0,column=0,padx=0,pady=30) + + l0=tk.Label(frame0, text='Choose Demucs Model', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=0,column=0,padx=0,pady=0) + + self.options_DemucsModel_Optionmenu = l0=ttk.OptionMenu(frame0, self.DemucsModel_var) + + self.options_DemucsModel_Optionmenu + l0.grid(row=2,column=0,padx=0,pady=0) + + l0=tk.Button(frame0, text='(Click here to add more Demucs models)', font=("Century Gothic", "8"), foreground='#13a4c9', borderwidth=0, command=self.open_Modelfolder_de) + l0.grid(row=1,column=0,padx=0,pady=0) + + l0=tk.Label(frame0, text='Mixing Algorithm', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=3,column=0,padx=0,pady=10) + + l0=ttk.OptionMenu(frame0, self.mixing_var, None, 'Default', 'Min_Mag', 'Max_Mag', 'Invert_p') + l0.grid(row=4,column=0,padx=0,pady=0) + + l0=tk.Label(frame0, text='Channel', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=5,column=0,padx=0,pady=10) + + l0=ttk.Entry(frame0, textvariable=self.channel_var, justify='center') + l0.grid(row=6,column=0,padx=0,pady=0) + + l0=tk.Label(frame0, text='Shifts\n(Higher values use more resources and increase processing times)', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=7,column=0,padx=0,pady=10) + + l0=ttk.Entry(frame0, textvariable=self.shifts_var, justify='center') + l0.grid(row=8,column=0,padx=0,pady=0) + + l0=tk.Label(frame0, text='Overlap', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=9,column=0,padx=0,pady=10) + + l0=ttk.Entry(frame0, textvariable=self.overlap_var, justify='center') + l0.grid(row=10,column=0,padx=0,pady=0) + + l0=ttk.Checkbutton(frame0, text='Run Demucs Model Only', variable=self.demucs_only_var) + l0.grid(row=12,column=0,padx=0,pady=10) + + frame0=Frame(tab3, highlightbackground='red',highlightthicknes=0) + frame0.grid(row=0,column=0,padx=0,pady=30) + + l0=tk.Label(frame0, text='Stem Type', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=1,column=0,padx=0,pady=0) + + l0=ttk.OptionMenu(frame0, self.mdxnetModeltype_var, None, 'Vocals (Default)', 'Other (Default)', 'Bass (Default)', 'Drums (Default)', 'Vocals (Custom)', 'Other (Custom)', 'Bass (Custom)', 'Drums (Custom)') + l0.grid(row=2,column=0,padx=0,pady=10) + + l0=tk.Label(frame0, text='N_FFT Scale', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=3,column=0,padx=0,pady=0) + + l0=tk.Label(frame0, text='(Manual Set)', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=3,column=1,padx=0,pady=0) + + self.options_n_fft_scale_Opt = l0=ttk.OptionMenu(frame0, self.n_fft_scale_var, None, '4096', '6144', '7680', '8192', '16384') + + self.options_n_fft_scale_Opt + l0.grid(row=4,column=0,padx=0,pady=0) + + self.options_n_fft_scale_Entry = l0=ttk.Entry(frame0, textvariable=self.n_fft_scale_var, justify='center') + + self.options_n_fft_scale_Entry + l0.grid(row=4,column=1,padx=0,pady=0) + + l0=tk.Label(frame0, text='Dim_f', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=5,column=0,padx=0,pady=0) + + l0=tk.Label(frame0, text='(Manual Set)', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=5,column=1,padx=0,pady=0) + + self.options_dim_f_Opt = l0=ttk.OptionMenu(frame0, self.dim_f_var, None, '2048', '3072', '4096') + + self.options_dim_f_Opt + l0.grid(row=6,column=0,padx=0,pady=0) + + self.options_dim_f_Entry = l0=ttk.Entry(frame0, textvariable=self.dim_f_var, justify='center') + + self.options_dim_f_Entry + l0.grid(row=6,column=1,padx=0,pady=0) + + self.DemucsLabel_to_path = defaultdict(lambda: '') + self.lastDemucsModels = [] + + self.update_states() + def custom_ensemble(self): """ @@ -1682,8 +2084,9 @@ class MainWindow(TkinterDnD.Tk): l0=ttk.Checkbutton(frame0, text='Post-Process (VR Architecture Only)', variable=self.postprocessing_var) l0.grid(row=3,column=2,padx=0,pady=0) - - + + l0=ttk.Button(frame0,text='Open Models Directory', command=self.open_Modelfolder_filedialog) + l0.grid(row=4,column=2,padx=0,pady=10) def help(self): """ @@ -1949,160 +2352,47 @@ class MainWindow(TkinterDnD.Tk): frame0=Frame(tab9,highlightbackground='red',highlightthicknes=0) frame0.grid(row=0,column=0,padx=0,pady=0) - l0=Label(frame0,text="MDX-Net/VR Ensemble Options",font=("Century Gothic", "10", "bold", "underline"), justify="center", fg="#f4f4f4") + l0=Label(frame0,text="Advanced Option Guide",font=("Century Gothic", "12", "bold", "underline"), fg="#f4f4f4") + l0.grid(row=0,column=0,padx=0,pady=0) + + l0=ttk.Button(frame0,text="Ensemble Customization Options", command=self.custom_ensemble) l0.grid(row=1,column=0,padx=20,pady=10) - l0=tk.Label(frame0,text='MDX-Net Model\n',font=("Century Gothic", "9", "bold"), justify="center", foreground='#13a4c9') - l0.grid(row=2,column=0,padx=0,pady=0) + l0=ttk.Button(frame0,text="Advanced MDX-Net Options", command=self.advanced_mdx_options) + l0.grid(row=2,column=0,padx=20,pady=10) - l0=ttk.OptionMenu(frame0, self.mdxensemchoose_var, None, 'UVR-MDX-NET 1', 'UVR-MDX-NET 2', 'UVR-MDX-NET 3', - 'UVR-MDX-NET Karaoke') - l0.grid(row=3,column=0,padx=0,pady=0) - - l0=tk.Label(frame0,text='\nVR Model 1\n',font=("Century Gothic", "9", "bold"), justify="center", foreground='#13a4c9') - l0.grid(row=4,column=0,padx=0,pady=0) - - l0=ttk.OptionMenu(frame0, self.vrensemchoose_var, None, 'No Model', '1_HP-UVR', '2_HP-UVR', '3_HP-Vocal-UVR', - '4_HP-Vocal-UVR', '5_HP-Karaoke-UVR', '6_HP-Karaoke-UVR', '7_HP2-UVR', '8_HP2-UVR', - '9_HP2-UVR', '10_SP-UVR-2B-32000-1', '11_SP-UVR-2B-32000-2', '12_SP-UVR-3B-44100', '13_SP-UVR-4B-44100-1', - '14_SP-UVR-4B-44100-2', '15_SP-UVR-MID-44100-1', '16_SP-UVR-MID-44100-2', - 'MGM_MAIN_v4', 'MGM_HIGHEND_v4', 'MGM_LOWEND_A_v4', 'MGM_LOWEND_B_v4') - l0.grid(row=5,column=0,padx=0,pady=0) - - l0=tk.Label(frame0,text='\nVR Model 2\n',font=("Century Gothic", "9", "bold"), justify="center", foreground='#13a4c9') - l0.grid(row=6,column=0,padx=0,pady=0) - - l0=ttk.OptionMenu(frame0, self.vrensemchoose_mdx_a_var, None, 'No Model', '1_HP-UVR', '2_HP-UVR', '3_HP-Vocal-UVR', - '4_HP-Vocal-UVR', '5_HP-Karaoke-UVR', '6_HP-Karaoke-UVR', '7_HP2-UVR', '8_HP2-UVR', - '9_HP2-UVR', '10_SP-UVR-2B-32000-1', '11_SP-UVR-2B-32000-2', '12_SP-UVR-3B-44100', '13_SP-UVR-4B-44100-1', - '14_SP-UVR-4B-44100-2', '15_SP-UVR-MID-44100-1', '16_SP-UVR-MID-44100-2', - 'MGM_MAIN_v4', 'MGM_HIGHEND_v4', 'MGM_LOWEND_A_v4', 'MGM_LOWEND_B_v4') - l0.grid(row=7,column=0,padx=0,pady=0) - - l0=tk.Label(frame0,text='\nVR Model 3\n',font=("Century Gothic", "9", "bold"), justify="center", foreground='#13a4c9') - l0.grid(row=8,column=0,padx=0,pady=0) - - l0=ttk.OptionMenu(frame0, self.vrensemchoose_mdx_b_var, None, 'No Model', '1_HP-UVR', '2_HP-UVR', '3_HP-Vocal-UVR', - '4_HP-Vocal-UVR', '5_HP-Karaoke-UVR', '6_HP-Karaoke-UVR', '7_HP2-UVR', '8_HP2-UVR', - '9_HP2-UVR', '10_SP-UVR-2B-32000-1', '11_SP-UVR-2B-32000-2', '12_SP-UVR-3B-44100', '13_SP-UVR-4B-44100-1', - '14_SP-UVR-4B-44100-2', '15_SP-UVR-MID-44100-1', '16_SP-UVR-MID-44100-2', - 'MGM_MAIN_v4', 'MGM_HIGHEND_v4', 'MGM_LOWEND_A_v4', 'MGM_LOWEND_B_v4') - l0.grid(row=9,column=0,padx=0,pady=0) - - l0=tk.Label(frame0,text='\nVR Model 4\n',font=("Century Gothic", "9", "bold"), justify="center", foreground='#13a4c9') - l0.grid(row=10,column=0,padx=0,pady=0) - - l0=ttk.OptionMenu(frame0, self.vrensemchoose_mdx_c_var, None, 'No Model', '1_HP-UVR', '2_HP-UVR', '3_HP-Vocal-UVR', - '4_HP-Vocal-UVR', '5_HP-Karaoke-UVR', '6_HP-Karaoke-UVR', '7_HP2-UVR', '8_HP2-UVR', - '9_HP2-UVR', '10_SP-UVR-2B-32000-1', '11_SP-UVR-2B-32000-2', '12_SP-UVR-3B-44100', '13_SP-UVR-4B-44100-1', - '14_SP-UVR-4B-44100-2', '15_SP-UVR-MID-44100-1', '16_SP-UVR-MID-44100-2', - 'MGM_MAIN_v4', 'MGM_HIGHEND_v4', 'MGM_LOWEND_A_v4', 'MGM_LOWEND_B_v4') - l0.grid(row=11,column=0,padx=0,pady=0) - - l0=tk.Label(frame0,text='\nMDX-Net Model 2\n',font=("Century Gothic", "9", "bold"), justify="center", foreground='#13a4c9') - l0.grid(row=12,column=0,padx=0,pady=0) - - l0=ttk.OptionMenu(frame0, self.mdxensemchoose_b_var, None, 'No Model', 'UVR-MDX-NET 1', 'UVR-MDX-NET 2', 'UVR-MDX-NET 3', - 'UVR-MDX-NET Karaoke') - l0.grid(row=13,column=0,padx=0,pady=0) - - l0=Label(frame0,text="Basic Ensemble Options",font=("Century Gothic", "10", "bold", "underline"), justify="center", fg="#f4f4f4") - l0.grid(row=1,column=1,padx=20,pady=10) - - l0=tk.Label(frame0,text='VR Model 1\n',font=("Century Gothic", "9", "bold"), justify="center", foreground='#13a4c9') - l0.grid(row=2,column=1,padx=0,pady=0) - - l0=ttk.OptionMenu(frame0, self.vrensemchoose_a_var, None, '1_HP-UVR', '2_HP-UVR', '3_HP-Vocal-UVR', - '4_HP-Vocal-UVR', '5_HP-Karaoke-UVR', '6_HP-Karaoke-UVR', '7_HP2-UVR', '8_HP2-UVR', - '9_HP2-UVR', '10_SP-UVR-2B-32000-1', '11_SP-UVR-2B-32000-2', '12_SP-UVR-3B-44100', '13_SP-UVR-4B-44100-1', - '14_SP-UVR-4B-44100-2', '15_SP-UVR-MID-44100-1', '16_SP-UVR-MID-44100-2', - 'MGM_MAIN_v4', 'MGM_HIGHEND_v4', 'MGM_LOWEND_A_v4', 'MGM_LOWEND_B_v4') - l0.grid(row=3,column=1,padx=0,pady=0) - - l0=tk.Label(frame0,text='\nVR Model 2\n',font=("Century Gothic", "9", "bold"), justify="center", foreground='#13a4c9') - l0.grid(row=4,column=1,padx=0,pady=0) - - l0=ttk.OptionMenu(frame0, self.vrensemchoose_b_var, None, '1_HP-UVR', '2_HP-UVR', '3_HP-Vocal-UVR', - '4_HP-Vocal-UVR', '5_HP-Karaoke-UVR', '6_HP-Karaoke-UVR', '7_HP2-UVR', '8_HP2-UVR', - '9_HP2-UVR', '10_SP-UVR-2B-32000-1', '11_SP-UVR-2B-32000-2', '12_SP-UVR-3B-44100', '13_SP-UVR-4B-44100-1', - '14_SP-UVR-4B-44100-2', '15_SP-UVR-MID-44100-1', '16_SP-UVR-MID-44100-2', - 'MGM_MAIN_v4', 'MGM_HIGHEND_v4', 'MGM_LOWEND_A_v4', 'MGM_LOWEND_B_v4') - l0.grid(row=5,column=1,padx=0,pady=0) - - l0=tk.Label(frame0,text='\nVR Model 3\n',font=("Century Gothic", "9", "bold"), justify="center", foreground='#13a4c9') - l0.grid(row=6,column=1,padx=0,pady=0) - - l0=ttk.OptionMenu(frame0, self.vrensemchoose_c_var, None, 'No Model', '1_HP-UVR', '2_HP-UVR', '3_HP-Vocal-UVR', - '4_HP-Vocal-UVR', '5_HP-Karaoke-UVR', '6_HP-Karaoke-UVR', '7_HP2-UVR', '8_HP2-UVR', - '9_HP2-UVR', '10_SP-UVR-2B-32000-1', '11_SP-UVR-2B-32000-2', '12_SP-UVR-3B-44100', '13_SP-UVR-4B-44100-1', - '14_SP-UVR-4B-44100-2', '15_SP-UVR-MID-44100-1', '16_SP-UVR-MID-44100-2', - 'MGM_MAIN_v4', 'MGM_HIGHEND_v4', 'MGM_LOWEND_A_v4', 'MGM_LOWEND_B_v4') - l0.grid(row=7,column=1,padx=0,pady=0) - - l0=tk.Label(frame0,text='\nVR Model 4\n',font=("Century Gothic", "9", "bold"), justify="center", foreground='#13a4c9') - l0.grid(row=8,column=1,padx=0,pady=0) - - l0=ttk.OptionMenu(frame0, self.vrensemchoose_d_var, None, 'No Model', '1_HP-UVR', '2_HP-UVR', '3_HP-Vocal-UVR', - '4_HP-Vocal-UVR', '5_HP-Karaoke-UVR', '6_HP-Karaoke-UVR', '7_HP2-UVR', '8_HP2-UVR', - '9_HP2-UVR', '10_SP-UVR-2B-32000-1', '11_SP-UVR-2B-32000-2', '12_SP-UVR-3B-44100', '13_SP-UVR-4B-44100-1', - '14_SP-UVR-4B-44100-2', '15_SP-UVR-MID-44100-1', '16_SP-UVR-MID-44100-2', - 'MGM_MAIN_v4', 'MGM_HIGHEND_v4', 'MGM_LOWEND_A_v4', 'MGM_LOWEND_B_v4') - l0.grid(row=9,column=1,padx=0,pady=0) - - l0=tk.Label(frame0,text='\nVR Model 5\n',font=("Century Gothic", "9", "bold"), justify="center", foreground='#13a4c9') - l0.grid(row=10,column=1,padx=0,pady=0) - - l0=ttk.OptionMenu(frame0, self.vrensemchoose_e_var, None, 'No Model', '1_HP-UVR', '2_HP-UVR', '3_HP-Vocal-UVR', - '4_HP-Vocal-UVR', '5_HP-Karaoke-UVR', '6_HP-Karaoke-UVR', '7_HP2-UVR', '8_HP2-UVR', - '9_HP2-UVR', '10_SP-UVR-2B-32000-1', '11_SP-UVR-2B-32000-2', '12_SP-UVR-3B-44100', '13_SP-UVR-4B-44100-1', - '14_SP-UVR-4B-44100-2', '15_SP-UVR-MID-44100-1', '16_SP-UVR-MID-44100-2', - 'MGM_MAIN_v4', 'MGM_HIGHEND_v4', 'MGM_LOWEND_A_v4', 'MGM_LOWEND_B_v4') - l0.grid(row=11,column=1,padx=0,pady=0) - - l0=Label(frame0,text="Additional Options",font=("Century Gothic", "10", "bold", "underline"), justify="center", fg="#f4f4f4") - l0.grid(row=1,column=2,padx=0,pady=0) - - l0=ttk.Checkbutton(frame0, text='Append Ensemble Name to Final Output', variable=self.appendensem_var) - l0.grid(row=2,column=2,padx=0,pady=0) - - l0=ttk.Checkbutton(frame0, text='Save Output Image Spectrogram (VR Architecture Only)', variable=self.outputImage_var) - l0.grid(row=3,column=2,padx=0,pady=0) - - l0=Label(frame0,text="Set Outside Parameters",font=("Century Gothic", "10", "bold", "underline"), fg="#f4f4f4") - l0.grid(row=4,column=2,padx=0,pady=0) - - l0=tk.Label(frame0, text='Window Size (VR Architecture)', font=("Century Gothic", "9", "bold"), foreground='#13a4c9') - l0.grid(row=5,column=2,padx=0,pady=0) - - l0=ttk.Entry(frame0, textvariable=self.winSize_var) - l0.grid(row=6,column=2,padx=0,pady=0) - - l0=tk.Label(frame0, text='Aggression Setting (VR Architecture)', font=("Century Gothic", "9", "bold"), foreground='#13a4c9') - l0.grid(row=7,column=2,padx=0,pady=0) - - l0=ttk.Entry(frame0, textvariable=self.agg_var) - l0.grid(row=8,column=2,padx=0,pady=0) - - l0=tk.Label(frame0, text='Chunks (MDX-Net)', font=("Century Gothic", "9", "bold"), foreground='#13a4c9') - l0.grid(row=9,column=2,padx=0,pady=0) - - l0=ttk.Entry(frame0, textvariable=self.chunks_var) - l0.grid(row=10,column=2,padx=0,pady=0) - - l0=tk.Label(frame0, text='N_FFT Scale (MDX-Net)', font=("Century Gothic", "9", "bold"), foreground='#13a4c9') - l0.grid(row=11,column=2,padx=0,pady=0) - - l0=ttk.Entry(frame0, textvariable=self.n_fft_scale_var) - l0.grid(row=12,column=2,padx=0,pady=0) - - l0=tk.Label(frame0, text='Dim_f (MDX-Net)', font=("Century Gothic", "9", "bold"), foreground='#13a4c9') - l0.grid(row=13,column=2,padx=0,pady=0) - - l0=ttk.Entry(frame0, textvariable=self.dim_f_var) - l0.grid(row=14,column=2,padx=0,pady=0) + l0=ttk.Button(frame0,text="Advanced VR Options", command=self.advanced_vr_options) + l0.grid(row=3,column=0,padx=20,pady=10) l0=ttk.Button(frame0,text='Open Utagoe', command=self.utagoe_start) + l0.grid(row=4,column=0,padx=20,pady=10) + l0=Label(frame0,text="Set Dropdown Parameters Manually",font=("Century Gothic", "10", "bold", "underline"), fg="#f4f4f4") + l0.grid(row=5,column=0,padx=30,pady=20) + + l0=tk.Label(frame0, text='Chunks (Set Manually)', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=6,column=0,padx=0,pady=0) + + l0=ttk.Entry(frame0, textvariable=self.chunks_var, justify='center') + l0.grid(row=7,column=0,padx=20,pady=10) + + l0=tk.Label(frame0, text='Noise Reduction (Set Manually)', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=8,column=0,padx=0,pady=0) + + l0=ttk.Entry(frame0, textvariable=self.noisereduc_s_var, justify='center') + l0.grid(row=9,column=0,padx=20,pady=10) + + l0=tk.Label(frame0, text='Window Size (Set Manually)', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=10,column=0,padx=0,pady=0) + + l0=ttk.Entry(frame0, textvariable=self.winSize_var, justify="center") + l0.grid(row=11,column=0,padx=20,pady=10) + + l0=tk.Label(frame0, text='Aggression Setting (Set Manually)', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=12,column=0,padx=0,pady=0) + + l0=ttk.Entry(frame0, textvariable=self.agg_var, justify="center") + l0.grid(row=13,column=0,padx=20,pady=10) frame0=Frame(tab10,highlightbackground='red',highlightthicknes=0) frame0.grid(row=0,column=0,padx=0,pady=30) @@ -2437,6 +2727,8 @@ class MainWindow(TkinterDnD.Tk): l0=ttk.Entry(frame0, textvariable=self.chunks_var) l0.grid(row=10,column=2,padx=0,pady=0) + + l0=tk.Label(frame0, text='N_FFT Scale (MDX-Net)', font=("Century Gothic", "9", "bold"), foreground='#13a4c9') l0.grid(row=11,column=2,padx=0,pady=0) @@ -2496,6 +2788,16 @@ class MainWindow(TkinterDnD.Tk): opener = "open" if sys.platform == "darwin" else "xdg-open" subprocess.call([opener, filename]) + def open_Modelfolder_de(self): + """Let user paste a ".pth" model to use for the vocal seperation""" + filename = 'models\Demucs_Model' + + if sys.platform == "win32": + os.startfile(filename) + else: + opener = "open" if sys.platform == "darwin" else "xdg-open" + subprocess.call([opener, filename]) + def open_appdir_filedialog(self): pathname = '.' @@ -2545,6 +2847,7 @@ class MainWindow(TkinterDnD.Tk): 'mdx_ensem_b': self.mdxensemchoose_b_var.get(), 'gpu': self.gpuConversion_var.get(), 'appendensem': self.appendensem_var.get(), + 'demucs_only': self.demucs_only_var.get(), 'postprocess': self.postprocessing_var.get(), 'tta': self.tta_var.get(), 'save': self.save_var.get(), @@ -2559,6 +2862,8 @@ class MainWindow(TkinterDnD.Tk): 'algo': self.algo_var.get(), 'ensChoose': self.ensChoose_var.get(), 'mdxnetModel': self.mdxnetModel_var.get(), + 'DemucsModel': self.DemucsModel_var.get(), + 'ModelParams': self.ModelParams_var.get(), #MDX-Net 'demucsmodel': self.demucsmodel_var.get(), 'non_red': self.non_red_var.get(), @@ -2568,6 +2873,12 @@ class MainWindow(TkinterDnD.Tk): 'chunks': chunks, 'n_fft_scale': self.n_fft_scale_var.get(), 'dim_f': self.dim_f_var.get(), + 'overlap': self.overlap_var.get(), + 'shifts': self.shifts_var.get(), + 'margin': self.margin_var.get(), + 'channel': self.channel_var.get(), + 'compensate': self.compensate_var.get(), + 'mdxnetModeltype': self.mdxnetModeltype_var.get(), 'noisereduc_s': noisereduc_s, 'mixing': mixing, }, diff --git a/inference_MDX.py b/inference_MDX.py index e252ee8..42a4fca 100644 --- a/inference_MDX.py +++ b/inference_MDX.py @@ -62,11 +62,16 @@ class Predictor(): self.demucs.load_state_dict(torch.load(demucs_name)) widget_text.write('Done!\n') self.demucs.eval() + self.onnx_models = {} c = 0 + print('stemtype: ', modeltype) + self.models = get_models('tdf_extra', load=False, device=cpu, stems=modeltype, n_fft_scale=n_fft_scale_set, dim_f=dim_f_set) - widget_text.write(base_text + 'Loading ONNX model... ') + if not data['demucs_only']: + widget_text.write(base_text + 'Loading ONNX model... ') + update_progress(**progress_kwargs, step=0.1) c+=1 @@ -85,7 +90,9 @@ class Predictor(): print(str(device)) self.onnx_models[c] = ort.InferenceSession(os.path.join('models/MDX_Net_Models', str(model_set) + '.onnx'), providers=run_type) - widget_text.write('Done!\n') + + if not data['demucs_only']: + widget_text.write('Done!\n') def prediction(self, m): #mix, rate = sf.read(m) @@ -100,8 +107,18 @@ class Predictor(): #Main Save Path save_path = os.path.dirname(_basename) + print('stemset_n: ', stemset_n) + #Vocal Path - vocal_name = '(Vocals)' + if stemset_n == '(Vocals)': + vocal_name = '(Vocals)' + elif stemset_n == '(Other)': + vocal_name = '(Other)' + elif stemset_n == '(Drums)': + vocal_name = '(Drums)' + elif stemset_n == '(Bass)': + vocal_name = '(Bass)' + if data['modelFolder']: vocal_path = '{save_path}/{file_name}.wav'.format( save_path=save_path, @@ -124,7 +141,16 @@ class Predictor(): file_name = f'{os.path.basename(_basename)}_{vocal_name}',) #Instrumental Path - Instrumental_name = '(Instrumental)' + + if stemset_n == '(Vocals)': + Instrumental_name = '(Instrumental)' + elif stemset_n == '(Other)': + Instrumental_name = '(No_Other)' + elif stemset_n == '(Drums)': + Instrumental_name = '(No_Drums)' + elif stemset_n == '(Bass)': + Instrumental_name = '(No_Bass)' + if data['modelFolder']: Instrumental_path = '{save_path}/{file_name}.wav'.format( save_path=save_path, @@ -147,7 +173,15 @@ class Predictor(): file_name = f'{os.path.basename(_basename)}_{Instrumental_name}',) #Non-Reduced Vocal Path - vocal_name = '(Vocals)' + if stemset_n == '(Vocals)': + vocal_name = '(Vocals)' + elif stemset_n == '(Other)': + vocal_name = '(Other)' + elif stemset_n == '(Drums)': + vocal_name = '(Drums)' + elif stemset_n == '(Bass)': + vocal_name = '(Bass)' + if data['modelFolder']: non_reduced_vocal_path = '{save_path}/{file_name}.wav'.format( save_path=save_path, @@ -222,7 +256,7 @@ class Predictor(): step=(0.9)) widget_text.write('Done!\n') widget_text.write(base_text + 'Performing Noise Reduction... ') - reduction_sen = float(int(data['noisereduc_s'])/10) + reduction_sen = float(data['noisereduc_s'])/10 subprocess.call("lib_v5\\sox\\sox.exe" + ' "' + f"{str(non_reduced_vocal_path)}" + '" "' + f"{str(vocal_path)}" + '" ' + "noisered lib_v5\\sox\\mdxnetnoisereduc.prof " + f"{reduction_sen}", @@ -442,8 +476,6 @@ class Predictor(): except: pass - - if data['noisereduc_s'] == 'None': pass elif data['non_red'] == True: @@ -536,7 +568,8 @@ class Predictor(): if not data['demucsmodel']: sources = self.demix_base(segmented_mix, margin_size=margin) - + elif data['demucs_only']: + sources = self.demix_demucs(segmented_mix, margin_size=margin) else: # both, apply spec effects base_out = self.demix_base(segmented_mix, margin_size=margin) demucs_out = self.demix_demucs(segmented_mix, margin_size=margin) @@ -545,10 +578,10 @@ class Predictor(): print('Warning: there are {} nan values in the array(s).'.format(nan_count)) demucs_out, base_out = np.nan_to_num(demucs_out), np.nan_to_num(base_out) sources = {} - - sources[3] = (spec_effects(wave=[demucs_out[3],base_out[0]], - algorithm='default', - value=b[3])*1.03597672895) # compensation + print(data['mixing']) + sources[3] = (spec_effects(wave=[demucs_out[source_val],base_out[0]], + algorithm=data['mixing'], + value=b[3])*float(data['compensate'])) # compensation return sources def demix_base(self, mixes, margin_size): @@ -607,6 +640,7 @@ class Predictor(): return _sources def demix_demucs(self, mix, margin_size): + print('shift_set ', shift_set) processed = {} demucsitera = len(mix) demucsitera_calc = demucsitera * 2 @@ -622,7 +656,6 @@ class Predictor(): cmix = torch.tensor(cmix, dtype=torch.float32) ref = cmix.mean(0) cmix = (cmix - ref.mean()) / ref.std() - shift_set = 0 with torch.no_grad(): sources = apply_model(self.demucs, cmix.to(device), split=True, overlap=overlap_set, shifts=shift_set) sources = (sources * ref.std() + ref.mean()).cpu().numpy() @@ -650,16 +683,22 @@ data = { 'chunks': 10, 'non_red': False, 'noisereduc_s': 3, - 'mixing': 'default', 'modelFolder': False, 'voc_only': False, 'inst_only': False, - 'break': False, 'n_fft_scale': 6144, 'dim_f': 2048, + 'overlap': 0.5, + 'shifts': 0, + 'margin': 44100, + 'channel': 64, + 'compensate': 1.03597672895, + 'demucs_only': False, + 'mixing': 'Default', + 'DemucsModel': 'demucs_extra-3646af93_org.th', # Choose Model 'mdxnetModel': 'UVR-MDX-NET 1', - 'high_end_process': 'mirroring', + 'mdxnetModeltype': 'Vocals (Custom)', } default_chunks = data['chunks'] default_noisereduc_s = data['noisereduc_s'] @@ -696,9 +735,6 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress global widget_text global gui_progress_bar global music_file - global channel_set - global margin_set - global overlap_set global default_chunks global default_noisereduc_s global _basename @@ -710,14 +746,17 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress global base_text global model_set global model_set_name + global stemset_n + + global channel_set + global margin_set + global overlap_set + global shift_set + global source_val # Update default settings default_chunks = data['chunks'] default_noisereduc_s = data['noisereduc_s'] - - channel_set = int(64) - margin_set = int(44100) - overlap_set = float(0.5) widget_text = text_widget gui_progress_bar = progress_var @@ -735,7 +774,6 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress ffmp_err = """audioread\__init__.py", line 116, in audio_open""" sf_write_err = "sf.write" - try: with open('errorlog.txt', 'w') as f: f.write(f'No errors to report at this time.' + f'\n\nLast Process Method Used: MDX-Net' + @@ -745,38 +783,112 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress data.update(kwargs) + if data['mdxnetModeltype'] == 'Vocals (Custom)': + stemset = 'v' + source_val_set = 3 + stem_name = '(Vocals)' + if data['mdxnetModeltype'] == 'Other (Custom)': + stemset = 'o' + source_val_set = 2 + stem_name = '(Other)' + if data['mdxnetModeltype'] == 'Drums (Custom)': + stemset = 'd' + source_val_set = 1 + stem_name = '(Drums)' + if data['mdxnetModeltype'] == 'Bass (Custom)': + stemset = 'b' + source_val_set = 0 + stem_name = '(Bass)' + if data['mdxnetModeltype'] == 'Vocals (Default)': + stemset = 'v' + source_val_set = 3 + stem_name = '(Vocals)' + if data['mdxnetModeltype'] == 'Other (Default)': + stemset = 'o' + source_val_set = 2 + stem_name = '(Other)' + if data['mdxnetModeltype'] == 'Drums (Default)': + stemset = 'd' + source_val_set = 1 + stem_name = '(Drums)' + if data['mdxnetModeltype'] == 'Bass (Default)': + stemset = 'b' + source_val_set = 0 + stem_name = '(Bass)' + if data['mdxnetModel'] == 'UVR-MDX-NET 1': - model_set = 'UVR_MDXNET_9703' - model_set_name = 'UVR_MDXNET_9703' - modeltype = 'vocals-one' + model_set = 'UVR_MDXNET_1_9703' + model_set_name = 'UVR_MDXNET_1_9703' + modeltype = 'v' + stemset_n = '(Vocals)' + source_val = 3 n_fft_scale_set=6144 dim_f_set=2048 elif data['mdxnetModel'] == 'UVR-MDX-NET 2': - model_set = 'UVR_MDXNET_9682' - model_set_name = 'UVR_MDXNET_9682' - modeltype = 'vocals-one' + model_set = 'UVR_MDXNET_2_9682' + model_set_name = 'UVR_MDXNET_2_9682' + modeltype = 'v' + stemset_n = '(Vocals)' + source_val = 3 n_fft_scale_set=6144 dim_f_set=2048 elif data['mdxnetModel'] == 'UVR-MDX-NET 3': - model_set = 'UVR_MDXNET_9662' - model_set_name = 'UVR_MDXNET_9662' + model_set = 'UVR_MDXNET_3_9662' + model_set_name = 'UVR_MDXNET_3_9662' + modeltype = 'v' + stemset_n = '(Vocals)' + source_val = 3 n_fft_scale_set=6144 dim_f_set=2048 elif data['mdxnetModel'] == 'UVR-MDX-NET Karaoke': model_set = 'UVR_MDXNET_KARA' model_set_name = 'UVR_MDXNET_Karaoke' - modeltype = 'vocals-one' + modeltype = 'v' + stemset_n = '(Vocals)' + source_val = 3 n_fft_scale_set=6144 dim_f_set=2048 + elif data['mdxnetModel'] == 'other': + model_set = 'other' + model_set_name = 'other' + modeltype = 'o' + stemset_n = '(Other)' + source_val = 2 + n_fft_scale_set=8192 + dim_f_set=2048 + elif data['mdxnetModel'] == 'drums': + model_set = 'drums' + model_set_name = 'drums' + modeltype = 'd' + stemset_n = '(Drums)' + source_val = 1 + n_fft_scale_set=4096 + dim_f_set=2048 + elif data['mdxnetModel'] == 'bass': + model_set = 'bass' + model_set_name = 'bass' + modeltype = 'b' + stemset_n = '(Bass)' + source_val = 0 + n_fft_scale_set=16384 + dim_f_set=2048 else: model_set = data['mdxnetModel'] model_set_name = data['mdxnetModel'] - modeltype = 'vocals-two' + modeltype = stemset + stemset_n = stem_name + source_val = source_val_set n_fft_scale_set=int(data['n_fft_scale']) dim_f_set=int(data['dim_f']) print(n_fft_scale_set) print(dim_f_set) + print(data['DemucsModel']) + + overlap_set = float(data['overlap']) + channel_set = int(data['channel']) + margin_set = int(data['margin']) + shift_set = int(data['shifts']) stime = time.perf_counter() progress_var.set(0) @@ -799,11 +911,20 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress print(model_set) try: + + if float(data['noisereduc_s']) >= 10: + text_widget.write('Error: Noise Reduction only supports values between 0-10.\nPlease set a value between 0-10 (with or without decimals) and try again.') + progress_var.set(0) + button_widget.configure(state=tk.NORMAL) # Enable Button + return + total, used, free = shutil.disk_usage("/") + total_space = int(total/1.074e+9) used_space = int(used/1.074e+9) free_space = int(free/1.074e+9) + if int(free/1.074e+9) <= int(2): text_widget.write('Error: Not enough storage on main drive to continue. Your main drive must have \nat least 3 GB\'s of storage in order for this application function properly. \n\nPlease ensure your main drive has at least 3 GB\'s of storage and try again.\n\n') @@ -837,12 +958,14 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress e = os.path.join(data["export_path"]) - demucsmodel = 'models/Demucs_Model/demucs_extra-3646af93_org.th' + demucsmodel = 'models/Demucs_Model/' + str(data['DemucsModel']) pred = Predictor() pred.prediction_setup(demucs_name=demucsmodel, channels=channel_set) + print(demucsmodel) + # split pred.prediction( m=music_file, diff --git a/inference_v5.py b/inference_v5.py index 67885bd..e112490 100644 --- a/inference_v5.py +++ b/inference_v5.py @@ -52,7 +52,8 @@ data = { # Constants 'window_size': 512, 'agg': 10, - 'high_end_process': 'mirroring' + 'high_end_process': 'mirroring', + 'ModelParams': 'Auto' } default_window_size = data['window_size'] @@ -428,159 +429,167 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress #v5 Models if model_hash == '47939caf0cfe52a0e81442b85b971dfd': - model_params_d=str('lib_v5/modelparams/4band_44100.json') - param_name=str('4band_44100') + model_params_auto=str('lib_v5/modelparams/4band_44100.json') + param_name_auto=str('4band_44100') if model_hash == '4e4ecb9764c50a8c414fee6e10395bbe': - model_params_d=str('lib_v5/modelparams/4band_v2.json') - param_name=str('4band_v2') + model_params_auto=str('lib_v5/modelparams/4band_v2.json') + param_name_auto=str('4band_v2') if model_hash == 'e60a1e84803ce4efc0a6551206cc4b71': - model_params_d=str('lib_v5/modelparams/4band_44100.json') - param_name=str('4band_44100') + model_params_auto=str('lib_v5/modelparams/4band_44100.json') + param_name_auto=str('4band_44100') if model_hash == 'a82f14e75892e55e994376edbf0c8435': - model_params_d=str('lib_v5/modelparams/4band_44100.json') - param_name=str('4band_44100') + model_params_auto=str('lib_v5/modelparams/4band_44100.json') + param_name_auto=str('4band_44100') if model_hash == '6dd9eaa6f0420af9f1d403aaafa4cc06': - model_params_d=str('lib_v5/modelparams/4band_v2_sn.json') - param_name=str('4band_v2_sn') + model_params_auto=str('lib_v5/modelparams/4band_v2_sn.json') + param_name_auto=str('4band_v2_sn') if model_hash == '5c7bbca45a187e81abbbd351606164e5': - model_params_d=str('lib_v5/modelparams/3band_44100_msb2.json') - param_name=str('3band_44100_msb2') + model_params_auto=str('lib_v5/modelparams/3band_44100_msb2.json') + param_name_auto=str('3band_44100_msb2') if model_hash == 'd6b2cb685a058a091e5e7098192d3233': - model_params_d=str('lib_v5/modelparams/3band_44100_msb2.json') - param_name=str('3band_44100_msb2') + model_params_auto=str('lib_v5/modelparams/3band_44100_msb2.json') + param_name_auto=str('3band_44100_msb2') if model_hash == 'c1b9f38170a7c90e96f027992eb7c62b': - model_params_d=str('lib_v5/modelparams/4band_44100.json') - param_name=str('4band_44100') + model_params_auto=str('lib_v5/modelparams/4band_44100.json') + param_name_auto=str('4band_44100') if model_hash == 'c3448ec923fa0edf3d03a19e633faa53': - model_params_d=str('lib_v5/modelparams/4band_44100.json') - param_name=str('4band_44100') + model_params_auto=str('lib_v5/modelparams/4band_44100.json') + param_name_auto=str('4band_44100') if model_hash == '68aa2c8093d0080704b200d140f59e54': - model_params_d=str('lib_v5/modelparams/3band_44100.json') - param_name=str('3band_44100.json') + model_params_auto=str('lib_v5/modelparams/3band_44100.json') + param_name_auto=str('3band_44100.json') if model_hash == 'fdc83be5b798e4bd29fe00fe6600e147': - model_params_d=str('lib_v5/modelparams/3band_44100_mid.json') - param_name=str('3band_44100_mid.json') + model_params_auto=str('lib_v5/modelparams/3band_44100_mid.json') + param_name_auto=str('3band_44100_mid.json') if model_hash == '2ce34bc92fd57f55db16b7a4def3d745': - model_params_d=str('lib_v5/modelparams/3band_44100_mid.json') - param_name=str('3band_44100_mid.json') + model_params_auto=str('lib_v5/modelparams/3band_44100_mid.json') + param_name_auto=str('3band_44100_mid.json') if model_hash == '52fdca89576f06cf4340b74a4730ee5f': - model_params_d=str('lib_v5/modelparams/4band_44100.json') - param_name=str('4band_44100.json') + model_params_auto=str('lib_v5/modelparams/4band_44100.json') + param_name_auto=str('4band_44100.json') if model_hash == '41191165b05d38fc77f072fa9e8e8a30': - model_params_d=str('lib_v5/modelparams/4band_44100.json') - param_name=str('4band_44100.json') + model_params_auto=str('lib_v5/modelparams/4band_44100.json') + param_name_auto=str('4band_44100.json') if model_hash == '89e83b511ad474592689e562d5b1f80e': - model_params_d=str('lib_v5/modelparams/2band_32000.json') - param_name=str('2band_32000.json') + model_params_auto=str('lib_v5/modelparams/2band_32000.json') + param_name_auto=str('2band_32000.json') if model_hash == '0b954da81d453b716b114d6d7c95177f': - model_params_d=str('lib_v5/modelparams/2band_32000.json') - param_name=str('2band_32000.json') + model_params_auto=str('lib_v5/modelparams/2band_32000.json') + param_name_auto=str('2band_32000.json') #v4 Models if model_hash == '6a00461c51c2920fd68937d4609ed6c8': - model_params_d=str('lib_v5/modelparams/1band_sr16000_hl512.json') - param_name=str('1band_sr16000_hl512') + model_params_auto=str('lib_v5/modelparams/1band_sr16000_hl512.json') + param_name_auto=str('1band_sr16000_hl512') if model_hash == '0ab504864d20f1bd378fe9c81ef37140': - model_params_d=str('lib_v5/modelparams/1band_sr32000_hl512.json') - param_name=str('1band_sr32000_hl512') + model_params_auto=str('lib_v5/modelparams/1band_sr32000_hl512.json') + param_name_auto=str('1band_sr32000_hl512') if model_hash == '7dd21065bf91c10f7fccb57d7d83b07f': - model_params_d=str('lib_v5/modelparams/1band_sr32000_hl512.json') - param_name=str('1band_sr32000_hl512') + model_params_auto=str('lib_v5/modelparams/1band_sr32000_hl512.json') + param_name_auto=str('1band_sr32000_hl512') if model_hash == '80ab74d65e515caa3622728d2de07d23': - model_params_d=str('lib_v5/modelparams/1band_sr32000_hl512.json') - param_name=str('1band_sr32000_hl512') + model_params_auto=str('lib_v5/modelparams/1band_sr32000_hl512.json') + param_name_auto=str('1band_sr32000_hl512') if model_hash == 'edc115e7fc523245062200c00caa847f': - model_params_d=str('lib_v5/modelparams/1band_sr33075_hl384.json') - param_name=str('1band_sr33075_hl384') + model_params_auto=str('lib_v5/modelparams/1band_sr33075_hl384.json') + param_name_auto=str('1band_sr33075_hl384') if model_hash == '28063e9f6ab5b341c5f6d3c67f2045b7': - model_params_d=str('lib_v5/modelparams/1band_sr33075_hl384.json') - param_name=str('1band_sr33075_hl384') + model_params_auto=str('lib_v5/modelparams/1band_sr33075_hl384.json') + param_name_auto=str('1band_sr33075_hl384') if model_hash == 'b58090534c52cbc3e9b5104bad666ef2': - model_params_d=str('lib_v5/modelparams/1band_sr44100_hl512.json') - param_name=str('1band_sr44100_hl512') + model_params_auto=str('lib_v5/modelparams/1band_sr44100_hl512.json') + param_name_auto=str('1band_sr44100_hl512') if model_hash == '0cdab9947f1b0928705f518f3c78ea8f': - model_params_d=str('lib_v5/modelparams/1band_sr44100_hl512.json') - param_name=str('1band_sr44100_hl512') + model_params_auto=str('lib_v5/modelparams/1band_sr44100_hl512.json') + param_name_auto=str('1band_sr44100_hl512') if model_hash == 'ae702fed0238afb5346db8356fe25f13': - model_params_d=str('lib_v5/modelparams/1band_sr44100_hl1024.json') - param_name=str('1band_sr44100_hl1024') + model_params_auto=str('lib_v5/modelparams/1band_sr44100_hl1024.json') + param_name_auto=str('1band_sr44100_hl1024') #User Models #1 Band if '1band_sr16000_hl512' in ModelName: - model_params_d=str('lib_v5/modelparams/1band_sr16000_hl512.json') - param_name=str('1band_sr16000_hl512') + model_params_auto=str('lib_v5/modelparams/1band_sr16000_hl512.json') + param_name_auto=str('1band_sr16000_hl512') if '1band_sr32000_hl512' in ModelName: - model_params_d=str('lib_v5/modelparams/1band_sr32000_hl512.json') - param_name=str('1band_sr32000_hl512') + model_params_auto=str('lib_v5/modelparams/1band_sr32000_hl512.json') + param_name_auto=str('1band_sr32000_hl512') if '1band_sr33075_hl384' in ModelName: - model_params_d=str('lib_v5/modelparams/1band_sr33075_hl384.json') - param_name=str('1band_sr33075_hl384') + model_params_auto=str('lib_v5/modelparams/1band_sr33075_hl384.json') + param_name_auto=str('1band_sr33075_hl384') if '1band_sr44100_hl256' in ModelName: - model_params_d=str('lib_v5/modelparams/1band_sr44100_hl256.json') - param_name=str('1band_sr44100_hl256') + model_params_auto=str('lib_v5/modelparams/1band_sr44100_hl256.json') + param_name_auto=str('1band_sr44100_hl256') if '1band_sr44100_hl512' in ModelName: - model_params_d=str('lib_v5/modelparams/1band_sr44100_hl512.json') - param_name=str('1band_sr44100_hl512') + model_params_auto=str('lib_v5/modelparams/1band_sr44100_hl512.json') + param_name_auto=str('1band_sr44100_hl512') if '1band_sr44100_hl1024' in ModelName: - model_params_d=str('lib_v5/modelparams/1band_sr44100_hl1024.json') - param_name=str('1band_sr44100_hl1024') + model_params_auto=str('lib_v5/modelparams/1band_sr44100_hl1024.json') + param_name_auto=str('1band_sr44100_hl1024') #2 Band if '2band_44100_lofi' in ModelName: - model_params_d=str('lib_v5/modelparams/2band_44100_lofi.json') - param_name=str('2band_44100_lofi') + model_params_auto=str('lib_v5/modelparams/2band_44100_lofi.json') + param_name_auto=str('2band_44100_lofi') if '2band_32000' in ModelName: - model_params_d=str('lib_v5/modelparams/2band_32000.json') - param_name=str('2band_32000') + model_params_auto=str('lib_v5/modelparams/2band_32000.json') + param_name_auto=str('2band_32000') if '2band_48000' in ModelName: - model_params_d=str('lib_v5/modelparams/2band_48000.json') - param_name=str('2band_48000') + model_params_auto=str('lib_v5/modelparams/2band_48000.json') + param_name_auto=str('2band_48000') #3 Band if '3band_44100' in ModelName: - model_params_d=str('lib_v5/modelparams/3band_44100.json') - param_name=str('3band_44100') + model_params_auto=str('lib_v5/modelparams/3band_44100.json') + param_name_auto=str('3band_44100') if '3band_44100_mid' in ModelName: - model_params_d=str('lib_v5/modelparams/3band_44100_mid.json') - param_name=str('3band_44100_mid') + model_params_auto=str('lib_v5/modelparams/3band_44100_mid.json') + param_name_auto=str('3band_44100_mid') if '3band_44100_msb2' in ModelName: - model_params_d=str('lib_v5/modelparams/3band_44100_msb2.json') - param_name=str('3band_44100_msb2') + model_params_auto=str('lib_v5/modelparams/3band_44100_msb2.json') + param_name_auto=str('3band_44100_msb2') #4 Band if '4band_44100' in ModelName: - model_params_d=str('lib_v5/modelparams/4band_44100.json') - param_name=str('4band_44100') + model_params_auto=str('lib_v5/modelparams/4band_44100.json') + param_name_auto=str('4band_44100') if '4band_44100_mid' in ModelName: - model_params_d=str('lib_v5/modelparams/4band_44100_mid.json') - param_name=str('4band_44100_mid') + model_params_auto=str('lib_v5/modelparams/4band_44100_mid.json') + param_name_auto=str('4band_44100_mid') if '4band_44100_msb' in ModelName: - model_params_d=str('lib_v5/modelparams/4band_44100_msb.json') - param_name=str('4band_44100_msb') + model_params_auto=str('lib_v5/modelparams/4band_44100_msb.json') + param_name_auto=str('4band_44100_msb') if '4band_44100_msb2' in ModelName: - model_params_d=str('lib_v5/modelparams/4band_44100_msb2.json') - param_name=str('4band_44100_msb2') + model_params_auto=str('lib_v5/modelparams/4band_44100_msb2.json') + param_name_auto=str('4band_44100_msb2') if '4band_44100_reverse' in ModelName: - model_params_d=str('lib_v5/modelparams/4band_44100_reverse.json') - param_name=str('4band_44100_reverse') + model_params_auto=str('lib_v5/modelparams/4band_44100_reverse.json') + param_name_auto=str('4band_44100_reverse') if '4band_44100_sw' in ModelName: - model_params_d=str('lib_v5/modelparams/4band_44100_sw.json') - param_name=str('4band_44100_sw') + model_params_auto=str('lib_v5/modelparams/4band_44100_sw.json') + param_name_auto=str('4band_44100_sw') if '4band_v2' in ModelName: - model_params_d=str('lib_v5/modelparams/4band_v2.json') - param_name=str('4band_v2') + model_params_auto=str('lib_v5/modelparams/4band_v2.json') + param_name_auto=str('4band_v2') if '4band_v2_sn' in ModelName: - model_params_d=str('lib_v5/modelparams/4band_v2_sn.json') - param_name=str('4band_v2_sn') + model_params_auto=str('lib_v5/modelparams/4band_v2_sn.json') + param_name_auto=str('4band_v2_sn') if 'tmodelparam' in ModelName: - model_params_d=str('lib_v5/modelparams/tmodelparam.json') - param_name=str('User Model Param Set') + model_params_auto=str('lib_v5/modelparams/tmodelparam.json') + param_name_auto=str('User Model Param Set') text_widget.write(' Done!\n') - + + + if data['ModelParams'] == 'Auto': + param_name = param_name_auto + model_params_d = model_params_auto + else: + param_name = str(data['ModelParams']) + model_params_d = str('lib_v5/modelparams/' + data['ModelParams']) + try: print('Model Parameters:', model_params_d) text_widget.write(base_text + 'Loading assigned model parameters ' + '\"' + param_name + '\"... ') @@ -617,6 +626,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress button_widget.configure(state=tk.NORMAL) # Enable Button return + mp = ModelParameters(model_params_d) text_widget.write('Done!\n') # -Instrumental- diff --git a/inference_v5_ensemble.py b/inference_v5_ensemble.py index 477d474..377bf88 100644 --- a/inference_v5_ensemble.py +++ b/inference_v5_ensemble.py @@ -71,8 +71,10 @@ class Predictor(): self.onnx_models = {} c = 0 - self.models = get_models('tdf_extra', load=False, device=cpu, stems=modeltype) - widget_text.write(base_text + 'Loading ONNX model... ') + self.models = get_models('tdf_extra', load=False, device=cpu, stems=modeltype, n_fft_scale=n_fft_scale_set, dim_f=dim_f_set) + if not data['demucs_only']: + widget_text.write(base_text + 'Loading ONNX model... ') + update_progress(**progress_kwargs, step=0.1) c+=1 @@ -92,8 +94,10 @@ class Predictor(): print('model_set: ', model_set) self.onnx_models[c] = ort.InferenceSession(os.path.join('models/MDX_Net_Models', model_set), providers=run_type) - widget_text.write('Done!\n') - + + if not data['demucs_only']: + widget_text.write('Done!\n') + def prediction(self, m): #mix, rate = sf.read(m) mix, rate = librosa.load(m, mono=False, sr=44100) @@ -181,7 +185,7 @@ class Predictor(): step=(0.9)) widget_text.write('Done!\n') widget_text.write(base_text + 'Performing Noise Reduction... ') - reduction_sen = float(int(data['noisereduc_s'])/10) + reduction_sen = float(data['noisereduc_s'])/10 subprocess.call("lib_v5\\sox\\sox.exe" + ' "' + f"{str(non_reduced_vocal_path)}" + '" "' + f"{str(vocal_path)}" + '" ' + "noisered lib_v5\\sox\\mdxnetnoisereduc.prof " + f"{reduction_sen}", @@ -353,7 +357,8 @@ class Predictor(): if not data['demucsmodel']: sources = self.demix_base(segmented_mix, margin_size=margin) - + elif data['demucs_only']: + sources = self.demix_demucs(segmented_mix, margin_size=margin) else: # both, apply spec effects base_out = self.demix_base(segmented_mix, margin_size=margin) demucs_out = self.demix_demucs(segmented_mix, margin_size=margin) @@ -364,8 +369,8 @@ class Predictor(): sources = {} sources[3] = (spec_effects(wave=[demucs_out[3],base_out[0]], - algorithm='default', - value=b[3])*1.03597672895) # compensation + algorithm=data['mixing'], + value=b[3])*float(data['compensate'])) # compensation return sources def demix_base(self, mixes, margin_size): @@ -439,7 +444,6 @@ class Predictor(): cmix = torch.tensor(cmix, dtype=torch.float32) ref = cmix.mean(0) cmix = (cmix - ref.mean()) / ref.std() - shift_set = 0 with torch.no_grad(): sources = apply_model(self.demucs, cmix.to(device), split=True, overlap=overlap_set, shifts=shift_set) sources = (sources * ref.std() + ref.mean()).cpu().numpy() @@ -562,11 +566,20 @@ data = { 'chunks': 'auto', 'non_red': False, 'noisereduc_s': 3, - 'mixing': 'default', 'ensChoose': 'Basic Ensemble', 'algo': 'Instrumentals (Min Spec)', #Advanced Options 'appendensem': False, + + 'overlap': 0.5, + 'shifts': 0, + 'margin': 44100, + 'channel': 64, + 'compensate': 1.03597672895, + 'demucs_only': False, + 'mixing': 'Default', + 'DemucsModel': 'demucs_extra-3646af93_org.th', + # Models 'instrumentalModel': None, 'useModel': None, @@ -602,9 +615,6 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress global widget_text global gui_progress_bar global music_file - global channel_set - global margin_set - global overlap_set global default_chunks global default_noisereduc_s global base_name @@ -615,13 +625,17 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress global model_set_name global ModelName_2 + global channel_set + global margin_set + global overlap_set + global shift_set + + global n_fft_scale_set + global dim_f_set + # Update default settings default_chunks = data['chunks'] default_noisereduc_s = data['noisereduc_s'] - - channel_set = int(64) - margin_set = int(44100) - overlap_set = float(0.5) widget_text = text_widget gui_progress_bar = progress_var @@ -647,6 +661,15 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress except: pass + + overlap_set = float(data['overlap']) + channel_set = int(data['channel']) + margin_set = int(data['margin']) + shift_set = int(data['shifts']) + + n_fft_scale_set=6144 + dim_f_set=2048 + global nn_arch_sizes global nn_architecture @@ -1195,11 +1218,11 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress #MDX-Net Model if data['mdx_ensem'] == 'UVR-MDX-NET 1': - mdx_ensem = 'UVR_MDXNET_9703' + mdx_ensem = 'UVR_MDXNET_1_9703' if data['mdx_ensem'] == 'UVR-MDX-NET 2': - mdx_ensem = 'UVR_MDXNET_9682' + mdx_ensem = 'UVR_MDXNET_2_9682' if data['mdx_ensem'] == 'UVR-MDX-NET 3': - mdx_ensem = 'UVR_MDXNET_9662' + mdx_ensem = 'UVR_MDXNET_3_9662' if data['mdx_ensem'] == 'UVR-MDX-NET Karaoke': mdx_ensem = 'UVR_MDXNET_KARA' @@ -1207,11 +1230,11 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress #MDX-Net Model 2 if data['mdx_ensem_b'] == 'UVR-MDX-NET 1': - mdx_ensem_b = 'UVR_MDXNET_9703' + mdx_ensem_b = 'UVR_MDXNET_1_9703' if data['mdx_ensem_b'] == 'UVR-MDX-NET 2': - mdx_ensem_b = 'UVR_MDXNET_9682' + mdx_ensem_b = 'UVR_MDXNET_2_9682' if data['mdx_ensem_b'] == 'UVR-MDX-NET 3': - mdx_ensem_b = 'UVR_MDXNET_9662' + mdx_ensem_b = 'UVR_MDXNET_3_9662' if data['mdx_ensem_b'] == 'UVR-MDX-NET Karaoke': mdx_ensem_b = 'UVR_MDXNET_Karaoke' if data['mdx_ensem_b'] == 'No Model': @@ -1925,23 +1948,23 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress else: text_widget.write('Ensemble Mode - Running Model - ' + mdx_name + '\n\n') - if mdx_name == 'UVR_MDXNET_9703': - mdx_ensem_b = 'UVR_MDXNET_9703' - model_set = 'UVR_MDXNET_9703.onnx' - model_set_name = 'UVR_MDXNET_9703' - modeltype = 'vocals-one' - if mdx_name == 'UVR_MDXNET_9682': - model_set = 'UVR_MDXNET_9682.onnx' - model_set_name = 'UVR_MDXNET_9682' - modeltype = 'vocals-one' - if mdx_name == 'UVR_MDXNET_9662': - model_set = 'UVR_MDXNET_9662.onnx' - model_set_name = 'UVR_MDXNET_9662' - modeltype = 'vocals-one' + if mdx_name == 'UVR_MDXNET_1_9703': + mdx_ensem_b = 'UVR_MDXNET_1_9703' + model_set = 'UVR_MDXNET_1_9703.onnx' + model_set_name = 'UVR_MDXNET_1_9703' + modeltype = 'v' + if mdx_name == 'UVR_MDXNET_2_9682': + model_set = 'UVR_MDXNET_2_9682.onnx' + model_set_name = 'UVR_MDXNET_2_9682' + modeltype = 'v' + if mdx_name == 'UVR_MDXNET_3_9662': + model_set = 'UVR_MDXNET_3_9662.onnx' + model_set_name = 'UVR_MDXNET_3_9662' + modeltype = 'v' if mdx_name == 'UVR_MDXNET_Karaoke': model_set = 'UVR_MDXNET_KARA.onnx' model_set_name = 'UVR_MDXNET_Karaoke' - modeltype = 'vocals-one' + modeltype = 'v' update_progress(**progress_kwargs, step=0) @@ -1958,7 +1981,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress e = os.path.join(data["export_path"]) - demucsmodel = 'models/Demucs_Model/demucs_extra-3646af93_org.th' + demucsmodel = 'models/Demucs_Model/' + str(data['DemucsModel']) pred = Predictor() pred.prediction_setup(demucs_name=demucsmodel, diff --git a/models.py b/models.py index e2f083d..0b0af07 100644 --- a/models.py +++ b/models.py @@ -60,18 +60,18 @@ def istft(spec, hl): return wave -def spec_effects(wave, algorithm='default', value=None): +def spec_effects(wave, algorithm='Default', value=None): spec = [stft(wave[0],2048,1024),stft(wave[1],2048,1024)] - if algorithm == 'min_mag': + if algorithm == 'Min_Mag': v_spec_m = np.where(np.abs(spec[1]) <= np.abs(spec[0]), spec[1], spec[0]) wave = istft(v_spec_m,1024) - elif algorithm == 'max_mag': + elif algorithm == 'Max_Mag': v_spec_m = np.where(np.abs(spec[1]) >= np.abs(spec[0]), spec[1], spec[0]) wave = istft(v_spec_m,1024) - elif algorithm == 'default': + elif algorithm == 'Default': #wave = [istft(spec[0],1024),istft(spec[1],1024)] wave = (wave[1] * value) + (wave[0] * (1-value)) - elif algorithm == 'invert_p': + elif algorithm == 'Invert_p': X_mag = np.abs(spec[0]) y_mag = np.abs(spec[1]) max_mag = np.where(X_mag >= y_mag, X_mag, y_mag) @@ -80,26 +80,43 @@ def spec_effects(wave, algorithm='default', value=None): return wave -def get_models(name, device, n_fft_scale, dim_f, load=True, stems='vocals-onevocals-two'): +def get_models(name, device, n_fft_scale, dim_f, load=True, stems='bdov'): if name=='tdf_extra': models = [] - if 'vocals-one' in stems: + if 'b' in stems: models.append( - Conv_TDF_net_trim( + Conv_TDF_net_trim( device=device, load=load, n_fft_scale=n_fft_scale, - model_name='Conv-TDF', target_name='vocals-one', + model_name='Conv-TDF', target_name='bass', L=11, dim_f=dim_f, dim_t=8 ) ) - if 'vocals-two' in stems: + if 'd' in stems: models.append( - Conv_TDF_net_trim( + Conv_TDF_net_trim( device=device, load=load, n_fft_scale=n_fft_scale, - model_name='Conv-TDF', target_name='vocals-two', + model_name='Conv-TDF', target_name='drums', + L=9, dim_f=dim_f, dim_t=7 + ) + ) + if 'o' in stems: + models.append( + Conv_TDF_net_trim( + device=device, load=load, n_fft_scale=n_fft_scale, + model_name='Conv-TDF', target_name='other', L=11, dim_f=dim_f, dim_t=8 ) ) + if 'v' in stems: + models.append( + Conv_TDF_net_trim( + device=device, load=load, n_fft_scale=n_fft_scale, + model_name='Conv-TDF', target_name='vocals', + L=11, dim_f=dim_f, dim_t=8 + ) + ) + return models else: