Add files via upload

This commit is contained in:
Anjok07
2022-06-13 02:07:19 -05:00
committed by GitHub
parent 8ef8183411
commit b6d6d72bcc
5 changed files with 2518 additions and 711 deletions

1254
UVR.py

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,4 @@
import os import os
from pickle import STOP
from tracemalloc import stop
from turtle import update
import subprocess import subprocess
from unittest import skip from unittest import skip
from pathlib import Path from pathlib import Path
@@ -11,14 +8,18 @@ import pydub
import shutil import shutil
import hashlib import hashlib
import gc
#MDX-Net #MDX-Net
#---------------------------------------- #----------------------------------------
import soundfile as sf import soundfile as sf
import torch import torch
import numpy as np import numpy as np
from demucs.model import Demucs
from demucs.utils import apply_model from demucs.pretrained import get_model as _gm
from demucs.hdemucs import HDemucs
from demucs.apply import BagOfModels, apply_model
from demucs.audio import AudioFile
import pathlib
from models import get_models, spec_effects from models import get_models, spec_effects
import onnxruntime as ort import onnxruntime as ort
import time import time
@@ -37,38 +38,43 @@ import torch
import tkinter as tk import tkinter as tk
import traceback # Error Message Recent Calls import traceback # Error Message Recent Calls
import time # Timer import time # Timer
from typing import Literal
class Predictor(): class Predictor():
def __init__(self): def __init__(self):
pass pass
def prediction_setup(self, demucs_name, def prediction_setup(self):
channels=64):
global device global device
print('Print the gpu setting: ', data['gpu']) print('Print the gpu setting: ', data['gpu'])
if data['gpu'] >= 0: if data['gpu'] >= 0:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
if data['gpu'] == -1: if data['gpu'] == -1:
device = torch.device('cpu') device = torch.device('cpu')
if data['demucsmodel']: if data['demucsmodel']:
self.demucs = Demucs(sources=["drums", "bass", "other", "vocals"], channels=channels) if 'UVR' in demucs_model_set:
widget_text.write(base_text + 'Loading Demucs model... ') self.demucs = HDemucs(sources=["other", "vocals"])
else:
self.demucs = HDemucs(sources=["drums", "bass", "other", "vocals"])
widget_text.write(base_text + 'Loading Demucs model...')
update_progress(**progress_kwargs, update_progress(**progress_kwargs,
step=0.05) step=0.05)
path_d = Path('models/Demucs_Models')
self.demucs = _gm(name=demucs_model_set, repo=path_d)
self.demucs.to(device) self.demucs.to(device)
self.demucs.load_state_dict(torch.load(demucs_name))
widget_text.write('Done!\n')
self.demucs.eval() self.demucs.eval()
widget_text.write('Done!\n')
if isinstance(self.demucs, BagOfModels):
widget_text.write(base_text + f"Selected Demucs model is a bag of {len(self.demucs.models)} model(s).\n")
self.onnx_models = {} self.onnx_models = {}
c = 0 c = 0
print('stemtype: ', modeltype)
self.models = get_models('tdf_extra', load=False, device=cpu, stems=modeltype, n_fft_scale=n_fft_scale_set, dim_f=dim_f_set) self.models = get_models('tdf_extra', load=False, device=cpu, stems=modeltype, n_fft_scale=n_fft_scale_set, dim_f=dim_f_set)
if not data['demucs_only']: if not data['demucs_only']:
widget_text.write(base_text + 'Loading ONNX model... ') widget_text.write(base_text + 'Loading ONNX model... ')
@@ -87,19 +93,17 @@ class Predictor():
elif data['gpu'] == -1: elif data['gpu'] == -1:
run_type = ['CPUExecutionProvider'] run_type = ['CPUExecutionProvider']
print(run_type) print('Selected Model: ', model_set)
print(str(device))
self.onnx_models[c] = ort.InferenceSession(os.path.join('models/MDX_Net_Models', str(model_set) + '.onnx'), providers=run_type) self.onnx_models[c] = ort.InferenceSession(os.path.join('models/MDX_Net_Models', str(model_set) + '.onnx'), providers=run_type)
if not data['demucs_only']: if not data['demucs_only']:
widget_text.write('Done!\n') widget_text.write('Done!\n')
def prediction(self, m): def prediction(self, m):
#mix, rate = sf.read(m) mix, samplerate = librosa.load(m, mono=False, sr=44100)
mix, rate = librosa.load(m, mono=False, sr=44100)
if mix.ndim == 1: if mix.ndim == 1:
mix = np.asfortranarray([mix,mix]) mix = np.asfortranarray([mix,mix])
samplerate = samplerate
mix = mix.T mix = mix.T
sources = self.demix(mix.T) sources = self.demix(mix.T)
widget_text.write(base_text + 'Inferences complete!\n') widget_text.write(base_text + 'Inferences complete!\n')
@@ -226,13 +230,12 @@ class Predictor():
c += 1 c += 1
if not data['demucsmodel']: if not data['demucsmodel']:
if data['inst_only']: if data['inst_only']:
widget_text.write(base_text + 'Preparing to save Instrumental...') widget_text.write(base_text + 'Preparing to save Instrumental...')
else: else:
widget_text.write(base_text + 'Saving vocals... ') widget_text.write(base_text + 'Saving vocals... ')
sf.write(non_reduced_vocal_path, sources[c].T, rate) sf.write(non_reduced_vocal_path, sources[c].T, samplerate)
update_progress(**progress_kwargs, update_progress(**progress_kwargs,
step=(0.9)) step=(0.9))
widget_text.write('Done!\n') widget_text.write('Done!\n')
@@ -240,7 +243,7 @@ class Predictor():
reduction_sen = float(int(data['noisereduc_s'])/10) reduction_sen = float(int(data['noisereduc_s'])/10)
subprocess.call("lib_v5\\sox\\sox.exe" + ' "' + subprocess.call("lib_v5\\sox\\sox.exe" + ' "' +
f"{str(non_reduced_vocal_path)}" + '" "' + f"{str(vocal_path)}" + '" ' + f"{str(non_reduced_vocal_path)}" + '" "' + f"{str(vocal_path)}" + '" ' +
"noisered lib_v5\\sox\\mdxnetnoisereduc.prof " + f"{reduction_sen}", "noisered lib_v5\\sox\\" + noise_pro_set + ".prof " + f"{reduction_sen}",
shell=True, stdout=subprocess.PIPE, shell=True, stdout=subprocess.PIPE,
stdin=subprocess.PIPE, stderr=subprocess.PIPE) stdin=subprocess.PIPE, stderr=subprocess.PIPE)
widget_text.write('Done!\n') widget_text.write('Done!\n')
@@ -252,7 +255,11 @@ class Predictor():
else: else:
widget_text.write(base_text + 'Saving Vocals... ') widget_text.write(base_text + 'Saving Vocals... ')
sf.write(non_reduced_vocal_path, sources[3].T, rate) if data['demucs_only']:
if 'UVR' in demucs_model_set:
sf.write(non_reduced_vocal_path, sources[1].T, samplerate)
else:
sf.write(non_reduced_vocal_path, sources[source_val].T, samplerate)
update_progress(**progress_kwargs, update_progress(**progress_kwargs,
step=(0.9)) step=(0.9))
widget_text.write('Done!\n') widget_text.write('Done!\n')
@@ -275,7 +282,7 @@ class Predictor():
widget_text.write(base_text + 'Preparing Instrumental...') widget_text.write(base_text + 'Preparing Instrumental...')
else: else:
widget_text.write(base_text + 'Saving Vocals... ') widget_text.write(base_text + 'Saving Vocals... ')
sf.write(vocal_path, sources[c].T, rate) sf.write(vocal_path, sources[c].T, samplerate)
update_progress(**progress_kwargs, update_progress(**progress_kwargs,
step=(0.9)) step=(0.9))
widget_text.write('Done!\n') widget_text.write('Done!\n')
@@ -284,7 +291,15 @@ class Predictor():
widget_text.write(base_text + 'Preparing Instrumental...') widget_text.write(base_text + 'Preparing Instrumental...')
else: else:
widget_text.write(base_text + 'Saving Vocals... ') widget_text.write(base_text + 'Saving Vocals... ')
sf.write(vocal_path, sources[3].T, rate)
if data['demucs_only']:
if 'UVR' in demucs_model_set:
sf.write(vocal_path, sources[1].T, samplerate)
else:
sf.write(vocal_path, sources[source_val].T, samplerate)
else:
sf.write(vocal_path, sources[source_val].T, samplerate)
update_progress(**progress_kwargs, update_progress(**progress_kwargs,
step=(0.9)) step=(0.9))
widget_text.write('Done!\n') widget_text.write('Done!\n')
@@ -470,13 +485,6 @@ class Predictor():
errmessage + f'\nError Time Stamp: [{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}]\n') errmessage + f'\nError Time Stamp: [{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}]\n')
except: except:
pass pass
try:
print('Is there already a voc file there? ', file_exists_v)
print('Is there already a non_voc file there? ', file_exists_n)
except:
pass
if data['noisereduc_s'] == 'None': if data['noisereduc_s'] == 'None':
pass pass
@@ -567,23 +575,37 @@ class Predictor():
segmented_mix[skip] = mix[:,start:end].copy() segmented_mix[skip] = mix[:,start:end].copy()
if end == samples: if end == samples:
break break
if not data['demucsmodel']: if not data['demucsmodel']:
sources = self.demix_base(segmented_mix, margin_size=margin) sources = self.demix_base(segmented_mix, margin_size=margin)
elif data['demucs_only']: elif data['demucs_only']:
sources = self.demix_demucs(segmented_mix, margin_size=margin) if split_mode == True:
sources = self.demix_demucs_split(mix)
if split_mode == False:
sources = self.demix_demucs(segmented_mix, margin_size=margin)
else: # both, apply spec effects else: # both, apply spec effects
base_out = self.demix_base(segmented_mix, margin_size=margin) base_out = self.demix_base(segmented_mix, margin_size=margin)
demucs_out = self.demix_demucs(segmented_mix, margin_size=margin) print(split_mode)
if split_mode == True:
demucs_out = self.demix_demucs_split(mix)
if split_mode == False:
demucs_out = self.demix_demucs(segmented_mix, margin_size=margin)
nan_count = np.count_nonzero(np.isnan(demucs_out)) + np.count_nonzero(np.isnan(base_out)) nan_count = np.count_nonzero(np.isnan(demucs_out)) + np.count_nonzero(np.isnan(base_out))
if nan_count > 0: if nan_count > 0:
print('Warning: there are {} nan values in the array(s).'.format(nan_count)) print('Warning: there are {} nan values in the array(s).'.format(nan_count))
demucs_out, base_out = np.nan_to_num(demucs_out), np.nan_to_num(base_out) demucs_out, base_out = np.nan_to_num(demucs_out), np.nan_to_num(base_out)
sources = {} sources = {}
print(data['mixing']) print(data['mixing'])
sources[3] = (spec_effects(wave=[demucs_out[source_val],base_out[0]],
algorithm=data['mixing'], if 'UVR' in demucs_model_set:
value=b[3])*float(data['compensate'])) # compensation sources[source_val] = (spec_effects(wave=[demucs_out[1],base_out[0]],
algorithm=data['mixing'],
value=b[source_val])*float(data['compensate'])) # compensation
else:
sources[source_val] = (spec_effects(wave=[demucs_out[source_val],base_out[0]],
algorithm=data['mixing'],
value=b[source_val])*float(data['compensate'])) # compensation
return sources return sources
def demix_base(self, mixes, margin_size): def demix_base(self, mixes, margin_size):
@@ -594,6 +616,7 @@ class Predictor():
widget_text.write(base_text + "Running ONNX Inference...\n") widget_text.write(base_text + "Running ONNX Inference...\n")
widget_text.write(base_text + "Processing "f"{onnxitera} slices... ") widget_text.write(base_text + "Processing "f"{onnxitera} slices... ")
print(' Running ONNX Inference...') print(' Running ONNX Inference...')
for mix in mixes: for mix in mixes:
gui_progress_bar_onnx += 1 gui_progress_bar_onnx += 1
if data['demucsmodel']: if data['demucsmodel']:
@@ -602,6 +625,7 @@ class Predictor():
else: else:
update_progress(**progress_kwargs, update_progress(**progress_kwargs,
step=(0.1 + (0.9/onnxitera * gui_progress_bar_onnx))) step=(0.1 + (0.9/onnxitera * gui_progress_bar_onnx)))
cmix = mixes[mix] cmix = mixes[mix]
sources = [] sources = []
n_sample = cmix.shape[1] n_sample = cmix.shape[1]
@@ -634,7 +658,6 @@ class Predictor():
end = None end = None
sources.append(tar_signal[:,start:end]) sources.append(tar_signal[:,start:end])
chunked_sources.append(sources) chunked_sources.append(sources)
_sources = np.concatenate(chunked_sources, axis=-1) _sources = np.concatenate(chunked_sources, axis=-1)
del self.onnx_models del self.onnx_models
@@ -647,6 +670,7 @@ class Predictor():
demucsitera = len(mix) demucsitera = len(mix)
demucsitera_calc = demucsitera * 2 demucsitera_calc = demucsitera * 2
gui_progress_bar_demucs = 0 gui_progress_bar_demucs = 0
widget_text.write(base_text + "Split Mode is off. (Chunks enabled for Demucs Model)\n")
widget_text.write(base_text + "Running Demucs Inference...\n") widget_text.write(base_text + "Running Demucs Inference...\n")
widget_text.write(base_text + "Processing "f"{len(mix)} slices... ") widget_text.write(base_text + "Processing "f"{len(mix)} slices... ")
print(' Running Demucs Inference...') print(' Running Demucs Inference...')
@@ -659,7 +683,8 @@ class Predictor():
ref = cmix.mean(0) ref = cmix.mean(0)
cmix = (cmix - ref.mean()) / ref.std() cmix = (cmix - ref.mean()) / ref.std()
with torch.no_grad(): with torch.no_grad():
sources = apply_model(self.demucs, cmix.to(device), split=True, overlap=overlap_set, shifts=shift_set) print(split_mode)
sources = apply_model(self.demucs, cmix[None], split=split_mode, device=device, overlap=overlap_set, shifts=shift_set, progress=False)[0]
sources = (sources * ref.std() + ref.mean()).cpu().numpy() sources = (sources * ref.std() + ref.mean()).cpu().numpy()
sources[[0,1]] = sources[[1,0]] sources[[0,1]] = sources[[1,0]]
@@ -673,6 +698,27 @@ class Predictor():
sources = np.concatenate(sources, axis=-1) sources = np.concatenate(sources, axis=-1)
widget_text.write('Done!\n') widget_text.write('Done!\n')
return sources return sources
def demix_demucs_split(self, mix):
print('shift_set ', shift_set)
widget_text.write(base_text + "Split Mode is on. (Chunks disabled for Demucs Model)\n")
widget_text.write(base_text + "Running Demucs Inference...\n")
widget_text.write(base_text + "Processing "f"{len(mix)} slices... ")
print(' Running Demucs Inference...')
mix = torch.tensor(mix, dtype=torch.float32)
ref = mix.mean(0)
mix = (mix - ref.mean()) / ref.std()
with torch.no_grad():
sources = apply_model(self.demucs, mix[None], split=split_mode, device=device, overlap=overlap_set, shifts=shift_set, progress=False)[0]
widget_text.write('Done!\n')
sources = (sources * ref.std() + ref.mean()).cpu().numpy()
sources[[0,1]] = sources[[1,0]]
return sources
data = { data = {
# Paths # Paths
@@ -694,11 +740,11 @@ data = {
'overlap': 0.5, 'overlap': 0.5,
'shifts': 0, 'shifts': 0,
'margin': 44100, 'margin': 44100,
'channel': 64, 'split_mode': False,
'compensate': 1.03597672895, 'compensate': 1.03597672895,
'demucs_only': False, 'demucs_only': False,
'mixing': 'Default', 'mixing': 'Default',
'DemucsModel': 'demucs_extra-3646af93_org.th', 'DemucsModel_MDX': 'UVR_Demucs_Model_1',
# Choose Model # Choose Model
'mdxnetModel': 'UVR-MDX-NET 1', 'mdxnetModel': 'UVR-MDX-NET 1',
'mdxnetModeltype': 'Vocals (Custom)', 'mdxnetModeltype': 'Vocals (Custom)',
@@ -751,6 +797,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
global model_set_name global model_set_name
global stemset_n global stemset_n
global noise_pro_set global noise_pro_set
global demucs_model_set
global mdx_model_hash global mdx_model_hash
@@ -759,6 +806,9 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
global overlap_set global overlap_set
global shift_set global shift_set
global source_val global source_val
global split_mode
global demucs_switch
# Update default settings # Update default settings
default_chunks = data['chunks'] default_chunks = data['chunks']
@@ -823,161 +873,90 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
source_val_set = 0 source_val_set = 0
stem_name = '(Bass)' stem_name = '(Bass)'
if data['mdxnetModel'] == 'UVR-MDX-NET 1':
try: if os.path.isfile('models/MDX_Net_Models/UVR_MDXNET_1_9703.onnx'):
if data['mdxnetModel'] == 'UVR-MDX-NET 1':
model_set = 'UVR_MDXNET_1_9703' model_set = 'UVR_MDXNET_1_9703'
model_set_name = 'UVR_MDXNET_1_9703' model_set_name = 'UVR_MDXNET_1_9703'
modeltype = 'v'
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
stemset_n = '(Vocals)'
source_val = 3
n_fft_scale_set=6144
dim_f_set=2048
elif data['mdxnetModel'] == 'UVR-MDX-NET 2':
model_set = 'UVR_MDXNET_2_9682'
model_set_name = 'UVR_MDXNET_2_9682'
modeltype = 'v'
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
stemset_n = '(Vocals)'
source_val = 3
n_fft_scale_set=6144
dim_f_set=2048
elif data['mdxnetModel'] == 'UVR-MDX-NET 3':
model_set = 'UVR_MDXNET_3_9662'
model_set_name = 'UVR_MDXNET_3_9662'
modeltype = 'v'
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
stemset_n = '(Vocals)'
source_val = 3
n_fft_scale_set=6144
dim_f_set=2048
elif data['mdxnetModel'] == 'UVR-MDX-NET Karaoke':
model_set = 'UVR_MDXNET_KARA'
model_set_name = 'UVR_MDXNET_Karaoke'
modeltype = 'v'
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
stemset_n = '(Vocals)'
source_val = 3
n_fft_scale_set=6144
dim_f_set=2048
elif data['mdxnetModel'] == 'other':
model_set = 'other'
model_set_name = 'other'
modeltype = 'o'
noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
stemset_n = '(Other)'
source_val = 2
n_fft_scale_set=8192
dim_f_set=2048
elif data['mdxnetModel'] == 'drums':
model_set = 'drums'
model_set_name = 'drums'
modeltype = 'd'
noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
stemset_n = '(Drums)'
source_val = 1
n_fft_scale_set=4096
dim_f_set=2048
elif data['mdxnetModel'] == 'bass':
model_set = 'bass'
model_set_name = 'bass'
modeltype = 'b'
noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
stemset_n = '(Bass)'
source_val = 0
n_fft_scale_set=16384
dim_f_set=2048
else: else:
model_set = data['mdxnetModel']
model_set_name = data['mdxnetModel']
modeltype = stemset
noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
stemset_n = stem_name
source_val = source_val_set
n_fft_scale_set=int(data['n_fft_scale'])
dim_f_set=int(data['dim_f'])
MDXModelName=('models/MDX_Net_Models/' + model_set + '.onnx')
mdx_model_hash = hashlib.md5(open(MDXModelName, 'rb').read()).hexdigest()
print(mdx_model_hash)
except:
if data['mdxnetModel'] == 'UVR-MDX-NET 1':
model_set = 'UVR_MDXNET_9703' model_set = 'UVR_MDXNET_9703'
model_set_name = 'UVR_MDXNET_9703' model_set_name = 'UVR_MDXNET_9703'
modeltype = 'v' modeltype = 'v'
noise_pro = 'MDX-NET_Noise_Profile_14_kHz' noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
stemset_n = '(Vocals)' stemset_n = '(Vocals)'
source_val = 3 source_val = 3
n_fft_scale_set=6144 n_fft_scale_set=6144
dim_f_set=2048 dim_f_set=2048
elif data['mdxnetModel'] == 'UVR-MDX-NET 2': elif data['mdxnetModel'] == 'UVR-MDX-NET 2':
if os.path.isfile('models/MDX_Net_Models/UVR_MDXNET_2_9682.onnx'):
model_set = 'UVR_MDXNET_2_9682'
model_set_name = 'UVR_MDXNET_2_9682'
else:
model_set = 'UVR_MDXNET_9682' model_set = 'UVR_MDXNET_9682'
model_set_name = 'UVR_MDXNET_9682' model_set_name = 'UVR_MDXNET_9682'
modeltype = 'v' modeltype = 'v'
noise_pro = 'MDX-NET_Noise_Profile_14_kHz' noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
stemset_n = '(Vocals)' stemset_n = '(Vocals)'
source_val = 3 source_val = 3
n_fft_scale_set=6144 n_fft_scale_set=6144
dim_f_set=2048 dim_f_set=2048
elif data['mdxnetModel'] == 'UVR-MDX-NET 3': elif data['mdxnetModel'] == 'UVR-MDX-NET 3':
if os.path.isfile('models/MDX_Net_Models/UVR_MDXNET_3_9662.onnx'):
model_set = 'UVR_MDXNET_3_9662'
model_set_name = 'UVR_MDXNET_3_9662'
else:
model_set = 'UVR_MDXNET_9662' model_set = 'UVR_MDXNET_9662'
model_set_name = 'UVR_MDXNET_9662' model_set_name = 'UVR_MDXNET_9662'
modeltype = 'v' modeltype = 'v'
noise_pro = 'MDX-NET_Noise_Profile_14_kHz' noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
stemset_n = '(Vocals)' stemset_n = '(Vocals)'
source_val = 3 source_val = 3
n_fft_scale_set=6144 n_fft_scale_set=6144
dim_f_set=2048 dim_f_set=2048
elif data['mdxnetModel'] == 'UVR-MDX-NET Karaoke': elif data['mdxnetModel'] == 'UVR-MDX-NET Karaoke':
model_set = 'UVR_MDXNET_KARA' model_set = 'UVR_MDXNET_KARA'
model_set_name = 'UVR_MDXNET_Karaoke' model_set_name = 'UVR_MDXNET_Karaoke'
modeltype = 'v' modeltype = 'v'
noise_pro = 'MDX-NET_Noise_Profile_14_kHz' noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
stemset_n = '(Vocals)' stemset_n = '(Vocals)'
source_val = 3 source_val = 3
n_fft_scale_set=6144 n_fft_scale_set=6144
dim_f_set=2048 dim_f_set=2048
elif data['mdxnetModel'] == 'other': elif 'other' in data['mdxnetModel']:
model_set = 'other' model_set = 'other'
model_set_name = 'other' model_set_name = 'other'
modeltype = 'o' modeltype = 'o'
noise_pro = 'MDX-NET_Noise_Profile_Full_Band' noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
stemset_n = '(Other)' stemset_n = '(Other)'
source_val = 2 source_val = 2
n_fft_scale_set=8192 n_fft_scale_set=8192
dim_f_set=2048 dim_f_set=2048
elif data['mdxnetModel'] == 'drums': elif 'drums' in data['mdxnetModel']:
model_set = 'drums' model_set = 'drums'
model_set_name = 'drums' model_set_name = 'drums'
modeltype = 'd' modeltype = 'd'
noise_pro = 'MDX-NET_Noise_Profile_Full_Band' noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
stemset_n = '(Drums)' stemset_n = '(Drums)'
source_val = 1 source_val = 1
n_fft_scale_set=4096 n_fft_scale_set=4096
dim_f_set=2048 dim_f_set=2048
elif data['mdxnetModel'] == 'bass': elif 'bass' in data['mdxnetModel']:
model_set = 'bass' model_set = 'bass'
model_set_name = 'bass' model_set_name = 'bass'
modeltype = 'b' modeltype = 'b'
noise_pro = 'MDX-NET_Noise_Profile_Full_Band' noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
stemset_n = '(Bass)' stemset_n = '(Bass)'
source_val = 0 source_val = 0
n_fft_scale_set=16384 n_fft_scale_set=16384
dim_f_set=2048 dim_f_set=2048
else: else:
model_set = data['mdxnetModel'] model_set = data['mdxnetModel']
model_set_name = data['mdxnetModel'] model_set_name = data['mdxnetModel']
modeltype = stemset modeltype = stemset
noise_pro = 'MDX-NET_Noise_Profile_Full_Band' noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
stemset_n = stem_name stemset_n = stem_name
source_val = source_val_set source_val = source_val_set
n_fft_scale_set=int(data['n_fft_scale']) n_fft_scale_set=int(data['n_fft_scale'])
dim_f_set=int(data['dim_f']) dim_f_set=int(data['dim_f'])
MDXModelName=('models/MDX_Net_Models/' + model_set_name + '.onnx')
mdx_model_hash = hashlib.md5(open(MDXModelName, 'rb').read()).hexdigest()
print(mdx_model_hash)
if data['noise_pro_select'] == 'Auto Select': if data['noise_pro_select'] == 'Auto Select':
@@ -988,12 +967,8 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
print(n_fft_scale_set) print(n_fft_scale_set)
print(dim_f_set) print(dim_f_set)
print(data['DemucsModel']) print(data['DemucsModel_MDX'])
overlap_set = float(data['overlap'])
channel_set = int(data['channel'])
margin_set = int(data['margin'])
shift_set = int(data['shifts'])
stime = time.perf_counter() stime = time.perf_counter()
progress_var.set(0) progress_var.set(0)
@@ -1002,7 +977,46 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
try: #Load File(s) try: #Load File(s)
for file_num, music_file in tqdm(enumerate(data['input_paths'], start=1)): for file_num, music_file in tqdm(enumerate(data['input_paths'], start=1)):
overlap_set = float(data['overlap'])
channel_set = int(data['channel'])
margin_set = int(data['margin'])
shift_set = int(data['shifts'])
demucs_model_set = data['DemucsModel_MDX']
split_mode = data['split_mode']
demucs_switch = data['demucsmodel']
if stemset_n == '(Bass)':
if 'UVR' in demucs_model_set:
text_widget.write('The selected Demucs model can only be used with vocal stems.\n')
text_widget.write('Please select a 4 stem Demucs model and try again.\n\n')
text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}')
progress_var.set(0)
button_widget.configure(state=tk.NORMAL) # Enable Button
return
else:
pass
if stemset_n == '(Drums)':
if 'UVR' in demucs_model_set:
text_widget.write('The selected Demucs model can only be used with vocal stems.\n')
text_widget.write('Please select a 4 stem Demucs model and try again.\n\n')
text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}')
progress_var.set(0)
button_widget.configure(state=tk.NORMAL) # Enable Button
return
else:
pass
if stemset_n == '(Other)':
if 'UVR' in demucs_model_set:
text_widget.write('The selected Demucs model can only be used with vocal stems.\n')
text_widget.write('Please select a 4 stem Demucs model and try again.\n\n')
text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}')
progress_var.set(0)
button_widget.configure(state=tk.NORMAL) # Enable Button
return
else:
pass
_mixture = f'{data["input_paths"]}' _mixture = f'{data["input_paths"]}'
_basename = f'{data["export_path"]}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' _basename = f'{data["export_path"]}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}'
@@ -1063,11 +1077,10 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
e = os.path.join(data["export_path"]) e = os.path.join(data["export_path"])
demucsmodel = 'models/Demucs_Model/' + str(data['DemucsModel']) demucsmodel = 'models/Demucs_Models/' + str(data['DemucsModel_MDX'])
pred = Predictor() pred = Predictor()
pred.prediction_setup(demucs_name=demucsmodel, pred.prediction_setup()
channels=channel_set)
print(demucsmodel) print(demucsmodel)
@@ -1373,7 +1386,10 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
text_widget.write("\n" + f'Please address the error and try again.' + "\n") text_widget.write("\n" + f'Please address the error and try again.' + "\n")
text_widget.write(f'If this error persists, please contact the developers with the error details.\n\n') text_widget.write(f'If this error persists, please contact the developers with the error details.\n\n')
text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}') text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}')
torch.cuda.empty_cache() try:
torch.cuda.empty_cache()
except:
pass
button_widget.configure(state=tk.NORMAL) # Enable Button button_widget.configure(state=tk.NORMAL) # Enable Button
return return

1225
inference_demucs.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,5 @@
from functools import total_ordering
import os import os
import importlib import importlib
from statistics import mode
import pydub import pydub
import shutil import shutil
import hashlib import hashlib
@@ -1006,7 +1004,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
with open('errorlog.txt', 'w') as f: with open('errorlog.txt', 'w') as f:
f.write(f'Last Error Received:\n\n' + f.write(f'Last Error Received:\n\n' +
f'Error Received while processing "{os.path.basename(music_file)}":\n' + f'Error Received while processing "{os.path.basename(music_file)}":\n' +
f'Process Method: Ensemble Mode\n\n' + f'Process Method: VR Architecture\n\n' +
f'Could not write audio file.\n' + f'Could not write audio file.\n' +
f'This could be due to low storage on target device or a system permissions issue.\n' + f'This could be due to low storage on target device or a system permissions issue.\n' +
f'If the error persists, please contact the developers.\n\n' + f'If the error persists, please contact the developers.\n\n' +
@@ -1031,7 +1029,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
with open('errorlog.txt', 'w') as f: with open('errorlog.txt', 'w') as f:
f.write(f'Last Error Received:\n\n' + f.write(f'Last Error Received:\n\n' +
f'Error Received while processing "{os.path.basename(music_file)}":\n' + f'Error Received while processing "{os.path.basename(music_file)}":\n' +
f'Process Method: Ensemble Mode\n\n' + f'Process Method: VR Architecture\n\n' +
f'The application was unable to allocate enough system memory to use this model.\n' + f'The application was unable to allocate enough system memory to use this model.\n' +
f'Please do the following:\n\n1. Restart this application.\n2. Ensure any CPU intensive applications are closed.\n3. Then try again.\n\n' + f'Please do the following:\n\n1. Restart this application.\n2. Ensure any CPU intensive applications are closed.\n3. Then try again.\n\n' +
f'Please Note: Intel Pentium and Intel Celeron processors do not work well with this application.\n\n' + f'Please Note: Intel Pentium and Intel Celeron processors do not work well with this application.\n\n' +

View File

@@ -11,8 +11,10 @@ import subprocess
import soundfile as sf import soundfile as sf
import torch import torch
import numpy as np import numpy as np
from demucs.model import Demucs from demucs.pretrained import get_model as _gm
from demucs.utils import apply_model from demucs.hdemucs import HDemucs
from demucs.apply import BagOfModels, apply_model
import pathlib
from models import get_models, spec_effects from models import get_models, spec_effects
import onnxruntime as ort import onnxruntime as ort
import time import time
@@ -47,32 +49,36 @@ class Predictor():
def __init__(self): def __init__(self):
pass pass
def prediction_setup(self, demucs_name, def prediction_setup(self):
channels=64):
global device global device
print('Print the gpu setting: ', data['gpu'])
if data['gpu'] >= 0: if data['gpu'] >= 0:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
if data['gpu'] == -1: if data['gpu'] == -1:
device = torch.device('cpu') device = torch.device('cpu')
if data['demucsmodel']: if demucs_switch == 'on':
self.demucs = Demucs(sources=["drums", "bass", "other", "vocals"], channels=channels) if 'UVR' in demucs_model_set:
self.demucs = HDemucs(sources=["other", "vocals"])
else:
self.demucs = HDemucs(sources=["drums", "bass", "other", "vocals"])
widget_text.write(base_text + 'Loading Demucs model... ') widget_text.write(base_text + 'Loading Demucs model... ')
update_progress(**progress_kwargs, update_progress(**progress_kwargs,
step=0.05) step=0.05)
path_d = Path('models/Demucs_Models')
self.demucs = _gm(name=demucs_model_set, repo=path_d)
self.demucs.to(device) self.demucs.to(device)
self.demucs.load_state_dict(torch.load(demucs_name))
widget_text.write('Done!\n')
self.demucs.eval() self.demucs.eval()
widget_text.write('Done!\n')
if isinstance(self.demucs, BagOfModels):
widget_text.write(base_text + f"Selected Demucs model is a bag of {len(self.demucs.models)} model(s).\n")
self.onnx_models = {} self.onnx_models = {}
c = 0 c = 0
self.models = get_models('tdf_extra', load=False, device=cpu, stems=modeltype, n_fft_scale=n_fft_scale_set, dim_f=dim_f_set) self.models = get_models('tdf_extra', load=False, device=cpu, stems=modeltype, n_fft_scale=n_fft_scale_set, dim_f=dim_f_set)
if not data['demucs_only']: if demucs_only == 'off':
widget_text.write(base_text + 'Loading ONNX model... ') widget_text.write(base_text + 'Loading ONNX model... ')
update_progress(**progress_kwargs, update_progress(**progress_kwargs,
@@ -88,24 +94,26 @@ class Predictor():
run_type = ['CPUExecutionProvider'] run_type = ['CPUExecutionProvider']
elif data['gpu'] == -1: elif data['gpu'] == -1:
run_type = ['CPUExecutionProvider'] run_type = ['CPUExecutionProvider']
print(run_type)
print(str(device))
print('model_set: ', model_set) if demucs_only == 'off':
self.onnx_models[c] = ort.InferenceSession(os.path.join('models/MDX_Net_Models', model_set), providers=run_type) self.onnx_models[c] = ort.InferenceSession(os.path.join('models/MDX_Net_Models', model_set), providers=run_type)
print(demucs_model_set)
if not data['demucs_only']:
widget_text.write('Done!\n') widget_text.write('Done!\n')
elif demucs_only == 'on':
print(demucs_model_set)
pass
def prediction(self, m): def prediction(self, m):
#mix, rate = sf.read(m)
mix, rate = librosa.load(m, mono=False, sr=44100) mix, samplerate = librosa.load(m, mono=False, sr=44100)
if mix.ndim == 1: if mix.ndim == 1:
mix = np.asfortranarray([mix,mix]) mix = np.asfortranarray([mix,mix])
samplerate = samplerate
mix = mix.T mix = mix.T
sources = self.demix(mix.T) sources = self.demix(mix.T)
widget_text.write(base_text + 'Inferences complete!\n') widget_text.write(base_text + 'Inferences complete!\n')
c = -1 c = -1
#Main Save Path #Main Save Path
@@ -154,20 +162,22 @@ class Predictor():
else: else:
file_exists = 'not_there' file_exists = 'not_there'
if demucs_only == 'on':
data['noisereduc_s'] == 'None'
if not data['noisereduc_s'] == 'None': if not data['noisereduc_s'] == 'None':
c += 1 c += 1
if not data['demucsmodel']: if demucs_switch == 'off':
if data['inst_only'] and not data['voc_only']: if data['inst_only'] and not data['voc_only']:
widget_text.write(base_text + 'Preparing to save Instrumental...') widget_text.write(base_text + 'Preparing to save Instrumental...')
else: else:
widget_text.write(base_text + 'Saving vocals... ') widget_text.write(base_text + 'Saving vocals... ')
sf.write(non_reduced_vocal_path, sources[c].T, rate) sf.write(non_reduced_vocal_path, sources[c].T, samplerate)
update_progress(**progress_kwargs, update_progress(**progress_kwargs,
step=(0.9)) step=(0.9))
widget_text.write('Done!\n') widget_text.write('Done!\n')
widget_text.write(base_text + 'Performing Noise Reduction... ') widget_text.write(base_text + 'Performing Noise Reduction... ')
reduction_sen = float(int(data['noisereduc_s'])/10) reduction_sen = float(int(data['noisereduc_s'])/10)
print(noise_pro_set)
subprocess.call("lib_v5\\sox\\sox.exe" + ' "' + subprocess.call("lib_v5\\sox\\sox.exe" + ' "' +
f"{str(non_reduced_vocal_path)}" + '" "' + f"{str(vocal_path)}" + '" ' + f"{str(non_reduced_vocal_path)}" + '" "' + f"{str(vocal_path)}" + '" ' +
"noisered lib_v5\\sox\\" + noise_pro_set + ".prof " + f"{reduction_sen}", "noisered lib_v5\\sox\\" + noise_pro_set + ".prof " + f"{reduction_sen}",
@@ -181,31 +191,49 @@ class Predictor():
widget_text.write(base_text + 'Preparing Instrumental...') widget_text.write(base_text + 'Preparing Instrumental...')
else: else:
widget_text.write(base_text + 'Saving Vocals... ') widget_text.write(base_text + 'Saving Vocals... ')
sf.write(non_reduced_vocal_path, sources[3].T, rate) if demucs_only == 'on':
update_progress(**progress_kwargs, if 'UVR' in model_set_name:
step=(0.9)) sf.write(vocal_path, sources[1].T, samplerate)
widget_text.write('Done!\n') update_progress(**progress_kwargs,
widget_text.write(base_text + 'Performing Noise Reduction... ') step=(0.95))
reduction_sen = float(data['noisereduc_s'])/10 widget_text.write('Done!\n')
subprocess.call("lib_v5\\sox\\sox.exe" + ' "' + if 'extra' in model_set_name:
f"{str(non_reduced_vocal_path)}" + '" "' + f"{str(vocal_path)}" + '" ' + sf.write(vocal_path, sources[3].T, samplerate)
"noisered lib_v5\\sox\\" + noise_pro_set + ".prof " + f"{reduction_sen}", update_progress(**progress_kwargs,
shell=True, stdout=subprocess.PIPE, step=(0.95))
stdin=subprocess.PIPE, stderr=subprocess.PIPE) widget_text.write('Done!\n')
update_progress(**progress_kwargs, else:
step=(0.95)) sf.write(non_reduced_vocal_path, sources[3].T, samplerate)
widget_text.write('Done!\n') update_progress(**progress_kwargs,
step=(0.9))
widget_text.write('Done!\n')
widget_text.write(base_text + 'Performing Noise Reduction... ')
reduction_sen = float(data['noisereduc_s'])/10
subprocess.call("lib_v5\\sox\\sox.exe" + ' "' +
f"{str(non_reduced_vocal_path)}" + '" "' + f"{str(vocal_path)}" + '" ' +
"noisered lib_v5\\sox\\" + noise_pro_set + ".prof " + f"{reduction_sen}",
shell=True, stdout=subprocess.PIPE,
stdin=subprocess.PIPE, stderr=subprocess.PIPE)
update_progress(**progress_kwargs,
step=(0.95))
widget_text.write('Done!\n')
else: else:
c += 1 c += 1
if not data['demucsmodel']: if demucs_switch == 'off':
widget_text.write(base_text + 'Saving Vocals..') widget_text.write(base_text + 'Saving Vocals..')
sf.write(vocal_path, sources[c].T, rate) sf.write(vocal_path, sources[c].T, samplerate)
update_progress(**progress_kwargs, update_progress(**progress_kwargs,
step=(0.9)) step=(0.9))
widget_text.write('Done!\n') widget_text.write('Done!\n')
else: else:
widget_text.write(base_text + 'Saving Vocals... ') widget_text.write(base_text + 'Saving Vocals... ')
sf.write(vocal_path, sources[3].T, rate) if demucs_only == 'on':
if 'UVR' in model_set_name:
sf.write(vocal_path, sources[1].T, samplerate)
if 'extra' in model_set_name:
sf.write(vocal_path, sources[3].T, samplerate)
else:
sf.write(vocal_path, sources[3].T, samplerate)
update_progress(**progress_kwargs, update_progress(**progress_kwargs,
step=(0.9)) step=(0.9))
widget_text.write('Done!\n') widget_text.write('Done!\n')
@@ -355,23 +383,36 @@ class Predictor():
segmented_mix[skip] = mix[:,start:end].copy() segmented_mix[skip] = mix[:,start:end].copy()
if end == samples: if end == samples:
break break
if not data['demucsmodel']: if demucs_switch == 'off':
sources = self.demix_base(segmented_mix, margin_size=margin) sources = self.demix_base(segmented_mix, margin_size=margin)
elif data['demucs_only']: elif demucs_only == 'on':
sources = self.demix_demucs(segmented_mix, margin_size=margin) if split_mode == True:
sources = self.demix_demucs_split(mix)
if split_mode == False:
sources = self.demix_demucs(segmented_mix, margin_size=margin)
else: # both, apply spec effects else: # both, apply spec effects
base_out = self.demix_base(segmented_mix, margin_size=margin) base_out = self.demix_base(segmented_mix, margin_size=margin)
demucs_out = self.demix_demucs(segmented_mix, margin_size=margin) if split_mode == True:
demucs_out = self.demix_demucs_split(mix)
if split_mode == False:
demucs_out = self.demix_demucs(segmented_mix, margin_size=margin)
nan_count = np.count_nonzero(np.isnan(demucs_out)) + np.count_nonzero(np.isnan(base_out)) nan_count = np.count_nonzero(np.isnan(demucs_out)) + np.count_nonzero(np.isnan(base_out))
if nan_count > 0: if nan_count > 0:
print('Warning: there are {} nan values in the array(s).'.format(nan_count)) print('Warning: there are {} nan values in the array(s).'.format(nan_count))
demucs_out, base_out = np.nan_to_num(demucs_out), np.nan_to_num(base_out) demucs_out, base_out = np.nan_to_num(demucs_out), np.nan_to_num(base_out)
sources = {} sources = {}
sources[3] = (spec_effects(wave=[demucs_out[3],base_out[0]], if 'UVR' in demucs_model_set:
algorithm=data['mixing'], sources[3] = (spec_effects(wave=[demucs_out[1],base_out[0]],
value=b[3])*float(data['compensate'])) # compensation algorithm=data['mixing'],
value=b[3])*float(data['compensate'])) # compensation
else:
sources[3] = (spec_effects(wave=[demucs_out[3],base_out[0]],
algorithm=data['mixing'],
value=b[3])*float(data['compensate'])) # compensation
return sources return sources
def demix_base(self, mixes, margin_size): def demix_base(self, mixes, margin_size):
@@ -384,7 +425,7 @@ class Predictor():
print(' Running ONNX Inference...') print(' Running ONNX Inference...')
for mix in mixes: for mix in mixes:
gui_progress_bar_onnx += 1 gui_progress_bar_onnx += 1
if data['demucsmodel']: if demucs_switch == 'on':
update_progress(**progress_kwargs, update_progress(**progress_kwargs,
step=(0.1 + (0.5/onnxitera_calc * gui_progress_bar_onnx))) step=(0.1 + (0.5/onnxitera_calc * gui_progress_bar_onnx)))
else: else:
@@ -430,13 +471,18 @@ class Predictor():
return _sources return _sources
def demix_demucs(self, mix, margin_size): def demix_demucs(self, mix, margin_size):
print('shift_set ', shift_set)
processed = {} processed = {}
demucsitera = len(mix) demucsitera = len(mix)
demucsitera_calc = demucsitera * 2 demucsitera_calc = demucsitera * 2
gui_progress_bar_demucs = 0 gui_progress_bar_demucs = 0
widget_text.write(base_text + "Split Mode is off. (Chunks enabled for Demucs Model)\n")
widget_text.write(base_text + "Running Demucs Inference...\n") widget_text.write(base_text + "Running Demucs Inference...\n")
widget_text.write(base_text + "Processing "f"{len(mix)} slices... ") widget_text.write(base_text + "Processing "f"{len(mix)} slices... ")
print(' Running Demucs Inference...') print('Running Demucs Inference...')
for nmix in mix: for nmix in mix:
gui_progress_bar_demucs += 1 gui_progress_bar_demucs += 1
update_progress(**progress_kwargs, update_progress(**progress_kwargs,
@@ -446,7 +492,7 @@ class Predictor():
ref = cmix.mean(0) ref = cmix.mean(0)
cmix = (cmix - ref.mean()) / ref.std() cmix = (cmix - ref.mean()) / ref.std()
with torch.no_grad(): with torch.no_grad():
sources = apply_model(self.demucs, cmix.to(device), split=True, overlap=overlap_set, shifts=shift_set) sources = apply_model(self.demucs, cmix[None], split=split_mode, device=device, overlap=overlap_set, shifts=shift_set, progress=False)[0]
sources = (sources * ref.std() + ref.mean()).cpu().numpy() sources = (sources * ref.std() + ref.mean()).cpu().numpy()
sources[[0,1]] = sources[[1,0]] sources[[0,1]] = sources[[1,0]]
@@ -461,6 +507,26 @@ class Predictor():
widget_text.write('Done!\n') widget_text.write('Done!\n')
return sources return sources
def demix_demucs_split(self, mix):
    """Run Demucs over the whole mix with a single ``apply_model`` call.

    Unlike ``demix_demucs``, this method does not iterate over pre-cut
    chunks: the full mix is normalised, handed to ``apply_model`` once
    (with ``split=split_mode``), and the result is de-normalised.

    Relies on module-level globals set up by ``main``: ``widget_text``,
    ``base_text``, ``shift_set``, ``overlap_set``, ``split_mode`` and
    ``device``.

    Args:
        mix: Array-like audio convertible by ``torch.tensor``.
            NOTE(review): the caller appears to pass a
            (channels, samples) array -- confirm.

    Returns:
        A NumPy array of separated sources, with the first two entries
        along axis 0 swapped relative to the order ``apply_model``
        returned them in.
    """
    print('shift_set ', shift_set)

    widget_text.write(base_text + "Split Mode is on. (Chunks disabled for Demucs Model)\n")
    widget_text.write(base_text + "Running Demucs Inference...\n")
    # NOTE(review): len(mix) is the first-axis length of the full mix,
    # not a chunk count, so the number shown here may be misleading --
    # confirm before changing the user-facing text.
    widget_text.write(base_text + "Processing "f"{len(mix)} slices... ")
    print(' Running Demucs Inference...')

    mix = torch.tensor(mix, dtype=torch.float32)
    ref = mix.mean(0)
    offset = ref.mean()
    # A silent/constant input has std == 0; dividing by it would turn the
    # whole signal (and every separated source) into NaN. Clamping to a
    # tiny floor leaves normal audio untouched and keeps silence finite.
    scale = ref.std().clamp(min=1e-8)
    mix = (mix - offset) / scale

    with torch.no_grad():
        # mix[None] adds a leading axis; [0] strips it from the result.
        sources = apply_model(self.demucs, mix[None], split=split_mode, device=device, overlap=overlap_set, shifts=shift_set, progress=False)[0]

    widget_text.write('Done!\n')

    # Undo the normalisation with the same offset/scale used above.
    sources = (sources * scale + offset).cpu().numpy()
    # Swap the first two sources.
    sources[[0,1]] = sources[[1,0]]
    return sources
def update_progress(progress_var, total_files, file_num, step: float = 1): def update_progress(progress_var, total_files, file_num, step: float = 1):
"""Calculate the progress for the progress widget in the GUI""" """Calculate the progress for the progress widget in the GUI"""
@@ -567,7 +633,7 @@ data = {
'chunks': 'auto', 'chunks': 'auto',
'non_red': False, 'non_red': False,
'noisereduc_s': 3, 'noisereduc_s': 3,
'ensChoose': 'Basic Ensemble', 'ensChoose': 'Basic VR Ensemble',
'algo': 'Instrumentals (Min Spec)', 'algo': 'Instrumentals (Min Spec)',
#Advanced Options #Advanced Options
'appendensem': False, 'appendensem': False,
@@ -575,11 +641,11 @@ data = {
'overlap': 0.5, 'overlap': 0.5,
'shifts': 0, 'shifts': 0,
'margin': 44100, 'margin': 44100,
'channel': 64, 'split_mode': False,
'compensate': 1.03597672895, 'compensate': 1.03597672895,
'demucs_only': False, 'demucs_only': False,
'mixing': 'Default', 'mixing': 'Default',
'DemucsModel': 'demucs_extra-3646af93_org.th', 'DemucsModel_MDX': 'UVR_Demucs_Model_1',
# Models # Models
'instrumentalModel': None, 'instrumentalModel': None,
@@ -627,17 +693,21 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
global ModelName_2 global ModelName_2
global mdx_model_hash global mdx_model_hash
global demucs_model_set
global channel_set global channel_set
global margin_set global margin_set
global overlap_set global overlap_set
global shift_set global shift_set
global noise_pro_set global noise_pro_set
global n_fft_scale_set global n_fft_scale_set
global dim_f_set global dim_f_set
global split_mode
global demucs_switch
global demucs_only
# Update default settings # Update default settings
default_chunks = data['chunks'] default_chunks = data['chunks']
default_noisereduc_s = data['noisereduc_s'] default_noisereduc_s = data['noisereduc_s']
@@ -665,12 +735,6 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
f'\nLast Conversion Time Stamp: [{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}]\n') f'\nLast Conversion Time Stamp: [{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}]\n')
except: except:
pass pass
overlap_set = float(data['overlap'])
channel_set = int(data['channel'])
margin_set = int(data['margin'])
shift_set = int(data['shifts'])
n_fft_scale_set=6144 n_fft_scale_set=6144
dim_f_set=2048 dim_f_set=2048
@@ -770,7 +834,26 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
# Separation Preperation # Separation Preperation
try: #Ensemble Dictionary try: #Ensemble Dictionary
if not data['ensChoose'] == 'User Ensemble': overlap_set = float(data['overlap'])
channel_set = int(data['channel'])
margin_set = int(data['margin'])
shift_set = int(data['shifts'])
demucs_model_set = data['DemucsModel_MDX']
split_mode = data['split_mode']
demucs_switch = data['demucsmodel']
if data['demucsmodel']:
demucs_switch = 'on'
else:
demucs_switch = 'off'
if data['demucs_only']:
demucs_only = 'on'
else:
demucs_only = 'off'
if not data['ensChoose'] == 'Manual Ensemble':
#1st Model #1st Model
@@ -1219,40 +1302,35 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
else: else:
vr_ensem_mdx_c_name = data['vr_ensem_mdx_c'] vr_ensem_mdx_c_name = data['vr_ensem_mdx_c']
vr_ensem_mdx_c = f'models/Main_Models/{vr_ensem_mdx_c_name}.pth' vr_ensem_mdx_c = f'models/Main_Models/{vr_ensem_mdx_c_name}.pth'
#MDX-Net Model #MDX-Net Model
try:
if data['mdx_ensem'] == 'UVR-MDX-NET 1':
mdx_ensem = 'UVR_MDXNET_1_9703'
if data['mdx_ensem'] == 'UVR-MDX-NET 2':
mdx_ensem = 'UVR_MDXNET_2_9682'
if data['mdx_ensem'] == 'UVR-MDX-NET 3':
mdx_ensem = 'UVR_MDXNET_3_9662'
if data['mdx_ensem'] == 'UVR-MDX-NET Karaoke':
mdx_ensem = 'UVR_MDXNET_KARA'
MDXModelName=('models/MDX_Net_Models/' + mdx_ensem + '.onnx')
mdx_model_hash = hashlib.md5(open(MDXModelName, 'rb').read()).hexdigest()
print(mdx_ensem)
except:
if data['mdx_ensem'] == 'UVR-MDX-NET 1':
mdx_ensem = 'UVR_MDXNET_9703'
if data['mdx_ensem'] == 'UVR-MDX-NET 2':
mdx_ensem = 'UVR_MDXNET_9682'
if data['mdx_ensem'] == 'UVR-MDX-NET 3':
mdx_ensem = 'UVR_MDXNET_9662'
if data['mdx_ensem'] == 'UVR-MDX-NET Karaoke':
mdx_ensem = 'UVR_MDXNET_KARA'
MDXModelName=('models/MDX_Net_Models/' + mdx_ensem + '.onnx')
mdx_model_hash = hashlib.md5(open(MDXModelName, 'rb').read()).hexdigest()
print(mdx_model_hash)
print(mdx_ensem)
if data['mdx_ensem'] == 'UVR-MDX-NET 1':
if os.path.isfile('models/MDX_Net_Models/UVR_MDXNET_1_9703.onnx'):
mdx_ensem = 'UVR_MDXNET_1_9703'
else:
mdx_ensem = 'UVR_MDXNET_9703'
if data['mdx_ensem'] == 'UVR-MDX-NET 2':
if os.path.isfile('models/MDX_Net_Models/UVR_MDXNET_2_9682.onnx'):
mdx_ensem = 'UVR_MDXNET_2_9682'
else:
mdx_ensem = 'UVR_MDXNET_9682'
if data['mdx_ensem'] == 'UVR-MDX-NET 3':
if os.path.isfile('models/MDX_Net_Models/UVR_MDXNET_3_9662.onnx'):
mdx_ensem = 'UVR_MDXNET_3_9662'
else:
mdx_ensem = 'UVR_MDXNET_9662'
if data['mdx_ensem'] == 'UVR-MDX-NET Karaoke':
mdx_ensem = 'UVR_MDXNET_KARA'
if data['mdx_ensem'] == 'Demucs UVR Model 1':
mdx_ensem = 'UVR_Demucs_Model_1'
if data['mdx_ensem'] == 'Demucs UVR Model 2':
mdx_ensem = 'UVR_Demucs_Model_2'
if data['mdx_ensem'] == 'Demucs mdx_extra':
mdx_ensem = 'mdx_extra'
if data['mdx_ensem'] == 'Demucs mdx_extra_q':
mdx_ensem = 'mdx_extra_q'
#MDX-Net Model 2 #MDX-Net Model 2
if data['mdx_ensem_b'] == 'UVR-MDX-NET 1': if data['mdx_ensem_b'] == 'UVR-MDX-NET 1':
@@ -1263,6 +1341,14 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
mdx_ensem_b = 'UVR_MDXNET_3_9662' mdx_ensem_b = 'UVR_MDXNET_3_9662'
if data['mdx_ensem_b'] == 'UVR-MDX-NET Karaoke': if data['mdx_ensem_b'] == 'UVR-MDX-NET Karaoke':
mdx_ensem_b = 'UVR_MDXNET_KARA' mdx_ensem_b = 'UVR_MDXNET_KARA'
if data['mdx_ensem_b'] == 'Demucs UVR Model 1':
mdx_ensem_b = 'UVR_Demucs_Model_1'
if data['mdx_ensem_b'] == 'Demucs UVR Model 2':
mdx_ensem_b = 'UVR_Demucs_Model_2'
if data['mdx_ensem_b'] == 'Demucs mdx_extra':
mdx_ensem_b = 'mdx_extra'
if data['mdx_ensem_b'] == 'Demucs mdx_extra_q':
mdx_ensem_b = 'mdx_extra_q'
if data['mdx_ensem_b'] == 'No Model': if data['mdx_ensem_b'] == 'No Model':
mdx_ensem_b = 'pass' mdx_ensem_b = 'pass'
@@ -1456,7 +1542,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
} }
] ]
if data['ensChoose'] == 'Basic Ensemble': if data['ensChoose'] == 'Basic VR Ensemble':
loops = Basic_Ensem loops = Basic_Ensem
ensefolder = 'Basic_Ensemble_Outputs' ensefolder = 'Basic_Ensemble_Outputs'
if data['vr_ensem_c'] == 'No Model' and data['vr_ensem_d'] == 'No Model' and data['vr_ensem_e'] == 'No Model': if data['vr_ensem_c'] == 'No Model' and data['vr_ensem_d'] == 'No Model' and data['vr_ensem_e'] == 'No Model':
@@ -1487,7 +1573,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
loops = Vocal_Models loops = Vocal_Models
ensefolder = 'Vocal_Models_Ensemble_Outputs' ensefolder = 'Vocal_Models_Ensemble_Outputs'
ensemode = 'Vocal_Models' ensemode = 'Vocal_Models'
if data['ensChoose'] == 'MDX-Net/VR Ensemble': if data['ensChoose'] == 'Multi-AI Ensemble':
loops = mdx_vr loops = mdx_vr
ensefolder = 'MDX_VR_Ensemble_Outputs' ensefolder = 'MDX_VR_Ensemble_Outputs'
if data['vr_ensem'] == 'No Model' and data['vr_ensem_mdx_a'] == 'No Model' and data['vr_ensem_mdx_b'] == 'No Model' and data['vr_ensem_mdx_c'] == 'No Model': if data['vr_ensem'] == 'No Model' and data['vr_ensem_mdx_a'] == 'No Model' and data['vr_ensem_mdx_b'] == 'No Model' and data['vr_ensem_mdx_c'] == 'No Model':
@@ -1511,7 +1597,6 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
#Prepare Audiofile(s) #Prepare Audiofile(s)
for file_num, music_file in enumerate(data['input_paths'], start=1): for file_num, music_file in enumerate(data['input_paths'], start=1):
print(data['input_paths'])
# -Get text and update progress- # -Get text and update progress-
base_text = get_baseText(total_files=len(data['input_paths']), base_text = get_baseText(total_files=len(data['input_paths']),
file_num=file_num) file_num=file_num)
@@ -1609,9 +1694,9 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
presentmodel = Path(c['model_location']) presentmodel = Path(c['model_location'])
if presentmodel.is_file(): if presentmodel.is_file():
print(f'The file {presentmodel} exist') print(f'The file {presentmodel} exists')
else: else:
if data['ensChoose'] == 'MDX-Net/VR Ensemble': if data['ensChoose'] == 'Multi-AI Ensemble':
text_widget.write(base_text + 'Model "' + c['model_name'] + '.pth" is missing.\n') text_widget.write(base_text + 'Model "' + c['model_name'] + '.pth" is missing.\n')
text_widget.write(base_text + 'Installation of v5 Model Expansion Pack required to use this model.\n') text_widget.write(base_text + 'Installation of v5 Model Expansion Pack required to use this model.\n')
text_widget.write(base_text + f'If the error persists, please verify all models are present.\n\n') text_widget.write(base_text + f'If the error persists, please verify all models are present.\n\n')
@@ -1963,7 +2048,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
text_widget.write(base_text + 'Completed Seperation!\n\n') text_widget.write(base_text + 'Completed Seperation!\n\n')
if data['ensChoose'] == 'MDX-Net/VR Ensemble': if data['ensChoose'] == 'Multi-AI Ensemble':
mdx_name = c['mdx_model_name'] mdx_name = c['mdx_model_name']
@@ -1973,46 +2058,77 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
text_widget.write('Ensemble Mode - Running Model - ' + mdx_name + '\n\n') text_widget.write('Ensemble Mode - Running Model - ' + mdx_name + '\n\n')
if mdx_name == 'UVR_MDXNET_1_9703': if mdx_name == 'UVR_MDXNET_1_9703':
demucs_only = 'off'
model_set = 'UVR_MDXNET_1_9703.onnx' model_set = 'UVR_MDXNET_1_9703.onnx'
model_set_name = 'UVR_MDXNET_1_9703' model_set_name = 'UVR_MDXNET_1_9703'
modeltype = 'v' modeltype = 'v'
demucs_model_set = data['DemucsModel_MDX']
noise_pro = 'MDX-NET_Noise_Profile_14_kHz' noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
if mdx_name == 'UVR_MDXNET_2_9682': if mdx_name == 'UVR_MDXNET_2_9682':
demucs_only = 'off'
model_set = 'UVR_MDXNET_2_9682.onnx' model_set = 'UVR_MDXNET_2_9682.onnx'
model_set_name = 'UVR_MDXNET_2_9682' model_set_name = 'UVR_MDXNET_2_9682'
modeltype = 'v' modeltype = 'v'
noise_pro = 'MDX-NET_Noise_Profile_14_kHz' noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
if mdx_name == 'UVR_MDXNET_3_9662': if mdx_name == 'UVR_MDXNET_3_9662':
demucs_only = 'off'
model_set = 'UVR_MDXNET_3_9662.onnx' model_set = 'UVR_MDXNET_3_9662.onnx'
model_set_name = 'UVR_MDXNET_3_9662' model_set_name = 'UVR_MDXNET_3_9662'
modeltype = 'v' modeltype = 'v'
demucs_model_set = data['DemucsModel_MDX']
noise_pro = 'MDX-NET_Noise_Profile_14_kHz' noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
if mdx_name == 'UVR_MDXNET_KARA': if mdx_name == 'UVR_MDXNET_KARA':
demucs_only = 'off'
model_set = 'UVR_MDXNET_KARA.onnx' model_set = 'UVR_MDXNET_KARA.onnx'
model_set_name = 'UVR_MDXNET_KARA' model_set_name = 'UVR_MDXNET_KARA'
modeltype = 'v' modeltype = 'v'
noise_pro = 'MDX-NET_Noise_Profile_14_kHz' noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
if mdx_name == 'UVR_MDXNET_9703': if mdx_name == 'UVR_MDXNET_9703':
demucs_only = 'off'
model_set = 'UVR_MDXNET_9703.onnx' model_set = 'UVR_MDXNET_9703.onnx'
model_set_name = 'UVR_MDXNET_9703' model_set_name = 'UVR_MDXNET_9703'
modeltype = 'v' modeltype = 'v'
demucs_model_set = data['DemucsModel_MDX']
noise_pro = 'MDX-NET_Noise_Profile_14_kHz' noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
if mdx_name == 'UVR_MDXNET_9682': if mdx_name == 'UVR_MDXNET_9682':
demucs_only = 'off'
model_set = 'UVR_MDXNET_9682.onnx' model_set = 'UVR_MDXNET_9682.onnx'
model_set_name = 'UVR_MDXNET_9682' model_set_name = 'UVR_MDXNET_9682'
modeltype = 'v' modeltype = 'v'
demucs_model_set = data['DemucsModel_MDX']
noise_pro = 'MDX-NET_Noise_Profile_14_kHz' noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
if mdx_name == 'UVR_MDXNET_9662': if mdx_name == 'UVR_MDXNET_9662':
demucs_only = 'off'
model_set = 'UVR_MDXNET_9662.onnx' model_set = 'UVR_MDXNET_9662.onnx'
model_set_name = 'UVR_MDXNET_9662' model_set_name = 'UVR_MDXNET_9662'
modeltype = 'v' modeltype = 'v'
demucs_model_set = data['DemucsModel_MDX']
noise_pro = 'MDX-NET_Noise_Profile_14_kHz' noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
if mdx_name == 'UVR_MDXNET_KARA': if mdx_name == 'UVR_MDXNET_KARA':
demucs_only = 'off'
model_set = 'UVR_MDXNET_KARA.onnx' model_set = 'UVR_MDXNET_KARA.onnx'
model_set_name = 'UVR_MDXNET_KARA' model_set_name = 'UVR_MDXNET_KARA'
modeltype = 'v' modeltype = 'v'
demucs_model_set = data['DemucsModel_MDX']
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
if 'Demucs' in mdx_name:
demucs_only = 'on'
demucs_switch = 'on'
demucs_model_set = mdx_name
model_set = ''
model_set_name = 'UVR'
modeltype = 'v'
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
if 'extra' in mdx_name:
demucs_only = 'on'
demucs_switch = 'on'
demucs_model_set = mdx_name
model_set = ''
model_set_name = 'extra'
modeltype = 'v'
noise_pro = 'MDX-NET_Noise_Profile_14_kHz' noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
print('demucs_only? ', demucs_only)
if data['noise_pro_select'] == 'Auto Select': if data['noise_pro_select'] == 'Auto Select':
noise_pro_set = noise_pro noise_pro_set = noise_pro
@@ -2033,12 +2149,9 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
widget_text.write(base_text + 'Noise Reduction will be disabled until SoX is available.\n\n') widget_text.write(base_text + 'Noise Reduction will be disabled until SoX is available.\n\n')
e = os.path.join(data["export_path"]) e = os.path.join(data["export_path"])
demucsmodel = 'models/Demucs_Model/' + str(data['DemucsModel'])
pred = Predictor() pred = Predictor()
pred.prediction_setup(demucs_name=demucsmodel, pred.prediction_setup()
channels=channel_set)
# split # split
pred.prediction( pred.prediction(
@@ -2502,7 +2615,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
try: try:
with open('errorlog.txt', 'w') as f: with open('errorlog.txt', 'w') as f:
f.write(f'Last Error Received:\n\n' + f.write(f'Last Error Received:\n\n' +
f'Error Received while attempting to run user ensemble:\n' + f'Error Received while attempting to run Manual Ensemble:\n' +
f'Process Method: Ensemble Mode\n\n' + f'Process Method: Ensemble Mode\n\n' +
f'FFmpeg might be missing or corrupted.\n\n' + f'FFmpeg might be missing or corrupted.\n\n' +
f'If this error persists, please contact the developers.\n\n' + f'If this error persists, please contact the developers.\n\n' +
@@ -2530,7 +2643,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
try: try:
with open('errorlog.txt', 'w') as f: with open('errorlog.txt', 'w') as f:
f.write(f'Last Error Received:\n\n' + f.write(f'Last Error Received:\n\n' +
f'Error Received while attempting to run user ensemble:\n' + f'Error Received while attempting to run Manual Ensemble:\n' +
f'Process Method: Ensemble Mode\n\n' + f'Process Method: Ensemble Mode\n\n' +
f'FFmpeg might be missing or corrupted.\n\n' + f'FFmpeg might be missing or corrupted.\n\n' +
f'If this error persists, please contact the developers.\n\n' + f'If this error persists, please contact the developers.\n\n' +
@@ -2899,11 +3012,10 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
update_progress(**progress_kwargs, update_progress(**progress_kwargs,
step=1) step=1)
print('Done!') print('Done!')
progress_var.set(0) progress_var.set(0)
if not data['ensChoose'] == 'User Ensemble': if not data['ensChoose'] == 'Manual Ensemble':
text_widget.write(base_text + f'Conversions Completed!\n') text_widget.write(base_text + f'Conversions Completed!\n')
elif data['algo'] == 'Instrumentals (Min Spec)' and len(data['input_paths']) <= 1 or data['algo'] == 'Vocals (Max Spec)' and len(data['input_paths']) <= 1: elif data['algo'] == 'Instrumentals (Min Spec)' and len(data['input_paths']) <= 1 or data['algo'] == 'Vocals (Max Spec)' and len(data['input_paths']) <= 1:
text_widget.write(base_text + f'Please select 2 or more files to use this feature and try again.\n') text_widget.write(base_text + f'Please select 2 or more files to use this feature and try again.\n')