Add files via upload
This commit is contained in:
408
inference_MDX.py
408
inference_MDX.py
@@ -1,7 +1,4 @@
|
|||||||
import os
|
import os
|
||||||
from pickle import STOP
|
|
||||||
from tracemalloc import stop
|
|
||||||
from turtle import update
|
|
||||||
import subprocess
|
import subprocess
|
||||||
from unittest import skip
|
from unittest import skip
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -11,14 +8,18 @@ import pydub
|
|||||||
import shutil
|
import shutil
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
import gc
|
|
||||||
#MDX-Net
|
#MDX-Net
|
||||||
#----------------------------------------
|
#----------------------------------------
|
||||||
import soundfile as sf
|
import soundfile as sf
|
||||||
import torch
|
import torch
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from demucs.model import Demucs
|
|
||||||
from demucs.utils import apply_model
|
from demucs.pretrained import get_model as _gm
|
||||||
|
from demucs.hdemucs import HDemucs
|
||||||
|
from demucs.apply import BagOfModels, apply_model
|
||||||
|
from demucs.audio import AudioFile
|
||||||
|
import pathlib
|
||||||
|
|
||||||
from models import get_models, spec_effects
|
from models import get_models, spec_effects
|
||||||
import onnxruntime as ort
|
import onnxruntime as ort
|
||||||
import time
|
import time
|
||||||
@@ -38,12 +39,13 @@ import tkinter as tk
|
|||||||
import traceback # Error Message Recent Calls
|
import traceback # Error Message Recent Calls
|
||||||
import time # Timer
|
import time # Timer
|
||||||
|
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
class Predictor():
|
class Predictor():
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def prediction_setup(self, demucs_name,
|
def prediction_setup(self):
|
||||||
channels=64):
|
|
||||||
|
|
||||||
global device
|
global device
|
||||||
|
|
||||||
@@ -55,20 +57,24 @@ class Predictor():
|
|||||||
device = torch.device('cpu')
|
device = torch.device('cpu')
|
||||||
|
|
||||||
if data['demucsmodel']:
|
if data['demucsmodel']:
|
||||||
self.demucs = Demucs(sources=["drums", "bass", "other", "vocals"], channels=channels)
|
if 'UVR' in demucs_model_set:
|
||||||
widget_text.write(base_text + 'Loading Demucs model... ')
|
self.demucs = HDemucs(sources=["other", "vocals"])
|
||||||
|
else:
|
||||||
|
self.demucs = HDemucs(sources=["drums", "bass", "other", "vocals"])
|
||||||
|
widget_text.write(base_text + 'Loading Demucs model...')
|
||||||
update_progress(**progress_kwargs,
|
update_progress(**progress_kwargs,
|
||||||
step=0.05)
|
step=0.05)
|
||||||
|
path_d = Path('models/Demucs_Models')
|
||||||
|
self.demucs = _gm(name=demucs_model_set, repo=path_d)
|
||||||
self.demucs.to(device)
|
self.demucs.to(device)
|
||||||
self.demucs.load_state_dict(torch.load(demucs_name))
|
|
||||||
widget_text.write('Done!\n')
|
|
||||||
self.demucs.eval()
|
self.demucs.eval()
|
||||||
|
widget_text.write('Done!\n')
|
||||||
|
if isinstance(self.demucs, BagOfModels):
|
||||||
|
widget_text.write(base_text + f"Selected Demucs model is a bag of {len(self.demucs.models)} model(s).\n")
|
||||||
|
|
||||||
self.onnx_models = {}
|
self.onnx_models = {}
|
||||||
c = 0
|
c = 0
|
||||||
|
|
||||||
print('stemtype: ', modeltype)
|
|
||||||
|
|
||||||
self.models = get_models('tdf_extra', load=False, device=cpu, stems=modeltype, n_fft_scale=n_fft_scale_set, dim_f=dim_f_set)
|
self.models = get_models('tdf_extra', load=False, device=cpu, stems=modeltype, n_fft_scale=n_fft_scale_set, dim_f=dim_f_set)
|
||||||
if not data['demucs_only']:
|
if not data['demucs_only']:
|
||||||
widget_text.write(base_text + 'Loading ONNX model... ')
|
widget_text.write(base_text + 'Loading ONNX model... ')
|
||||||
@@ -87,19 +93,17 @@ class Predictor():
|
|||||||
elif data['gpu'] == -1:
|
elif data['gpu'] == -1:
|
||||||
run_type = ['CPUExecutionProvider']
|
run_type = ['CPUExecutionProvider']
|
||||||
|
|
||||||
print(run_type)
|
print('Selected Model: ', model_set)
|
||||||
print(str(device))
|
|
||||||
|
|
||||||
self.onnx_models[c] = ort.InferenceSession(os.path.join('models/MDX_Net_Models', str(model_set) + '.onnx'), providers=run_type)
|
self.onnx_models[c] = ort.InferenceSession(os.path.join('models/MDX_Net_Models', str(model_set) + '.onnx'), providers=run_type)
|
||||||
|
|
||||||
if not data['demucs_only']:
|
if not data['demucs_only']:
|
||||||
widget_text.write('Done!\n')
|
widget_text.write('Done!\n')
|
||||||
|
|
||||||
def prediction(self, m):
|
def prediction(self, m):
|
||||||
#mix, rate = sf.read(m)
|
mix, samplerate = librosa.load(m, mono=False, sr=44100)
|
||||||
mix, rate = librosa.load(m, mono=False, sr=44100)
|
|
||||||
if mix.ndim == 1:
|
if mix.ndim == 1:
|
||||||
mix = np.asfortranarray([mix,mix])
|
mix = np.asfortranarray([mix,mix])
|
||||||
|
samplerate = samplerate
|
||||||
mix = mix.T
|
mix = mix.T
|
||||||
sources = self.demix(mix.T)
|
sources = self.demix(mix.T)
|
||||||
widget_text.write(base_text + 'Inferences complete!\n')
|
widget_text.write(base_text + 'Inferences complete!\n')
|
||||||
@@ -226,13 +230,12 @@ class Predictor():
|
|||||||
c += 1
|
c += 1
|
||||||
|
|
||||||
if not data['demucsmodel']:
|
if not data['demucsmodel']:
|
||||||
|
|
||||||
if data['inst_only']:
|
if data['inst_only']:
|
||||||
widget_text.write(base_text + 'Preparing to save Instrumental...')
|
widget_text.write(base_text + 'Preparing to save Instrumental...')
|
||||||
else:
|
else:
|
||||||
widget_text.write(base_text + 'Saving vocals... ')
|
widget_text.write(base_text + 'Saving vocals... ')
|
||||||
|
|
||||||
sf.write(non_reduced_vocal_path, sources[c].T, rate)
|
sf.write(non_reduced_vocal_path, sources[c].T, samplerate)
|
||||||
update_progress(**progress_kwargs,
|
update_progress(**progress_kwargs,
|
||||||
step=(0.9))
|
step=(0.9))
|
||||||
widget_text.write('Done!\n')
|
widget_text.write('Done!\n')
|
||||||
@@ -240,7 +243,7 @@ class Predictor():
|
|||||||
reduction_sen = float(int(data['noisereduc_s'])/10)
|
reduction_sen = float(int(data['noisereduc_s'])/10)
|
||||||
subprocess.call("lib_v5\\sox\\sox.exe" + ' "' +
|
subprocess.call("lib_v5\\sox\\sox.exe" + ' "' +
|
||||||
f"{str(non_reduced_vocal_path)}" + '" "' + f"{str(vocal_path)}" + '" ' +
|
f"{str(non_reduced_vocal_path)}" + '" "' + f"{str(vocal_path)}" + '" ' +
|
||||||
"noisered lib_v5\\sox\\mdxnetnoisereduc.prof " + f"{reduction_sen}",
|
"noisered lib_v5\\sox\\" + noise_pro_set + ".prof " + f"{reduction_sen}",
|
||||||
shell=True, stdout=subprocess.PIPE,
|
shell=True, stdout=subprocess.PIPE,
|
||||||
stdin=subprocess.PIPE, stderr=subprocess.PIPE)
|
stdin=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
widget_text.write('Done!\n')
|
widget_text.write('Done!\n')
|
||||||
@@ -252,7 +255,11 @@ class Predictor():
|
|||||||
else:
|
else:
|
||||||
widget_text.write(base_text + 'Saving Vocals... ')
|
widget_text.write(base_text + 'Saving Vocals... ')
|
||||||
|
|
||||||
sf.write(non_reduced_vocal_path, sources[3].T, rate)
|
if data['demucs_only']:
|
||||||
|
if 'UVR' in demucs_model_set:
|
||||||
|
sf.write(non_reduced_vocal_path, sources[1].T, samplerate)
|
||||||
|
else:
|
||||||
|
sf.write(non_reduced_vocal_path, sources[source_val].T, samplerate)
|
||||||
update_progress(**progress_kwargs,
|
update_progress(**progress_kwargs,
|
||||||
step=(0.9))
|
step=(0.9))
|
||||||
widget_text.write('Done!\n')
|
widget_text.write('Done!\n')
|
||||||
@@ -275,7 +282,7 @@ class Predictor():
|
|||||||
widget_text.write(base_text + 'Preparing Instrumental...')
|
widget_text.write(base_text + 'Preparing Instrumental...')
|
||||||
else:
|
else:
|
||||||
widget_text.write(base_text + 'Saving Vocals... ')
|
widget_text.write(base_text + 'Saving Vocals... ')
|
||||||
sf.write(vocal_path, sources[c].T, rate)
|
sf.write(vocal_path, sources[c].T, samplerate)
|
||||||
update_progress(**progress_kwargs,
|
update_progress(**progress_kwargs,
|
||||||
step=(0.9))
|
step=(0.9))
|
||||||
widget_text.write('Done!\n')
|
widget_text.write('Done!\n')
|
||||||
@@ -284,7 +291,15 @@ class Predictor():
|
|||||||
widget_text.write(base_text + 'Preparing Instrumental...')
|
widget_text.write(base_text + 'Preparing Instrumental...')
|
||||||
else:
|
else:
|
||||||
widget_text.write(base_text + 'Saving Vocals... ')
|
widget_text.write(base_text + 'Saving Vocals... ')
|
||||||
sf.write(vocal_path, sources[3].T, rate)
|
|
||||||
|
if data['demucs_only']:
|
||||||
|
if 'UVR' in demucs_model_set:
|
||||||
|
sf.write(vocal_path, sources[1].T, samplerate)
|
||||||
|
else:
|
||||||
|
sf.write(vocal_path, sources[source_val].T, samplerate)
|
||||||
|
else:
|
||||||
|
sf.write(vocal_path, sources[source_val].T, samplerate)
|
||||||
|
|
||||||
update_progress(**progress_kwargs,
|
update_progress(**progress_kwargs,
|
||||||
step=(0.9))
|
step=(0.9))
|
||||||
widget_text.write('Done!\n')
|
widget_text.write('Done!\n')
|
||||||
@@ -471,13 +486,6 @@ class Predictor():
|
|||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
try:
|
|
||||||
print('Is there already a voc file there? ', file_exists_v)
|
|
||||||
print('Is there already a non_voc file there? ', file_exists_n)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
if data['noisereduc_s'] == 'None':
|
if data['noisereduc_s'] == 'None':
|
||||||
pass
|
pass
|
||||||
elif data['non_red'] == True:
|
elif data['non_red'] == True:
|
||||||
@@ -568,22 +576,36 @@ class Predictor():
|
|||||||
if end == samples:
|
if end == samples:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
if not data['demucsmodel']:
|
if not data['demucsmodel']:
|
||||||
sources = self.demix_base(segmented_mix, margin_size=margin)
|
sources = self.demix_base(segmented_mix, margin_size=margin)
|
||||||
elif data['demucs_only']:
|
elif data['demucs_only']:
|
||||||
sources = self.demix_demucs(segmented_mix, margin_size=margin)
|
if split_mode == True:
|
||||||
|
sources = self.demix_demucs_split(mix)
|
||||||
|
if split_mode == False:
|
||||||
|
sources = self.demix_demucs(segmented_mix, margin_size=margin)
|
||||||
else: # both, apply spec effects
|
else: # both, apply spec effects
|
||||||
base_out = self.demix_base(segmented_mix, margin_size=margin)
|
base_out = self.demix_base(segmented_mix, margin_size=margin)
|
||||||
demucs_out = self.demix_demucs(segmented_mix, margin_size=margin)
|
print(split_mode)
|
||||||
|
if split_mode == True:
|
||||||
|
demucs_out = self.demix_demucs_split(mix)
|
||||||
|
if split_mode == False:
|
||||||
|
demucs_out = self.demix_demucs(segmented_mix, margin_size=margin)
|
||||||
nan_count = np.count_nonzero(np.isnan(demucs_out)) + np.count_nonzero(np.isnan(base_out))
|
nan_count = np.count_nonzero(np.isnan(demucs_out)) + np.count_nonzero(np.isnan(base_out))
|
||||||
if nan_count > 0:
|
if nan_count > 0:
|
||||||
print('Warning: there are {} nan values in the array(s).'.format(nan_count))
|
print('Warning: there are {} nan values in the array(s).'.format(nan_count))
|
||||||
demucs_out, base_out = np.nan_to_num(demucs_out), np.nan_to_num(base_out)
|
demucs_out, base_out = np.nan_to_num(demucs_out), np.nan_to_num(base_out)
|
||||||
sources = {}
|
sources = {}
|
||||||
print(data['mixing'])
|
print(data['mixing'])
|
||||||
sources[3] = (spec_effects(wave=[demucs_out[source_val],base_out[0]],
|
|
||||||
algorithm=data['mixing'],
|
if 'UVR' in demucs_model_set:
|
||||||
value=b[3])*float(data['compensate'])) # compensation
|
sources[source_val] = (spec_effects(wave=[demucs_out[1],base_out[0]],
|
||||||
|
algorithm=data['mixing'],
|
||||||
|
value=b[source_val])*float(data['compensate'])) # compensation
|
||||||
|
else:
|
||||||
|
sources[source_val] = (spec_effects(wave=[demucs_out[source_val],base_out[0]],
|
||||||
|
algorithm=data['mixing'],
|
||||||
|
value=b[source_val])*float(data['compensate'])) # compensation
|
||||||
return sources
|
return sources
|
||||||
|
|
||||||
def demix_base(self, mixes, margin_size):
|
def demix_base(self, mixes, margin_size):
|
||||||
@@ -594,6 +616,7 @@ class Predictor():
|
|||||||
widget_text.write(base_text + "Running ONNX Inference...\n")
|
widget_text.write(base_text + "Running ONNX Inference...\n")
|
||||||
widget_text.write(base_text + "Processing "f"{onnxitera} slices... ")
|
widget_text.write(base_text + "Processing "f"{onnxitera} slices... ")
|
||||||
print(' Running ONNX Inference...')
|
print(' Running ONNX Inference...')
|
||||||
|
|
||||||
for mix in mixes:
|
for mix in mixes:
|
||||||
gui_progress_bar_onnx += 1
|
gui_progress_bar_onnx += 1
|
||||||
if data['demucsmodel']:
|
if data['demucsmodel']:
|
||||||
@@ -602,6 +625,7 @@ class Predictor():
|
|||||||
else:
|
else:
|
||||||
update_progress(**progress_kwargs,
|
update_progress(**progress_kwargs,
|
||||||
step=(0.1 + (0.9/onnxitera * gui_progress_bar_onnx)))
|
step=(0.1 + (0.9/onnxitera * gui_progress_bar_onnx)))
|
||||||
|
|
||||||
cmix = mixes[mix]
|
cmix = mixes[mix]
|
||||||
sources = []
|
sources = []
|
||||||
n_sample = cmix.shape[1]
|
n_sample = cmix.shape[1]
|
||||||
@@ -634,7 +658,6 @@ class Predictor():
|
|||||||
end = None
|
end = None
|
||||||
sources.append(tar_signal[:,start:end])
|
sources.append(tar_signal[:,start:end])
|
||||||
|
|
||||||
|
|
||||||
chunked_sources.append(sources)
|
chunked_sources.append(sources)
|
||||||
_sources = np.concatenate(chunked_sources, axis=-1)
|
_sources = np.concatenate(chunked_sources, axis=-1)
|
||||||
del self.onnx_models
|
del self.onnx_models
|
||||||
@@ -647,6 +670,7 @@ class Predictor():
|
|||||||
demucsitera = len(mix)
|
demucsitera = len(mix)
|
||||||
demucsitera_calc = demucsitera * 2
|
demucsitera_calc = demucsitera * 2
|
||||||
gui_progress_bar_demucs = 0
|
gui_progress_bar_demucs = 0
|
||||||
|
widget_text.write(base_text + "Split Mode is off. (Chunks enabled for Demucs Model)\n")
|
||||||
widget_text.write(base_text + "Running Demucs Inference...\n")
|
widget_text.write(base_text + "Running Demucs Inference...\n")
|
||||||
widget_text.write(base_text + "Processing "f"{len(mix)} slices... ")
|
widget_text.write(base_text + "Processing "f"{len(mix)} slices... ")
|
||||||
print(' Running Demucs Inference...')
|
print(' Running Demucs Inference...')
|
||||||
@@ -659,7 +683,8 @@ class Predictor():
|
|||||||
ref = cmix.mean(0)
|
ref = cmix.mean(0)
|
||||||
cmix = (cmix - ref.mean()) / ref.std()
|
cmix = (cmix - ref.mean()) / ref.std()
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
sources = apply_model(self.demucs, cmix.to(device), split=True, overlap=overlap_set, shifts=shift_set)
|
print(split_mode)
|
||||||
|
sources = apply_model(self.demucs, cmix[None], split=split_mode, device=device, overlap=overlap_set, shifts=shift_set, progress=False)[0]
|
||||||
sources = (sources * ref.std() + ref.mean()).cpu().numpy()
|
sources = (sources * ref.std() + ref.mean()).cpu().numpy()
|
||||||
sources[[0,1]] = sources[[1,0]]
|
sources[[0,1]] = sources[[1,0]]
|
||||||
|
|
||||||
@@ -674,6 +699,27 @@ class Predictor():
|
|||||||
widget_text.write('Done!\n')
|
widget_text.write('Done!\n')
|
||||||
return sources
|
return sources
|
||||||
|
|
||||||
|
def demix_demucs_split(self, mix):
|
||||||
|
|
||||||
|
print('shift_set ', shift_set)
|
||||||
|
widget_text.write(base_text + "Split Mode is on. (Chunks disabled for Demucs Model)\n")
|
||||||
|
widget_text.write(base_text + "Running Demucs Inference...\n")
|
||||||
|
widget_text.write(base_text + "Processing "f"{len(mix)} slices... ")
|
||||||
|
print(' Running Demucs Inference...')
|
||||||
|
|
||||||
|
mix = torch.tensor(mix, dtype=torch.float32)
|
||||||
|
ref = mix.mean(0)
|
||||||
|
mix = (mix - ref.mean()) / ref.std()
|
||||||
|
|
||||||
|
with torch.no_grad():
|
||||||
|
sources = apply_model(self.demucs, mix[None], split=split_mode, device=device, overlap=overlap_set, shifts=shift_set, progress=False)[0]
|
||||||
|
|
||||||
|
widget_text.write('Done!\n')
|
||||||
|
|
||||||
|
sources = (sources * ref.std() + ref.mean()).cpu().numpy()
|
||||||
|
sources[[0,1]] = sources[[1,0]]
|
||||||
|
return sources
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
# Paths
|
# Paths
|
||||||
'input_paths': None,
|
'input_paths': None,
|
||||||
@@ -694,11 +740,11 @@ data = {
|
|||||||
'overlap': 0.5,
|
'overlap': 0.5,
|
||||||
'shifts': 0,
|
'shifts': 0,
|
||||||
'margin': 44100,
|
'margin': 44100,
|
||||||
'channel': 64,
|
'split_mode': False,
|
||||||
'compensate': 1.03597672895,
|
'compensate': 1.03597672895,
|
||||||
'demucs_only': False,
|
'demucs_only': False,
|
||||||
'mixing': 'Default',
|
'mixing': 'Default',
|
||||||
'DemucsModel': 'demucs_extra-3646af93_org.th',
|
'DemucsModel_MDX': 'UVR_Demucs_Model_1',
|
||||||
# Choose Model
|
# Choose Model
|
||||||
'mdxnetModel': 'UVR-MDX-NET 1',
|
'mdxnetModel': 'UVR-MDX-NET 1',
|
||||||
'mdxnetModeltype': 'Vocals (Custom)',
|
'mdxnetModeltype': 'Vocals (Custom)',
|
||||||
@@ -751,6 +797,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
global model_set_name
|
global model_set_name
|
||||||
global stemset_n
|
global stemset_n
|
||||||
global noise_pro_set
|
global noise_pro_set
|
||||||
|
global demucs_model_set
|
||||||
|
|
||||||
global mdx_model_hash
|
global mdx_model_hash
|
||||||
|
|
||||||
@@ -759,6 +806,9 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
global overlap_set
|
global overlap_set
|
||||||
global shift_set
|
global shift_set
|
||||||
global source_val
|
global source_val
|
||||||
|
global split_mode
|
||||||
|
|
||||||
|
global demucs_switch
|
||||||
|
|
||||||
# Update default settings
|
# Update default settings
|
||||||
default_chunks = data['chunks']
|
default_chunks = data['chunks']
|
||||||
@@ -823,161 +873,90 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
source_val_set = 0
|
source_val_set = 0
|
||||||
stem_name = '(Bass)'
|
stem_name = '(Bass)'
|
||||||
|
|
||||||
|
if data['mdxnetModel'] == 'UVR-MDX-NET 1':
|
||||||
try:
|
if os.path.isfile('models/MDX_Net_Models/UVR_MDXNET_1_9703.onnx'):
|
||||||
if data['mdxnetModel'] == 'UVR-MDX-NET 1':
|
|
||||||
model_set = 'UVR_MDXNET_1_9703'
|
model_set = 'UVR_MDXNET_1_9703'
|
||||||
model_set_name = 'UVR_MDXNET_1_9703'
|
model_set_name = 'UVR_MDXNET_1_9703'
|
||||||
modeltype = 'v'
|
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
|
||||||
stemset_n = '(Vocals)'
|
|
||||||
source_val = 3
|
|
||||||
n_fft_scale_set=6144
|
|
||||||
dim_f_set=2048
|
|
||||||
elif data['mdxnetModel'] == 'UVR-MDX-NET 2':
|
|
||||||
model_set = 'UVR_MDXNET_2_9682'
|
|
||||||
model_set_name = 'UVR_MDXNET_2_9682'
|
|
||||||
modeltype = 'v'
|
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
|
||||||
stemset_n = '(Vocals)'
|
|
||||||
source_val = 3
|
|
||||||
n_fft_scale_set=6144
|
|
||||||
dim_f_set=2048
|
|
||||||
elif data['mdxnetModel'] == 'UVR-MDX-NET 3':
|
|
||||||
model_set = 'UVR_MDXNET_3_9662'
|
|
||||||
model_set_name = 'UVR_MDXNET_3_9662'
|
|
||||||
modeltype = 'v'
|
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
|
||||||
stemset_n = '(Vocals)'
|
|
||||||
source_val = 3
|
|
||||||
n_fft_scale_set=6144
|
|
||||||
dim_f_set=2048
|
|
||||||
elif data['mdxnetModel'] == 'UVR-MDX-NET Karaoke':
|
|
||||||
model_set = 'UVR_MDXNET_KARA'
|
|
||||||
model_set_name = 'UVR_MDXNET_Karaoke'
|
|
||||||
modeltype = 'v'
|
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
|
||||||
stemset_n = '(Vocals)'
|
|
||||||
source_val = 3
|
|
||||||
n_fft_scale_set=6144
|
|
||||||
dim_f_set=2048
|
|
||||||
elif data['mdxnetModel'] == 'other':
|
|
||||||
model_set = 'other'
|
|
||||||
model_set_name = 'other'
|
|
||||||
modeltype = 'o'
|
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
|
|
||||||
stemset_n = '(Other)'
|
|
||||||
source_val = 2
|
|
||||||
n_fft_scale_set=8192
|
|
||||||
dim_f_set=2048
|
|
||||||
elif data['mdxnetModel'] == 'drums':
|
|
||||||
model_set = 'drums'
|
|
||||||
model_set_name = 'drums'
|
|
||||||
modeltype = 'd'
|
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
|
|
||||||
stemset_n = '(Drums)'
|
|
||||||
source_val = 1
|
|
||||||
n_fft_scale_set=4096
|
|
||||||
dim_f_set=2048
|
|
||||||
elif data['mdxnetModel'] == 'bass':
|
|
||||||
model_set = 'bass'
|
|
||||||
model_set_name = 'bass'
|
|
||||||
modeltype = 'b'
|
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
|
|
||||||
stemset_n = '(Bass)'
|
|
||||||
source_val = 0
|
|
||||||
n_fft_scale_set=16384
|
|
||||||
dim_f_set=2048
|
|
||||||
else:
|
else:
|
||||||
model_set = data['mdxnetModel']
|
|
||||||
model_set_name = data['mdxnetModel']
|
|
||||||
modeltype = stemset
|
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
|
|
||||||
stemset_n = stem_name
|
|
||||||
source_val = source_val_set
|
|
||||||
n_fft_scale_set=int(data['n_fft_scale'])
|
|
||||||
dim_f_set=int(data['dim_f'])
|
|
||||||
|
|
||||||
MDXModelName=('models/MDX_Net_Models/' + model_set + '.onnx')
|
|
||||||
mdx_model_hash = hashlib.md5(open(MDXModelName, 'rb').read()).hexdigest()
|
|
||||||
print(mdx_model_hash)
|
|
||||||
except:
|
|
||||||
if data['mdxnetModel'] == 'UVR-MDX-NET 1':
|
|
||||||
model_set = 'UVR_MDXNET_9703'
|
model_set = 'UVR_MDXNET_9703'
|
||||||
model_set_name = 'UVR_MDXNET_9703'
|
model_set_name = 'UVR_MDXNET_9703'
|
||||||
modeltype = 'v'
|
modeltype = 'v'
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
||||||
stemset_n = '(Vocals)'
|
stemset_n = '(Vocals)'
|
||||||
source_val = 3
|
source_val = 3
|
||||||
n_fft_scale_set=6144
|
n_fft_scale_set=6144
|
||||||
dim_f_set=2048
|
dim_f_set=2048
|
||||||
elif data['mdxnetModel'] == 'UVR-MDX-NET 2':
|
elif data['mdxnetModel'] == 'UVR-MDX-NET 2':
|
||||||
|
if os.path.isfile('models/MDX_Net_Models/UVR_MDXNET_2_9682.onnx'):
|
||||||
|
model_set = 'UVR_MDXNET_2_9682'
|
||||||
|
model_set_name = 'UVR_MDXNET_2_9682'
|
||||||
|
else:
|
||||||
model_set = 'UVR_MDXNET_9682'
|
model_set = 'UVR_MDXNET_9682'
|
||||||
model_set_name = 'UVR_MDXNET_9682'
|
model_set_name = 'UVR_MDXNET_9682'
|
||||||
modeltype = 'v'
|
modeltype = 'v'
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
||||||
stemset_n = '(Vocals)'
|
stemset_n = '(Vocals)'
|
||||||
source_val = 3
|
source_val = 3
|
||||||
n_fft_scale_set=6144
|
n_fft_scale_set=6144
|
||||||
dim_f_set=2048
|
dim_f_set=2048
|
||||||
elif data['mdxnetModel'] == 'UVR-MDX-NET 3':
|
elif data['mdxnetModel'] == 'UVR-MDX-NET 3':
|
||||||
|
if os.path.isfile('models/MDX_Net_Models/UVR_MDXNET_3_9662.onnx'):
|
||||||
|
model_set = 'UVR_MDXNET_3_9662'
|
||||||
|
model_set_name = 'UVR_MDXNET_3_9662'
|
||||||
|
else:
|
||||||
model_set = 'UVR_MDXNET_9662'
|
model_set = 'UVR_MDXNET_9662'
|
||||||
model_set_name = 'UVR_MDXNET_9662'
|
model_set_name = 'UVR_MDXNET_9662'
|
||||||
modeltype = 'v'
|
modeltype = 'v'
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
||||||
stemset_n = '(Vocals)'
|
stemset_n = '(Vocals)'
|
||||||
source_val = 3
|
source_val = 3
|
||||||
n_fft_scale_set=6144
|
n_fft_scale_set=6144
|
||||||
dim_f_set=2048
|
dim_f_set=2048
|
||||||
elif data['mdxnetModel'] == 'UVR-MDX-NET Karaoke':
|
elif data['mdxnetModel'] == 'UVR-MDX-NET Karaoke':
|
||||||
model_set = 'UVR_MDXNET_KARA'
|
model_set = 'UVR_MDXNET_KARA'
|
||||||
model_set_name = 'UVR_MDXNET_Karaoke'
|
model_set_name = 'UVR_MDXNET_Karaoke'
|
||||||
modeltype = 'v'
|
modeltype = 'v'
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
||||||
stemset_n = '(Vocals)'
|
stemset_n = '(Vocals)'
|
||||||
source_val = 3
|
source_val = 3
|
||||||
n_fft_scale_set=6144
|
n_fft_scale_set=6144
|
||||||
dim_f_set=2048
|
dim_f_set=2048
|
||||||
elif data['mdxnetModel'] == 'other':
|
elif 'other' in data['mdxnetModel']:
|
||||||
model_set = 'other'
|
model_set = 'other'
|
||||||
model_set_name = 'other'
|
model_set_name = 'other'
|
||||||
modeltype = 'o'
|
modeltype = 'o'
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
|
noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
|
||||||
stemset_n = '(Other)'
|
stemset_n = '(Other)'
|
||||||
source_val = 2
|
source_val = 2
|
||||||
n_fft_scale_set=8192
|
n_fft_scale_set=8192
|
||||||
dim_f_set=2048
|
dim_f_set=2048
|
||||||
elif data['mdxnetModel'] == 'drums':
|
elif 'drums' in data['mdxnetModel']:
|
||||||
model_set = 'drums'
|
model_set = 'drums'
|
||||||
model_set_name = 'drums'
|
model_set_name = 'drums'
|
||||||
modeltype = 'd'
|
modeltype = 'd'
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
|
noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
|
||||||
stemset_n = '(Drums)'
|
stemset_n = '(Drums)'
|
||||||
source_val = 1
|
source_val = 1
|
||||||
n_fft_scale_set=4096
|
n_fft_scale_set=4096
|
||||||
dim_f_set=2048
|
dim_f_set=2048
|
||||||
elif data['mdxnetModel'] == 'bass':
|
elif 'bass' in data['mdxnetModel']:
|
||||||
model_set = 'bass'
|
model_set = 'bass'
|
||||||
model_set_name = 'bass'
|
model_set_name = 'bass'
|
||||||
modeltype = 'b'
|
modeltype = 'b'
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
|
noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
|
||||||
stemset_n = '(Bass)'
|
stemset_n = '(Bass)'
|
||||||
source_val = 0
|
source_val = 0
|
||||||
n_fft_scale_set=16384
|
n_fft_scale_set=16384
|
||||||
dim_f_set=2048
|
dim_f_set=2048
|
||||||
else:
|
else:
|
||||||
model_set = data['mdxnetModel']
|
model_set = data['mdxnetModel']
|
||||||
model_set_name = data['mdxnetModel']
|
model_set_name = data['mdxnetModel']
|
||||||
modeltype = stemset
|
modeltype = stemset
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
|
noise_pro = 'MDX-NET_Noise_Profile_Full_Band'
|
||||||
stemset_n = stem_name
|
stemset_n = stem_name
|
||||||
source_val = source_val_set
|
source_val = source_val_set
|
||||||
n_fft_scale_set=int(data['n_fft_scale'])
|
n_fft_scale_set=int(data['n_fft_scale'])
|
||||||
dim_f_set=int(data['dim_f'])
|
dim_f_set=int(data['dim_f'])
|
||||||
|
|
||||||
MDXModelName=('models/MDX_Net_Models/' + model_set_name + '.onnx')
|
|
||||||
mdx_model_hash = hashlib.md5(open(MDXModelName, 'rb').read()).hexdigest()
|
|
||||||
print(mdx_model_hash)
|
|
||||||
|
|
||||||
|
|
||||||
if data['noise_pro_select'] == 'Auto Select':
|
if data['noise_pro_select'] == 'Auto Select':
|
||||||
@@ -988,12 +967,8 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
|
|
||||||
print(n_fft_scale_set)
|
print(n_fft_scale_set)
|
||||||
print(dim_f_set)
|
print(dim_f_set)
|
||||||
print(data['DemucsModel'])
|
print(data['DemucsModel_MDX'])
|
||||||
|
|
||||||
overlap_set = float(data['overlap'])
|
|
||||||
channel_set = int(data['channel'])
|
|
||||||
margin_set = int(data['margin'])
|
|
||||||
shift_set = int(data['shifts'])
|
|
||||||
|
|
||||||
stime = time.perf_counter()
|
stime = time.perf_counter()
|
||||||
progress_var.set(0)
|
progress_var.set(0)
|
||||||
@@ -1003,6 +978,45 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
try: #Load File(s)
|
try: #Load File(s)
|
||||||
for file_num, music_file in tqdm(enumerate(data['input_paths'], start=1)):
|
for file_num, music_file in tqdm(enumerate(data['input_paths'], start=1)):
|
||||||
|
|
||||||
|
overlap_set = float(data['overlap'])
|
||||||
|
channel_set = int(data['channel'])
|
||||||
|
margin_set = int(data['margin'])
|
||||||
|
shift_set = int(data['shifts'])
|
||||||
|
demucs_model_set = data['DemucsModel_MDX']
|
||||||
|
split_mode = data['split_mode']
|
||||||
|
demucs_switch = data['demucsmodel']
|
||||||
|
|
||||||
|
if stemset_n == '(Bass)':
|
||||||
|
if 'UVR' in demucs_model_set:
|
||||||
|
text_widget.write('The selected Demucs model can only be used with vocal stems.\n')
|
||||||
|
text_widget.write('Please select a 4 stem Demucs model and try again.\n\n')
|
||||||
|
text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}')
|
||||||
|
progress_var.set(0)
|
||||||
|
button_widget.configure(state=tk.NORMAL) # Enable Button
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
if stemset_n == '(Drums)':
|
||||||
|
if 'UVR' in demucs_model_set:
|
||||||
|
text_widget.write('The selected Demucs model can only be used with vocal stems.\n')
|
||||||
|
text_widget.write('Please select a 4 stem Demucs model and try again.\n\n')
|
||||||
|
text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}')
|
||||||
|
progress_var.set(0)
|
||||||
|
button_widget.configure(state=tk.NORMAL) # Enable Button
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
if stemset_n == '(Other)':
|
||||||
|
if 'UVR' in demucs_model_set:
|
||||||
|
text_widget.write('The selected Demucs model can only be used with vocal stems.\n')
|
||||||
|
text_widget.write('Please select a 4 stem Demucs model and try again.\n\n')
|
||||||
|
text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}')
|
||||||
|
progress_var.set(0)
|
||||||
|
button_widget.configure(state=tk.NORMAL) # Enable Button
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
|
||||||
_mixture = f'{data["input_paths"]}'
|
_mixture = f'{data["input_paths"]}'
|
||||||
_basename = f'{data["export_path"]}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}'
|
_basename = f'{data["export_path"]}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}'
|
||||||
|
|
||||||
@@ -1063,11 +1077,10 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
|
|
||||||
e = os.path.join(data["export_path"])
|
e = os.path.join(data["export_path"])
|
||||||
|
|
||||||
demucsmodel = 'models/Demucs_Model/' + str(data['DemucsModel'])
|
demucsmodel = 'models/Demucs_Models/' + str(data['DemucsModel_MDX'])
|
||||||
|
|
||||||
pred = Predictor()
|
pred = Predictor()
|
||||||
pred.prediction_setup(demucs_name=demucsmodel,
|
pred.prediction_setup()
|
||||||
channels=channel_set)
|
|
||||||
|
|
||||||
print(demucsmodel)
|
print(demucsmodel)
|
||||||
|
|
||||||
@@ -1373,7 +1386,10 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
text_widget.write("\n" + f'Please address the error and try again.' + "\n")
|
text_widget.write("\n" + f'Please address the error and try again.' + "\n")
|
||||||
text_widget.write(f'If this error persists, please contact the developers with the error details.\n\n')
|
text_widget.write(f'If this error persists, please contact the developers with the error details.\n\n')
|
||||||
text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}')
|
text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}')
|
||||||
torch.cuda.empty_cache()
|
try:
|
||||||
|
torch.cuda.empty_cache()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
button_widget.configure(state=tk.NORMAL) # Enable Button
|
button_widget.configure(state=tk.NORMAL) # Enable Button
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|||||||
1225
inference_demucs.py
Normal file
1225
inference_demucs.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,7 +1,5 @@
|
|||||||
from functools import total_ordering
|
|
||||||
import os
|
import os
|
||||||
import importlib
|
import importlib
|
||||||
from statistics import mode
|
|
||||||
import pydub
|
import pydub
|
||||||
import shutil
|
import shutil
|
||||||
import hashlib
|
import hashlib
|
||||||
@@ -1006,7 +1004,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
with open('errorlog.txt', 'w') as f:
|
with open('errorlog.txt', 'w') as f:
|
||||||
f.write(f'Last Error Received:\n\n' +
|
f.write(f'Last Error Received:\n\n' +
|
||||||
f'Error Received while processing "{os.path.basename(music_file)}":\n' +
|
f'Error Received while processing "{os.path.basename(music_file)}":\n' +
|
||||||
f'Process Method: Ensemble Mode\n\n' +
|
f'Process Method: VR Architecture\n\n' +
|
||||||
f'Could not write audio file.\n' +
|
f'Could not write audio file.\n' +
|
||||||
f'This could be due to low storage on target device or a system permissions issue.\n' +
|
f'This could be due to low storage on target device or a system permissions issue.\n' +
|
||||||
f'If the error persists, please contact the developers.\n\n' +
|
f'If the error persists, please contact the developers.\n\n' +
|
||||||
@@ -1031,7 +1029,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
with open('errorlog.txt', 'w') as f:
|
with open('errorlog.txt', 'w') as f:
|
||||||
f.write(f'Last Error Received:\n\n' +
|
f.write(f'Last Error Received:\n\n' +
|
||||||
f'Error Received while processing "{os.path.basename(music_file)}":\n' +
|
f'Error Received while processing "{os.path.basename(music_file)}":\n' +
|
||||||
f'Process Method: Ensemble Mode\n\n' +
|
f'Process Method: VR Architecture\n\n' +
|
||||||
f'The application was unable to allocate enough system memory to use this model.\n' +
|
f'The application was unable to allocate enough system memory to use this model.\n' +
|
||||||
f'Please do the following:\n\n1. Restart this application.\n2. Ensure any CPU intensive applications are closed.\n3. Then try again.\n\n' +
|
f'Please do the following:\n\n1. Restart this application.\n2. Ensure any CPU intensive applications are closed.\n3. Then try again.\n\n' +
|
||||||
f'Please Note: Intel Pentium and Intel Celeron processors do not work well with this application.\n\n' +
|
f'Please Note: Intel Pentium and Intel Celeron processors do not work well with this application.\n\n' +
|
||||||
|
|||||||
@@ -11,8 +11,10 @@ import subprocess
|
|||||||
import soundfile as sf
|
import soundfile as sf
|
||||||
import torch
|
import torch
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from demucs.model import Demucs
|
from demucs.pretrained import get_model as _gm
|
||||||
from demucs.utils import apply_model
|
from demucs.hdemucs import HDemucs
|
||||||
|
from demucs.apply import BagOfModels, apply_model
|
||||||
|
import pathlib
|
||||||
from models import get_models, spec_effects
|
from models import get_models, spec_effects
|
||||||
import onnxruntime as ort
|
import onnxruntime as ort
|
||||||
import time
|
import time
|
||||||
@@ -47,32 +49,36 @@ class Predictor():
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def prediction_setup(self, demucs_name,
|
def prediction_setup(self):
|
||||||
channels=64):
|
|
||||||
|
|
||||||
global device
|
global device
|
||||||
|
|
||||||
print('Print the gpu setting: ', data['gpu'])
|
|
||||||
|
|
||||||
if data['gpu'] >= 0:
|
if data['gpu'] >= 0:
|
||||||
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
|
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
|
||||||
if data['gpu'] == -1:
|
if data['gpu'] == -1:
|
||||||
device = torch.device('cpu')
|
device = torch.device('cpu')
|
||||||
|
|
||||||
if data['demucsmodel']:
|
if demucs_switch == 'on':
|
||||||
self.demucs = Demucs(sources=["drums", "bass", "other", "vocals"], channels=channels)
|
if 'UVR' in demucs_model_set:
|
||||||
|
self.demucs = HDemucs(sources=["other", "vocals"])
|
||||||
|
else:
|
||||||
|
self.demucs = HDemucs(sources=["drums", "bass", "other", "vocals"])
|
||||||
widget_text.write(base_text + 'Loading Demucs model... ')
|
widget_text.write(base_text + 'Loading Demucs model... ')
|
||||||
update_progress(**progress_kwargs,
|
update_progress(**progress_kwargs,
|
||||||
step=0.05)
|
step=0.05)
|
||||||
|
path_d = Path('models/Demucs_Models')
|
||||||
|
self.demucs = _gm(name=demucs_model_set, repo=path_d)
|
||||||
self.demucs.to(device)
|
self.demucs.to(device)
|
||||||
self.demucs.load_state_dict(torch.load(demucs_name))
|
|
||||||
widget_text.write('Done!\n')
|
|
||||||
self.demucs.eval()
|
self.demucs.eval()
|
||||||
|
widget_text.write('Done!\n')
|
||||||
|
if isinstance(self.demucs, BagOfModels):
|
||||||
|
widget_text.write(base_text + f"Selected Demucs model is a bag of {len(self.demucs.models)} model(s).\n")
|
||||||
|
|
||||||
self.onnx_models = {}
|
self.onnx_models = {}
|
||||||
c = 0
|
c = 0
|
||||||
|
|
||||||
self.models = get_models('tdf_extra', load=False, device=cpu, stems=modeltype, n_fft_scale=n_fft_scale_set, dim_f=dim_f_set)
|
self.models = get_models('tdf_extra', load=False, device=cpu, stems=modeltype, n_fft_scale=n_fft_scale_set, dim_f=dim_f_set)
|
||||||
if not data['demucs_only']:
|
if demucs_only == 'off':
|
||||||
widget_text.write(base_text + 'Loading ONNX model... ')
|
widget_text.write(base_text + 'Loading ONNX model... ')
|
||||||
|
|
||||||
update_progress(**progress_kwargs,
|
update_progress(**progress_kwargs,
|
||||||
@@ -89,23 +95,25 @@ class Predictor():
|
|||||||
elif data['gpu'] == -1:
|
elif data['gpu'] == -1:
|
||||||
run_type = ['CPUExecutionProvider']
|
run_type = ['CPUExecutionProvider']
|
||||||
|
|
||||||
print(run_type)
|
if demucs_only == 'off':
|
||||||
print(str(device))
|
self.onnx_models[c] = ort.InferenceSession(os.path.join('models/MDX_Net_Models', model_set), providers=run_type)
|
||||||
|
print(demucs_model_set)
|
||||||
print('model_set: ', model_set)
|
|
||||||
self.onnx_models[c] = ort.InferenceSession(os.path.join('models/MDX_Net_Models', model_set), providers=run_type)
|
|
||||||
|
|
||||||
if not data['demucs_only']:
|
|
||||||
widget_text.write('Done!\n')
|
widget_text.write('Done!\n')
|
||||||
|
elif demucs_only == 'on':
|
||||||
|
print(demucs_model_set)
|
||||||
|
pass
|
||||||
|
|
||||||
def prediction(self, m):
|
def prediction(self, m):
|
||||||
#mix, rate = sf.read(m)
|
|
||||||
mix, rate = librosa.load(m, mono=False, sr=44100)
|
mix, samplerate = librosa.load(m, mono=False, sr=44100)
|
||||||
if mix.ndim == 1:
|
if mix.ndim == 1:
|
||||||
mix = np.asfortranarray([mix,mix])
|
mix = np.asfortranarray([mix,mix])
|
||||||
|
samplerate = samplerate
|
||||||
|
|
||||||
mix = mix.T
|
mix = mix.T
|
||||||
sources = self.demix(mix.T)
|
sources = self.demix(mix.T)
|
||||||
widget_text.write(base_text + 'Inferences complete!\n')
|
widget_text.write(base_text + 'Inferences complete!\n')
|
||||||
|
|
||||||
c = -1
|
c = -1
|
||||||
|
|
||||||
#Main Save Path
|
#Main Save Path
|
||||||
@@ -154,20 +162,22 @@ class Predictor():
|
|||||||
else:
|
else:
|
||||||
file_exists = 'not_there'
|
file_exists = 'not_there'
|
||||||
|
|
||||||
|
if demucs_only == 'on':
|
||||||
|
data['noisereduc_s'] == 'None'
|
||||||
|
|
||||||
if not data['noisereduc_s'] == 'None':
|
if not data['noisereduc_s'] == 'None':
|
||||||
c += 1
|
c += 1
|
||||||
if not data['demucsmodel']:
|
if demucs_switch == 'off':
|
||||||
if data['inst_only'] and not data['voc_only']:
|
if data['inst_only'] and not data['voc_only']:
|
||||||
widget_text.write(base_text + 'Preparing to save Instrumental...')
|
widget_text.write(base_text + 'Preparing to save Instrumental...')
|
||||||
else:
|
else:
|
||||||
widget_text.write(base_text + 'Saving vocals... ')
|
widget_text.write(base_text + 'Saving vocals... ')
|
||||||
sf.write(non_reduced_vocal_path, sources[c].T, rate)
|
sf.write(non_reduced_vocal_path, sources[c].T, samplerate)
|
||||||
update_progress(**progress_kwargs,
|
update_progress(**progress_kwargs,
|
||||||
step=(0.9))
|
step=(0.9))
|
||||||
widget_text.write('Done!\n')
|
widget_text.write('Done!\n')
|
||||||
widget_text.write(base_text + 'Performing Noise Reduction... ')
|
widget_text.write(base_text + 'Performing Noise Reduction... ')
|
||||||
reduction_sen = float(int(data['noisereduc_s'])/10)
|
reduction_sen = float(int(data['noisereduc_s'])/10)
|
||||||
print(noise_pro_set)
|
|
||||||
subprocess.call("lib_v5\\sox\\sox.exe" + ' "' +
|
subprocess.call("lib_v5\\sox\\sox.exe" + ' "' +
|
||||||
f"{str(non_reduced_vocal_path)}" + '" "' + f"{str(vocal_path)}" + '" ' +
|
f"{str(non_reduced_vocal_path)}" + '" "' + f"{str(vocal_path)}" + '" ' +
|
||||||
"noisered lib_v5\\sox\\" + noise_pro_set + ".prof " + f"{reduction_sen}",
|
"noisered lib_v5\\sox\\" + noise_pro_set + ".prof " + f"{reduction_sen}",
|
||||||
@@ -181,31 +191,49 @@ class Predictor():
|
|||||||
widget_text.write(base_text + 'Preparing Instrumental...')
|
widget_text.write(base_text + 'Preparing Instrumental...')
|
||||||
else:
|
else:
|
||||||
widget_text.write(base_text + 'Saving Vocals... ')
|
widget_text.write(base_text + 'Saving Vocals... ')
|
||||||
sf.write(non_reduced_vocal_path, sources[3].T, rate)
|
if demucs_only == 'on':
|
||||||
update_progress(**progress_kwargs,
|
if 'UVR' in model_set_name:
|
||||||
step=(0.9))
|
sf.write(vocal_path, sources[1].T, samplerate)
|
||||||
widget_text.write('Done!\n')
|
update_progress(**progress_kwargs,
|
||||||
widget_text.write(base_text + 'Performing Noise Reduction... ')
|
step=(0.95))
|
||||||
reduction_sen = float(data['noisereduc_s'])/10
|
widget_text.write('Done!\n')
|
||||||
subprocess.call("lib_v5\\sox\\sox.exe" + ' "' +
|
if 'extra' in model_set_name:
|
||||||
f"{str(non_reduced_vocal_path)}" + '" "' + f"{str(vocal_path)}" + '" ' +
|
sf.write(vocal_path, sources[3].T, samplerate)
|
||||||
"noisered lib_v5\\sox\\" + noise_pro_set + ".prof " + f"{reduction_sen}",
|
update_progress(**progress_kwargs,
|
||||||
shell=True, stdout=subprocess.PIPE,
|
step=(0.95))
|
||||||
stdin=subprocess.PIPE, stderr=subprocess.PIPE)
|
widget_text.write('Done!\n')
|
||||||
update_progress(**progress_kwargs,
|
else:
|
||||||
step=(0.95))
|
sf.write(non_reduced_vocal_path, sources[3].T, samplerate)
|
||||||
widget_text.write('Done!\n')
|
update_progress(**progress_kwargs,
|
||||||
|
step=(0.9))
|
||||||
|
widget_text.write('Done!\n')
|
||||||
|
widget_text.write(base_text + 'Performing Noise Reduction... ')
|
||||||
|
reduction_sen = float(data['noisereduc_s'])/10
|
||||||
|
subprocess.call("lib_v5\\sox\\sox.exe" + ' "' +
|
||||||
|
f"{str(non_reduced_vocal_path)}" + '" "' + f"{str(vocal_path)}" + '" ' +
|
||||||
|
"noisered lib_v5\\sox\\" + noise_pro_set + ".prof " + f"{reduction_sen}",
|
||||||
|
shell=True, stdout=subprocess.PIPE,
|
||||||
|
stdin=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
|
update_progress(**progress_kwargs,
|
||||||
|
step=(0.95))
|
||||||
|
widget_text.write('Done!\n')
|
||||||
else:
|
else:
|
||||||
c += 1
|
c += 1
|
||||||
if not data['demucsmodel']:
|
if demucs_switch == 'off':
|
||||||
widget_text.write(base_text + 'Saving Vocals..')
|
widget_text.write(base_text + 'Saving Vocals..')
|
||||||
sf.write(vocal_path, sources[c].T, rate)
|
sf.write(vocal_path, sources[c].T, samplerate)
|
||||||
update_progress(**progress_kwargs,
|
update_progress(**progress_kwargs,
|
||||||
step=(0.9))
|
step=(0.9))
|
||||||
widget_text.write('Done!\n')
|
widget_text.write('Done!\n')
|
||||||
else:
|
else:
|
||||||
widget_text.write(base_text + 'Saving Vocals... ')
|
widget_text.write(base_text + 'Saving Vocals... ')
|
||||||
sf.write(vocal_path, sources[3].T, rate)
|
if demucs_only == 'on':
|
||||||
|
if 'UVR' in model_set_name:
|
||||||
|
sf.write(vocal_path, sources[1].T, samplerate)
|
||||||
|
if 'extra' in model_set_name:
|
||||||
|
sf.write(vocal_path, sources[3].T, samplerate)
|
||||||
|
else:
|
||||||
|
sf.write(vocal_path, sources[3].T, samplerate)
|
||||||
update_progress(**progress_kwargs,
|
update_progress(**progress_kwargs,
|
||||||
step=(0.9))
|
step=(0.9))
|
||||||
widget_text.write('Done!\n')
|
widget_text.write('Done!\n')
|
||||||
@@ -356,22 +384,35 @@ class Predictor():
|
|||||||
if end == samples:
|
if end == samples:
|
||||||
break
|
break
|
||||||
|
|
||||||
if not data['demucsmodel']:
|
|
||||||
|
if demucs_switch == 'off':
|
||||||
sources = self.demix_base(segmented_mix, margin_size=margin)
|
sources = self.demix_base(segmented_mix, margin_size=margin)
|
||||||
elif data['demucs_only']:
|
elif demucs_only == 'on':
|
||||||
sources = self.demix_demucs(segmented_mix, margin_size=margin)
|
if split_mode == True:
|
||||||
|
sources = self.demix_demucs_split(mix)
|
||||||
|
if split_mode == False:
|
||||||
|
sources = self.demix_demucs(segmented_mix, margin_size=margin)
|
||||||
else: # both, apply spec effects
|
else: # both, apply spec effects
|
||||||
base_out = self.demix_base(segmented_mix, margin_size=margin)
|
base_out = self.demix_base(segmented_mix, margin_size=margin)
|
||||||
demucs_out = self.demix_demucs(segmented_mix, margin_size=margin)
|
if split_mode == True:
|
||||||
|
demucs_out = self.demix_demucs_split(mix)
|
||||||
|
if split_mode == False:
|
||||||
|
demucs_out = self.demix_demucs(segmented_mix, margin_size=margin)
|
||||||
nan_count = np.count_nonzero(np.isnan(demucs_out)) + np.count_nonzero(np.isnan(base_out))
|
nan_count = np.count_nonzero(np.isnan(demucs_out)) + np.count_nonzero(np.isnan(base_out))
|
||||||
if nan_count > 0:
|
if nan_count > 0:
|
||||||
print('Warning: there are {} nan values in the array(s).'.format(nan_count))
|
print('Warning: there are {} nan values in the array(s).'.format(nan_count))
|
||||||
demucs_out, base_out = np.nan_to_num(demucs_out), np.nan_to_num(base_out)
|
demucs_out, base_out = np.nan_to_num(demucs_out), np.nan_to_num(base_out)
|
||||||
sources = {}
|
sources = {}
|
||||||
|
|
||||||
sources[3] = (spec_effects(wave=[demucs_out[3],base_out[0]],
|
if 'UVR' in demucs_model_set:
|
||||||
algorithm=data['mixing'],
|
sources[3] = (spec_effects(wave=[demucs_out[1],base_out[0]],
|
||||||
value=b[3])*float(data['compensate'])) # compensation
|
algorithm=data['mixing'],
|
||||||
|
value=b[3])*float(data['compensate'])) # compensation
|
||||||
|
else:
|
||||||
|
sources[3] = (spec_effects(wave=[demucs_out[3],base_out[0]],
|
||||||
|
algorithm=data['mixing'],
|
||||||
|
value=b[3])*float(data['compensate'])) # compensation
|
||||||
|
|
||||||
return sources
|
return sources
|
||||||
|
|
||||||
def demix_base(self, mixes, margin_size):
|
def demix_base(self, mixes, margin_size):
|
||||||
@@ -384,7 +425,7 @@ class Predictor():
|
|||||||
print(' Running ONNX Inference...')
|
print(' Running ONNX Inference...')
|
||||||
for mix in mixes:
|
for mix in mixes:
|
||||||
gui_progress_bar_onnx += 1
|
gui_progress_bar_onnx += 1
|
||||||
if data['demucsmodel']:
|
if demucs_switch == 'on':
|
||||||
update_progress(**progress_kwargs,
|
update_progress(**progress_kwargs,
|
||||||
step=(0.1 + (0.5/onnxitera_calc * gui_progress_bar_onnx)))
|
step=(0.1 + (0.5/onnxitera_calc * gui_progress_bar_onnx)))
|
||||||
else:
|
else:
|
||||||
@@ -430,13 +471,18 @@ class Predictor():
|
|||||||
return _sources
|
return _sources
|
||||||
|
|
||||||
def demix_demucs(self, mix, margin_size):
|
def demix_demucs(self, mix, margin_size):
|
||||||
|
print('shift_set ', shift_set)
|
||||||
processed = {}
|
processed = {}
|
||||||
demucsitera = len(mix)
|
demucsitera = len(mix)
|
||||||
demucsitera_calc = demucsitera * 2
|
demucsitera_calc = demucsitera * 2
|
||||||
gui_progress_bar_demucs = 0
|
gui_progress_bar_demucs = 0
|
||||||
|
|
||||||
|
widget_text.write(base_text + "Split Mode is off. (Chunks enabled for Demucs Model)\n")
|
||||||
|
|
||||||
widget_text.write(base_text + "Running Demucs Inference...\n")
|
widget_text.write(base_text + "Running Demucs Inference...\n")
|
||||||
widget_text.write(base_text + "Processing "f"{len(mix)} slices... ")
|
widget_text.write(base_text + "Processing "f"{len(mix)} slices... ")
|
||||||
print(' Running Demucs Inference...')
|
print('Running Demucs Inference...')
|
||||||
|
|
||||||
for nmix in mix:
|
for nmix in mix:
|
||||||
gui_progress_bar_demucs += 1
|
gui_progress_bar_demucs += 1
|
||||||
update_progress(**progress_kwargs,
|
update_progress(**progress_kwargs,
|
||||||
@@ -446,7 +492,7 @@ class Predictor():
|
|||||||
ref = cmix.mean(0)
|
ref = cmix.mean(0)
|
||||||
cmix = (cmix - ref.mean()) / ref.std()
|
cmix = (cmix - ref.mean()) / ref.std()
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
sources = apply_model(self.demucs, cmix.to(device), split=True, overlap=overlap_set, shifts=shift_set)
|
sources = apply_model(self.demucs, cmix[None], split=split_mode, device=device, overlap=overlap_set, shifts=shift_set, progress=False)[0]
|
||||||
sources = (sources * ref.std() + ref.mean()).cpu().numpy()
|
sources = (sources * ref.std() + ref.mean()).cpu().numpy()
|
||||||
sources[[0,1]] = sources[[1,0]]
|
sources[[0,1]] = sources[[1,0]]
|
||||||
|
|
||||||
@@ -461,6 +507,26 @@ class Predictor():
|
|||||||
widget_text.write('Done!\n')
|
widget_text.write('Done!\n')
|
||||||
return sources
|
return sources
|
||||||
|
|
||||||
|
def demix_demucs_split(self, mix):
|
||||||
|
|
||||||
|
print('shift_set ', shift_set)
|
||||||
|
widget_text.write(base_text + "Split Mode is on. (Chunks disabled for Demucs Model)\n")
|
||||||
|
widget_text.write(base_text + "Running Demucs Inference...\n")
|
||||||
|
widget_text.write(base_text + "Processing "f"{len(mix)} slices... ")
|
||||||
|
print(' Running Demucs Inference...')
|
||||||
|
|
||||||
|
mix = torch.tensor(mix, dtype=torch.float32)
|
||||||
|
ref = mix.mean(0)
|
||||||
|
mix = (mix - ref.mean()) / ref.std()
|
||||||
|
|
||||||
|
with torch.no_grad():
|
||||||
|
sources = apply_model(self.demucs, mix[None], split=split_mode, device=device, overlap=overlap_set, shifts=shift_set, progress=False)[0]
|
||||||
|
|
||||||
|
widget_text.write('Done!\n')
|
||||||
|
|
||||||
|
sources = (sources * ref.std() + ref.mean()).cpu().numpy()
|
||||||
|
sources[[0,1]] = sources[[1,0]]
|
||||||
|
return sources
|
||||||
|
|
||||||
def update_progress(progress_var, total_files, file_num, step: float = 1):
|
def update_progress(progress_var, total_files, file_num, step: float = 1):
|
||||||
"""Calculate the progress for the progress widget in the GUI"""
|
"""Calculate the progress for the progress widget in the GUI"""
|
||||||
@@ -567,7 +633,7 @@ data = {
|
|||||||
'chunks': 'auto',
|
'chunks': 'auto',
|
||||||
'non_red': False,
|
'non_red': False,
|
||||||
'noisereduc_s': 3,
|
'noisereduc_s': 3,
|
||||||
'ensChoose': 'Basic Ensemble',
|
'ensChoose': 'Basic VR Ensemble',
|
||||||
'algo': 'Instrumentals (Min Spec)',
|
'algo': 'Instrumentals (Min Spec)',
|
||||||
#Advanced Options
|
#Advanced Options
|
||||||
'appendensem': False,
|
'appendensem': False,
|
||||||
@@ -575,11 +641,11 @@ data = {
|
|||||||
'overlap': 0.5,
|
'overlap': 0.5,
|
||||||
'shifts': 0,
|
'shifts': 0,
|
||||||
'margin': 44100,
|
'margin': 44100,
|
||||||
'channel': 64,
|
'split_mode': False,
|
||||||
'compensate': 1.03597672895,
|
'compensate': 1.03597672895,
|
||||||
'demucs_only': False,
|
'demucs_only': False,
|
||||||
'mixing': 'Default',
|
'mixing': 'Default',
|
||||||
'DemucsModel': 'demucs_extra-3646af93_org.th',
|
'DemucsModel_MDX': 'UVR_Demucs_Model_1',
|
||||||
|
|
||||||
# Models
|
# Models
|
||||||
'instrumentalModel': None,
|
'instrumentalModel': None,
|
||||||
@@ -627,17 +693,21 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
global ModelName_2
|
global ModelName_2
|
||||||
global mdx_model_hash
|
global mdx_model_hash
|
||||||
|
|
||||||
|
global demucs_model_set
|
||||||
|
|
||||||
global channel_set
|
global channel_set
|
||||||
global margin_set
|
global margin_set
|
||||||
global overlap_set
|
global overlap_set
|
||||||
global shift_set
|
global shift_set
|
||||||
|
|
||||||
global noise_pro_set
|
global noise_pro_set
|
||||||
|
|
||||||
|
|
||||||
global n_fft_scale_set
|
global n_fft_scale_set
|
||||||
global dim_f_set
|
global dim_f_set
|
||||||
|
|
||||||
|
global split_mode
|
||||||
|
global demucs_switch
|
||||||
|
global demucs_only
|
||||||
|
|
||||||
# Update default settings
|
# Update default settings
|
||||||
default_chunks = data['chunks']
|
default_chunks = data['chunks']
|
||||||
default_noisereduc_s = data['noisereduc_s']
|
default_noisereduc_s = data['noisereduc_s']
|
||||||
@@ -666,12 +736,6 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
overlap_set = float(data['overlap'])
|
|
||||||
channel_set = int(data['channel'])
|
|
||||||
margin_set = int(data['margin'])
|
|
||||||
shift_set = int(data['shifts'])
|
|
||||||
|
|
||||||
n_fft_scale_set=6144
|
n_fft_scale_set=6144
|
||||||
dim_f_set=2048
|
dim_f_set=2048
|
||||||
|
|
||||||
@@ -770,7 +834,26 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
# Separation Preperation
|
# Separation Preperation
|
||||||
try: #Ensemble Dictionary
|
try: #Ensemble Dictionary
|
||||||
|
|
||||||
if not data['ensChoose'] == 'User Ensemble':
|
overlap_set = float(data['overlap'])
|
||||||
|
channel_set = int(data['channel'])
|
||||||
|
margin_set = int(data['margin'])
|
||||||
|
shift_set = int(data['shifts'])
|
||||||
|
demucs_model_set = data['DemucsModel_MDX']
|
||||||
|
split_mode = data['split_mode']
|
||||||
|
demucs_switch = data['demucsmodel']
|
||||||
|
|
||||||
|
if data['demucsmodel']:
|
||||||
|
demucs_switch = 'on'
|
||||||
|
else:
|
||||||
|
demucs_switch = 'off'
|
||||||
|
|
||||||
|
if data['demucs_only']:
|
||||||
|
demucs_only = 'on'
|
||||||
|
else:
|
||||||
|
demucs_only = 'off'
|
||||||
|
|
||||||
|
|
||||||
|
if not data['ensChoose'] == 'Manual Ensemble':
|
||||||
|
|
||||||
#1st Model
|
#1st Model
|
||||||
|
|
||||||
@@ -1220,38 +1303,33 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
vr_ensem_mdx_c_name = data['vr_ensem_mdx_c']
|
vr_ensem_mdx_c_name = data['vr_ensem_mdx_c']
|
||||||
vr_ensem_mdx_c = f'models/Main_Models/{vr_ensem_mdx_c_name}.pth'
|
vr_ensem_mdx_c = f'models/Main_Models/{vr_ensem_mdx_c_name}.pth'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#MDX-Net Model
|
#MDX-Net Model
|
||||||
try:
|
|
||||||
if data['mdx_ensem'] == 'UVR-MDX-NET 1':
|
if data['mdx_ensem'] == 'UVR-MDX-NET 1':
|
||||||
|
if os.path.isfile('models/MDX_Net_Models/UVR_MDXNET_1_9703.onnx'):
|
||||||
mdx_ensem = 'UVR_MDXNET_1_9703'
|
mdx_ensem = 'UVR_MDXNET_1_9703'
|
||||||
if data['mdx_ensem'] == 'UVR-MDX-NET 2':
|
else:
|
||||||
mdx_ensem = 'UVR_MDXNET_2_9682'
|
|
||||||
if data['mdx_ensem'] == 'UVR-MDX-NET 3':
|
|
||||||
mdx_ensem = 'UVR_MDXNET_3_9662'
|
|
||||||
if data['mdx_ensem'] == 'UVR-MDX-NET Karaoke':
|
|
||||||
mdx_ensem = 'UVR_MDXNET_KARA'
|
|
||||||
|
|
||||||
MDXModelName=('models/MDX_Net_Models/' + mdx_ensem + '.onnx')
|
|
||||||
mdx_model_hash = hashlib.md5(open(MDXModelName, 'rb').read()).hexdigest()
|
|
||||||
print(mdx_ensem)
|
|
||||||
except:
|
|
||||||
if data['mdx_ensem'] == 'UVR-MDX-NET 1':
|
|
||||||
mdx_ensem = 'UVR_MDXNET_9703'
|
mdx_ensem = 'UVR_MDXNET_9703'
|
||||||
if data['mdx_ensem'] == 'UVR-MDX-NET 2':
|
if data['mdx_ensem'] == 'UVR-MDX-NET 2':
|
||||||
|
if os.path.isfile('models/MDX_Net_Models/UVR_MDXNET_2_9682.onnx'):
|
||||||
|
mdx_ensem = 'UVR_MDXNET_2_9682'
|
||||||
|
else:
|
||||||
mdx_ensem = 'UVR_MDXNET_9682'
|
mdx_ensem = 'UVR_MDXNET_9682'
|
||||||
if data['mdx_ensem'] == 'UVR-MDX-NET 3':
|
if data['mdx_ensem'] == 'UVR-MDX-NET 3':
|
||||||
|
if os.path.isfile('models/MDX_Net_Models/UVR_MDXNET_3_9662.onnx'):
|
||||||
|
mdx_ensem = 'UVR_MDXNET_3_9662'
|
||||||
|
else:
|
||||||
mdx_ensem = 'UVR_MDXNET_9662'
|
mdx_ensem = 'UVR_MDXNET_9662'
|
||||||
if data['mdx_ensem'] == 'UVR-MDX-NET Karaoke':
|
if data['mdx_ensem'] == 'UVR-MDX-NET Karaoke':
|
||||||
mdx_ensem = 'UVR_MDXNET_KARA'
|
mdx_ensem = 'UVR_MDXNET_KARA'
|
||||||
|
if data['mdx_ensem'] == 'Demucs UVR Model 1':
|
||||||
MDXModelName=('models/MDX_Net_Models/' + mdx_ensem + '.onnx')
|
mdx_ensem = 'UVR_Demucs_Model_1'
|
||||||
mdx_model_hash = hashlib.md5(open(MDXModelName, 'rb').read()).hexdigest()
|
if data['mdx_ensem'] == 'Demucs UVR Model 2':
|
||||||
print(mdx_model_hash)
|
mdx_ensem = 'UVR_Demucs_Model_2'
|
||||||
print(mdx_ensem)
|
if data['mdx_ensem'] == 'Demucs mdx_extra':
|
||||||
|
mdx_ensem = 'mdx_extra'
|
||||||
|
if data['mdx_ensem'] == 'Demucs mdx_extra_q':
|
||||||
|
mdx_ensem = 'mdx_extra_q'
|
||||||
|
|
||||||
#MDX-Net Model 2
|
#MDX-Net Model 2
|
||||||
|
|
||||||
@@ -1263,6 +1341,14 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
mdx_ensem_b = 'UVR_MDXNET_3_9662'
|
mdx_ensem_b = 'UVR_MDXNET_3_9662'
|
||||||
if data['mdx_ensem_b'] == 'UVR-MDX-NET Karaoke':
|
if data['mdx_ensem_b'] == 'UVR-MDX-NET Karaoke':
|
||||||
mdx_ensem_b = 'UVR_MDXNET_KARA'
|
mdx_ensem_b = 'UVR_MDXNET_KARA'
|
||||||
|
if data['mdx_ensem_b'] == 'Demucs UVR Model 1':
|
||||||
|
mdx_ensem_b = 'UVR_Demucs_Model_1'
|
||||||
|
if data['mdx_ensem_b'] == 'Demucs UVR Model 2':
|
||||||
|
mdx_ensem_b = 'UVR_Demucs_Model_2'
|
||||||
|
if data['mdx_ensem_b'] == 'Demucs mdx_extra':
|
||||||
|
mdx_ensem_b = 'mdx_extra'
|
||||||
|
if data['mdx_ensem_b'] == 'Demucs mdx_extra_q':
|
||||||
|
mdx_ensem_b = 'mdx_extra_q'
|
||||||
if data['mdx_ensem_b'] == 'No Model':
|
if data['mdx_ensem_b'] == 'No Model':
|
||||||
mdx_ensem_b = 'pass'
|
mdx_ensem_b = 'pass'
|
||||||
|
|
||||||
@@ -1456,7 +1542,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
if data['ensChoose'] == 'Basic Ensemble':
|
if data['ensChoose'] == 'Basic VR Ensemble':
|
||||||
loops = Basic_Ensem
|
loops = Basic_Ensem
|
||||||
ensefolder = 'Basic_Ensemble_Outputs'
|
ensefolder = 'Basic_Ensemble_Outputs'
|
||||||
if data['vr_ensem_c'] == 'No Model' and data['vr_ensem_d'] == 'No Model' and data['vr_ensem_e'] == 'No Model':
|
if data['vr_ensem_c'] == 'No Model' and data['vr_ensem_d'] == 'No Model' and data['vr_ensem_e'] == 'No Model':
|
||||||
@@ -1487,7 +1573,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
loops = Vocal_Models
|
loops = Vocal_Models
|
||||||
ensefolder = 'Vocal_Models_Ensemble_Outputs'
|
ensefolder = 'Vocal_Models_Ensemble_Outputs'
|
||||||
ensemode = 'Vocal_Models'
|
ensemode = 'Vocal_Models'
|
||||||
if data['ensChoose'] == 'MDX-Net/VR Ensemble':
|
if data['ensChoose'] == 'Multi-AI Ensemble':
|
||||||
loops = mdx_vr
|
loops = mdx_vr
|
||||||
ensefolder = 'MDX_VR_Ensemble_Outputs'
|
ensefolder = 'MDX_VR_Ensemble_Outputs'
|
||||||
if data['vr_ensem'] == 'No Model' and data['vr_ensem_mdx_a'] == 'No Model' and data['vr_ensem_mdx_b'] == 'No Model' and data['vr_ensem_mdx_c'] == 'No Model':
|
if data['vr_ensem'] == 'No Model' and data['vr_ensem_mdx_a'] == 'No Model' and data['vr_ensem_mdx_b'] == 'No Model' and data['vr_ensem_mdx_c'] == 'No Model':
|
||||||
@@ -1511,7 +1597,6 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
|
|
||||||
#Prepare Audiofile(s)
|
#Prepare Audiofile(s)
|
||||||
for file_num, music_file in enumerate(data['input_paths'], start=1):
|
for file_num, music_file in enumerate(data['input_paths'], start=1):
|
||||||
print(data['input_paths'])
|
|
||||||
# -Get text and update progress-
|
# -Get text and update progress-
|
||||||
base_text = get_baseText(total_files=len(data['input_paths']),
|
base_text = get_baseText(total_files=len(data['input_paths']),
|
||||||
file_num=file_num)
|
file_num=file_num)
|
||||||
@@ -1609,9 +1694,9 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
presentmodel = Path(c['model_location'])
|
presentmodel = Path(c['model_location'])
|
||||||
|
|
||||||
if presentmodel.is_file():
|
if presentmodel.is_file():
|
||||||
print(f'The file {presentmodel} exist')
|
print(f'The file {presentmodel} exists')
|
||||||
else:
|
else:
|
||||||
if data['ensChoose'] == 'MDX-Net/VR Ensemble':
|
if data['ensChoose'] == 'Multi-AI Ensemble':
|
||||||
text_widget.write(base_text + 'Model "' + c['model_name'] + '.pth" is missing.\n')
|
text_widget.write(base_text + 'Model "' + c['model_name'] + '.pth" is missing.\n')
|
||||||
text_widget.write(base_text + 'Installation of v5 Model Expansion Pack required to use this model.\n')
|
text_widget.write(base_text + 'Installation of v5 Model Expansion Pack required to use this model.\n')
|
||||||
text_widget.write(base_text + f'If the error persists, please verify all models are present.\n\n')
|
text_widget.write(base_text + f'If the error persists, please verify all models are present.\n\n')
|
||||||
@@ -1963,7 +2048,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
text_widget.write(base_text + 'Completed Seperation!\n\n')
|
text_widget.write(base_text + 'Completed Seperation!\n\n')
|
||||||
|
|
||||||
|
|
||||||
if data['ensChoose'] == 'MDX-Net/VR Ensemble':
|
if data['ensChoose'] == 'Multi-AI Ensemble':
|
||||||
|
|
||||||
mdx_name = c['mdx_model_name']
|
mdx_name = c['mdx_model_name']
|
||||||
|
|
||||||
@@ -1973,46 +2058,77 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
text_widget.write('Ensemble Mode - Running Model - ' + mdx_name + '\n\n')
|
text_widget.write('Ensemble Mode - Running Model - ' + mdx_name + '\n\n')
|
||||||
|
|
||||||
if mdx_name == 'UVR_MDXNET_1_9703':
|
if mdx_name == 'UVR_MDXNET_1_9703':
|
||||||
|
demucs_only = 'off'
|
||||||
model_set = 'UVR_MDXNET_1_9703.onnx'
|
model_set = 'UVR_MDXNET_1_9703.onnx'
|
||||||
model_set_name = 'UVR_MDXNET_1_9703'
|
model_set_name = 'UVR_MDXNET_1_9703'
|
||||||
modeltype = 'v'
|
modeltype = 'v'
|
||||||
|
demucs_model_set = data['DemucsModel_MDX']
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
||||||
if mdx_name == 'UVR_MDXNET_2_9682':
|
if mdx_name == 'UVR_MDXNET_2_9682':
|
||||||
|
demucs_only = 'off'
|
||||||
model_set = 'UVR_MDXNET_2_9682.onnx'
|
model_set = 'UVR_MDXNET_2_9682.onnx'
|
||||||
model_set_name = 'UVR_MDXNET_2_9682'
|
model_set_name = 'UVR_MDXNET_2_9682'
|
||||||
modeltype = 'v'
|
modeltype = 'v'
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
||||||
if mdx_name == 'UVR_MDXNET_3_9662':
|
if mdx_name == 'UVR_MDXNET_3_9662':
|
||||||
|
demucs_only = 'off'
|
||||||
model_set = 'UVR_MDXNET_3_9662.onnx'
|
model_set = 'UVR_MDXNET_3_9662.onnx'
|
||||||
model_set_name = 'UVR_MDXNET_3_9662'
|
model_set_name = 'UVR_MDXNET_3_9662'
|
||||||
modeltype = 'v'
|
modeltype = 'v'
|
||||||
|
demucs_model_set = data['DemucsModel_MDX']
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
||||||
if mdx_name == 'UVR_MDXNET_KARA':
|
if mdx_name == 'UVR_MDXNET_KARA':
|
||||||
|
demucs_only = 'off'
|
||||||
model_set = 'UVR_MDXNET_KARA.onnx'
|
model_set = 'UVR_MDXNET_KARA.onnx'
|
||||||
model_set_name = 'UVR_MDXNET_KARA'
|
model_set_name = 'UVR_MDXNET_KARA'
|
||||||
modeltype = 'v'
|
modeltype = 'v'
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
||||||
if mdx_name == 'UVR_MDXNET_9703':
|
if mdx_name == 'UVR_MDXNET_9703':
|
||||||
|
demucs_only = 'off'
|
||||||
model_set = 'UVR_MDXNET_9703.onnx'
|
model_set = 'UVR_MDXNET_9703.onnx'
|
||||||
model_set_name = 'UVR_MDXNET_9703'
|
model_set_name = 'UVR_MDXNET_9703'
|
||||||
modeltype = 'v'
|
modeltype = 'v'
|
||||||
|
demucs_model_set = data['DemucsModel_MDX']
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
||||||
if mdx_name == 'UVR_MDXNET_9682':
|
if mdx_name == 'UVR_MDXNET_9682':
|
||||||
|
demucs_only = 'off'
|
||||||
model_set = 'UVR_MDXNET_9682.onnx'
|
model_set = 'UVR_MDXNET_9682.onnx'
|
||||||
model_set_name = 'UVR_MDXNET_9682'
|
model_set_name = 'UVR_MDXNET_9682'
|
||||||
modeltype = 'v'
|
modeltype = 'v'
|
||||||
|
demucs_model_set = data['DemucsModel_MDX']
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
||||||
if mdx_name == 'UVR_MDXNET_9662':
|
if mdx_name == 'UVR_MDXNET_9662':
|
||||||
|
demucs_only = 'off'
|
||||||
model_set = 'UVR_MDXNET_9662.onnx'
|
model_set = 'UVR_MDXNET_9662.onnx'
|
||||||
model_set_name = 'UVR_MDXNET_9662'
|
model_set_name = 'UVR_MDXNET_9662'
|
||||||
modeltype = 'v'
|
modeltype = 'v'
|
||||||
|
demucs_model_set = data['DemucsModel_MDX']
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
||||||
if mdx_name == 'UVR_MDXNET_KARA':
|
if mdx_name == 'UVR_MDXNET_KARA':
|
||||||
|
demucs_only = 'off'
|
||||||
model_set = 'UVR_MDXNET_KARA.onnx'
|
model_set = 'UVR_MDXNET_KARA.onnx'
|
||||||
model_set_name = 'UVR_MDXNET_KARA'
|
model_set_name = 'UVR_MDXNET_KARA'
|
||||||
modeltype = 'v'
|
modeltype = 'v'
|
||||||
|
demucs_model_set = data['DemucsModel_MDX']
|
||||||
|
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
||||||
|
if 'Demucs' in mdx_name:
|
||||||
|
demucs_only = 'on'
|
||||||
|
demucs_switch = 'on'
|
||||||
|
demucs_model_set = mdx_name
|
||||||
|
model_set = ''
|
||||||
|
model_set_name = 'UVR'
|
||||||
|
modeltype = 'v'
|
||||||
|
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
||||||
|
if 'extra' in mdx_name:
|
||||||
|
demucs_only = 'on'
|
||||||
|
demucs_switch = 'on'
|
||||||
|
demucs_model_set = mdx_name
|
||||||
|
model_set = ''
|
||||||
|
model_set_name = 'extra'
|
||||||
|
modeltype = 'v'
|
||||||
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
noise_pro = 'MDX-NET_Noise_Profile_14_kHz'
|
||||||
|
|
||||||
|
print('demucs_only? ', demucs_only)
|
||||||
|
|
||||||
if data['noise_pro_select'] == 'Auto Select':
|
if data['noise_pro_select'] == 'Auto Select':
|
||||||
noise_pro_set = noise_pro
|
noise_pro_set = noise_pro
|
||||||
@@ -2034,11 +2150,8 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
|
|
||||||
e = os.path.join(data["export_path"])
|
e = os.path.join(data["export_path"])
|
||||||
|
|
||||||
demucsmodel = 'models/Demucs_Model/' + str(data['DemucsModel'])
|
|
||||||
|
|
||||||
pred = Predictor()
|
pred = Predictor()
|
||||||
pred.prediction_setup(demucs_name=demucsmodel,
|
pred.prediction_setup()
|
||||||
channels=channel_set)
|
|
||||||
|
|
||||||
# split
|
# split
|
||||||
pred.prediction(
|
pred.prediction(
|
||||||
@@ -2502,7 +2615,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
try:
|
try:
|
||||||
with open('errorlog.txt', 'w') as f:
|
with open('errorlog.txt', 'w') as f:
|
||||||
f.write(f'Last Error Received:\n\n' +
|
f.write(f'Last Error Received:\n\n' +
|
||||||
f'Error Received while attempting to run user ensemble:\n' +
|
f'Error Received while attempting to run Manual Ensemble:\n' +
|
||||||
f'Process Method: Ensemble Mode\n\n' +
|
f'Process Method: Ensemble Mode\n\n' +
|
||||||
f'FFmpeg might be missing or corrupted.\n\n' +
|
f'FFmpeg might be missing or corrupted.\n\n' +
|
||||||
f'If this error persists, please contact the developers.\n\n' +
|
f'If this error persists, please contact the developers.\n\n' +
|
||||||
@@ -2530,7 +2643,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
try:
|
try:
|
||||||
with open('errorlog.txt', 'w') as f:
|
with open('errorlog.txt', 'w') as f:
|
||||||
f.write(f'Last Error Received:\n\n' +
|
f.write(f'Last Error Received:\n\n' +
|
||||||
f'Error Received while attempting to run user ensemble:\n' +
|
f'Error Received while attempting to run Manual Ensemble:\n' +
|
||||||
f'Process Method: Ensemble Mode\n\n' +
|
f'Process Method: Ensemble Mode\n\n' +
|
||||||
f'FFmpeg might be missing or corrupted.\n\n' +
|
f'FFmpeg might be missing or corrupted.\n\n' +
|
||||||
f'If this error persists, please contact the developers.\n\n' +
|
f'If this error persists, please contact the developers.\n\n' +
|
||||||
@@ -2899,11 +3012,10 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
|
|||||||
update_progress(**progress_kwargs,
|
update_progress(**progress_kwargs,
|
||||||
step=1)
|
step=1)
|
||||||
|
|
||||||
|
|
||||||
print('Done!')
|
print('Done!')
|
||||||
|
|
||||||
progress_var.set(0)
|
progress_var.set(0)
|
||||||
if not data['ensChoose'] == 'User Ensemble':
|
if not data['ensChoose'] == 'Manual Ensemble':
|
||||||
text_widget.write(base_text + f'Conversions Completed!\n')
|
text_widget.write(base_text + f'Conversions Completed!\n')
|
||||||
elif data['algo'] == 'Instrumentals (Min Spec)' and len(data['input_paths']) <= 1 or data['algo'] == 'Vocals (Max Spec)' and len(data['input_paths']) <= 1:
|
elif data['algo'] == 'Instrumentals (Min Spec)' and len(data['input_paths']) <= 1 or data['algo'] == 'Vocals (Max Spec)' and len(data['input_paths']) <= 1:
|
||||||
text_widget.write(base_text + f'Please select 2 or more files to use this feature and try again.\n')
|
text_widget.write(base_text + f'Please select 2 or more files to use this feature and try again.\n')
|
||||||
|
|||||||
Reference in New Issue
Block a user