Add files via upload

This commit is contained in:
Anjok07
2022-04-14 23:41:57 -05:00
committed by GitHub
parent 18fbabc673
commit 4d9122581e
3 changed files with 1017 additions and 315 deletions

View File

@@ -30,70 +30,8 @@ class VocalRemover(object):
self.text_widget = text_widget
self.models = defaultdict(lambda: None)
self.devices = defaultdict(lambda: None)
self._load_models()
# self.offset = model.offset
def _load_models(self):
self.text_widget.write('Loading models...\n') # nopep8 Write Command Text
nn_arch_sizes = [
31191, # default
33966, 123821, 123812, 537238 # custom
]
global args
global model_params_d
p = argparse.ArgumentParser()
p.add_argument('--paramone', type=str, default='lib_v5/modelparams/4band_44100.json')
p.add_argument('--paramtwo', type=str, default='lib_v5/modelparams/4band_v2.json')
p.add_argument('--paramthree', type=str, default='lib_v5/modelparams/3band_44100_msb2.json')
p.add_argument('--paramfour', type=str, default='lib_v5/modelparams/4band_v2_sn.json')
p.add_argument('--aggressiveness',type=float, default=data['agg']/100)
p.add_argument('--nn_architecture', type=str, choices= ['auto'] + list('{}KB'.format(s) for s in nn_arch_sizes), default='auto')
p.add_argument('--high_end_process', type=str, default='mirroring')
args = p.parse_args()
if 'auto' == args.nn_architecture:
model_size = math.ceil(os.stat(data['instrumentalModel']).st_size / 1024)
args.nn_architecture = '{}KB'.format(min(nn_arch_sizes, key=lambda x:abs(x-model_size)))
nets = importlib.import_module('lib_v5.nets' + f'_{args.nn_architecture}'.replace('_{}KB'.format(nn_arch_sizes[0]), ''), package=None)
ModelName=(data['instrumentalModel'])
ModelParam1="4BAND_44100"
ModelParam2="4BAND_44100_B"
ModelParam3="MSB2"
ModelParam4="4BAND_44100_SN"
if ModelParam1 in ModelName:
model_params_d=args.paramone
if ModelParam2 in ModelName:
model_params_d=args.paramtwo
if ModelParam3 in ModelName:
model_params_d=args.paramthree
if ModelParam4 in ModelName:
model_params_d=args.paramfour
print(model_params_d)
mp = ModelParameters(model_params_d)
# -Instrumental-
if os.path.isfile(data['instrumentalModel']):
device = torch.device('cpu')
model = nets.CascadedASPPNet(mp.param['bins'] * 2)
model.load_state_dict(torch.load(self.data['instrumentalModel'],
map_location=device))
if torch.cuda.is_available() and self.data['gpu'] >= 0:
device = torch.device('cuda:{}'.format(self.data['gpu']))
model.to(device)
self.models['instrumental'] = model
self.devices['instrumental'] = device
self.text_widget.write('Done!\n')
data = {
# Paths
@@ -152,6 +90,26 @@ def determineModelFolderName():
def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress_var: tk.Variable,
**kwargs: dict):
global args
global model_params_d
global nn_arch_sizes
nn_arch_sizes = [
31191, # default
33966, 123821, 123812, 537238 # custom
]
p = argparse.ArgumentParser()
p.add_argument('--paramone', type=str, default='lib_v5/modelparams/4band_44100.json')
p.add_argument('--paramtwo', type=str, default='lib_v5/modelparams/4band_v2.json')
p.add_argument('--paramthree', type=str, default='lib_v5/modelparams/3band_44100_msb2.json')
p.add_argument('--paramfour', type=str, default='lib_v5/modelparams/4band_v2_sn.json')
p.add_argument('--aggressiveness',type=float, default=data['agg']/100)
p.add_argument('--nn_architecture', type=str, choices= ['auto'] + list('{}KB'.format(s) for s in nn_arch_sizes), default='auto')
p.add_argument('--high_end_process', type=str, default='mirroring')
args = p.parse_args()
def save_files(wav_instrument, wav_vocals):
"""Save output music files"""
@@ -215,210 +173,255 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
os.mkdir(folder_path)
# Separation Preperation
try:
for file_num, music_file in enumerate(data['input_paths'], start=1):
# Determine File Name
base_name = f'{data["export_path"]}{modelFolderName}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}'
# Start Separation
model_name = os.path.basename(data[f'{data["useModel"]}Model'])
model = vocal_remover.models[data['useModel']]
device = vocal_remover.devices[data['useModel']]
# -Get text and update progress-
base_text = get_baseText(total_files=len(data['input_paths']),
file_num=file_num)
progress_kwargs = {'progress_var': progress_var,
'total_files': len(data['input_paths']),
'file_num': file_num}
update_progress(**progress_kwargs,
step=0)
mp = ModelParameters(model_params_d)
try: #Load File(s)
for file_num, music_file in enumerate(data['input_paths'], start=1):
# Determine File Name
base_name = f'{data["export_path"]}{modelFolderName}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}'
# -Go through the different steps of seperation-
# Wave source
text_widget.write(base_text + 'Loading wave source...\n')
X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {}
bands_n = len(mp.param['band'])
for d in range(bands_n, 0, -1):
bp = mp.param['band'][d]
if d == bands_n: # high-end band
X_wave[d], _ = librosa.load(
music_file, bp['sr'], False, dtype=np.float32, res_type=bp['res_type'])
if X_wave[d].ndim == 1:
X_wave[d] = np.asarray([X_wave[d], X_wave[d]])
else: # lower bands
X_wave[d] = librosa.resample(X_wave[d+1], mp.param['band'][d+1]['sr'], bp['sr'], res_type=bp['res_type'])
model_name = os.path.basename(data[f'{data["useModel"]}Model'])
model = vocal_remover.models[data['useModel']]
device = vocal_remover.devices[data['useModel']]
# -Get text and update progress-
base_text = get_baseText(total_files=len(data['input_paths']),
file_num=file_num)
progress_kwargs = {'progress_var': progress_var,
'total_files': len(data['input_paths']),
'file_num': file_num}
update_progress(**progress_kwargs,
step=0)
#Load Model(s)
text_widget.write(base_text + 'Loading models...')
if 'auto' == args.nn_architecture:
model_size = math.ceil(os.stat(data['instrumentalModel']).st_size / 1024)
args.nn_architecture = '{}KB'.format(min(nn_arch_sizes, key=lambda x:abs(x-model_size)))
nets = importlib.import_module('lib_v5.nets' + f'_{args.nn_architecture}'.replace('_{}KB'.format(nn_arch_sizes[0]), ''), package=None)
ModelName=(data['instrumentalModel'])
ModelParam1="4BAND_44100"
ModelParam2="4BAND_44100_B"
ModelParam3="MSB2"
ModelParam4="4BAND_44100_SN"
if ModelParam1 in ModelName:
model_params_d=args.paramone
if ModelParam2 in ModelName:
model_params_d=args.paramtwo
if ModelParam3 in ModelName:
model_params_d=args.paramthree
if ModelParam4 in ModelName:
model_params_d=args.paramfour
# Stft of wave source
print('Model Parameters:', model_params_d)
X_spec_s[d] = spec_utils.wave_to_spectrogram_mt(X_wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'],
mp.param['mid_side_b2'], mp.param['reverse'])
mp = ModelParameters(model_params_d)
if d == bands_n and args.high_end_process != 'none':
input_high_end_h = (bp['n_fft']//2 - bp['crop_stop']) + (mp.param['pre_filter_stop'] - mp.param['pre_filter_start'])
input_high_end = X_spec_s[d][:, bp['n_fft']//2-input_high_end_h:bp['n_fft']//2, :]
text_widget.write(base_text + 'Done!\n')
update_progress(**progress_kwargs,
step=0.1)
text_widget.write(base_text + 'Stft of wave source...\n')
text_widget.write(base_text + 'Done!\n')
text_widget.write(base_text + "Please Wait..\n")
X_spec_m = spec_utils.combine_spectrograms(X_spec_s, mp)
del X_wave, X_spec_s
def inference(X_spec, device, model, aggressiveness):
def _execute(X_mag_pad, roi_size, n_window, device, model, aggressiveness):
model.eval()
# -Instrumental-
if os.path.isfile(data['instrumentalModel']):
device = torch.device('cpu')
model = nets.CascadedASPPNet(mp.param['bins'] * 2)
model.load_state_dict(torch.load(data['instrumentalModel'],
map_location=device))
if torch.cuda.is_available() and data['gpu'] >= 0:
device = torch.device('cuda:{}'.format(data['gpu']))
model.to(device)
with torch.no_grad():
preds = []
iterations = [n_window]
vocal_remover.models['instrumental'] = model
vocal_remover.devices['instrumental'] = device
total_iterations = sum(iterations)
text_widget.write(' Done!\n')
model_name = os.path.basename(data[f'{data["useModel"]}Model'])
mp = ModelParameters(model_params_d)
text_widget.write(base_text + "Length: "f"{total_iterations} Slices\n")
for i in tqdm(range(n_window)):
update_progress(**progress_kwargs,
step=(0.1 + (0.8/n_window * i)))
start = i * roi_size
X_mag_window = X_mag_pad[None, :, :, start:start + data['window_size']]
X_mag_window = torch.from_numpy(X_mag_window).to(device)
pred = model.predict(X_mag_window, aggressiveness)
pred = pred.detach().cpu().numpy()
preds.append(pred[0])
# -Go through the different steps of seperation-
# Wave source
text_widget.write(base_text + 'Loading wave source...')
X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {}
bands_n = len(mp.param['band'])
for d in range(bands_n, 0, -1):
bp = mp.param['band'][d]
if d == bands_n: # high-end band
X_wave[d], _ = librosa.load(
music_file, bp['sr'], False, dtype=np.float32, res_type=bp['res_type'])
pred = np.concatenate(preds, axis=2)
if X_wave[d].ndim == 1:
X_wave[d] = np.asarray([X_wave[d], X_wave[d]])
else: # lower bands
X_wave[d] = librosa.resample(X_wave[d+1], mp.param['band'][d+1]['sr'], bp['sr'], res_type=bp['res_type'])
# Stft of wave source
return pred
X_spec_s[d] = spec_utils.wave_to_spectrogram_mt(X_wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'],
mp.param['mid_side_b2'], mp.param['reverse'])
if d == bands_n and args.high_end_process != 'none':
input_high_end_h = (bp['n_fft']//2 - bp['crop_stop']) + (mp.param['pre_filter_stop'] - mp.param['pre_filter_start'])
input_high_end = X_spec_s[d][:, bp['n_fft']//2-input_high_end_h:bp['n_fft']//2, :]
def preprocess(X_spec):
X_mag = np.abs(X_spec)
X_phase = np.angle(X_spec)
text_widget.write('Done!\n')
return X_mag, X_phase
update_progress(**progress_kwargs,
step=0.1)
text_widget.write(base_text + 'Stft of wave source...')
X_mag, X_phase = preprocess(X_spec)
coef = X_mag.max()
X_mag_pre = X_mag / coef
n_frame = X_mag_pre.shape[2]
pad_l, pad_r, roi_size = dataset.make_padding(n_frame,
data['window_size'], model.offset)
n_window = int(np.ceil(n_frame / roi_size))
X_mag_pad = np.pad(
X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant')
text_widget.write(' Done!\n')
pred = _execute(X_mag_pad, roi_size, n_window,
device, model, aggressiveness)
pred = pred[:, :, :n_frame]
text_widget.write(base_text + "Please Wait...\n")
X_spec_m = spec_utils.combine_spectrograms(X_spec_s, mp)
if data['tta']:
pad_l += roi_size // 2
pad_r += roi_size // 2
n_window += 1
del X_wave, X_spec_s
def inference(X_spec, device, model, aggressiveness):
def _execute(X_mag_pad, roi_size, n_window, device, model, aggressiveness):
model.eval()
with torch.no_grad():
preds = []
iterations = [n_window]
total_iterations = sum(iterations)
text_widget.write(base_text + "Processing "f"{total_iterations} Slices... ")
for i in tqdm(range(n_window)):
update_progress(**progress_kwargs,
step=(0.1 + (0.8/n_window * i)))
start = i * roi_size
X_mag_window = X_mag_pad[None, :, :, start:start + data['window_size']]
X_mag_window = torch.from_numpy(X_mag_window).to(device)
pred = model.predict(X_mag_window, aggressiveness)
pred = pred.detach().cpu().numpy()
preds.append(pred[0])
pred = np.concatenate(preds, axis=2)
text_widget.write('Done!\n')
return pred
def preprocess(X_spec):
X_mag = np.abs(X_spec)
X_phase = np.angle(X_spec)
return X_mag, X_phase
X_mag, X_phase = preprocess(X_spec)
coef = X_mag.max()
X_mag_pre = X_mag / coef
n_frame = X_mag_pre.shape[2]
pad_l, pad_r, roi_size = dataset.make_padding(n_frame,
data['window_size'], model.offset)
n_window = int(np.ceil(n_frame / roi_size))
X_mag_pad = np.pad(
X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant')
pred = _execute(X_mag_pad, roi_size, n_window,
device, model, aggressiveness)
pred = pred[:, :, :n_frame]
if data['tta']:
pad_l += roi_size // 2
pad_r += roi_size // 2
n_window += 1
pred_tta = _execute(X_mag_pad, roi_size, n_window,
device, model, aggressiveness)
pred_tta = pred_tta[:, :, roi_size // 2:]
pred_tta = pred_tta[:, :, :n_frame]
X_mag_pad = np.pad(
X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant')
return (pred + pred_tta) * 0.5 * coef, X_mag, np.exp(1.j * X_phase)
pred_tta = _execute(X_mag_pad, roi_size, n_window,
device, model, aggressiveness)
pred_tta = pred_tta[:, :, roi_size // 2:]
pred_tta = pred_tta[:, :, :n_frame]
return (pred + pred_tta) * 0.5 * coef, X_mag, np.exp(1.j * X_phase)
else:
return pred * coef, X_mag, np.exp(1.j * X_phase)
aggressiveness = {'value': args.aggressiveness, 'split_bin': mp.param['band'][1]['crop_stop']}
if data['tta']:
text_widget.write(base_text + "Running Inferences (TTA)...\n")
else:
return pred * coef, X_mag, np.exp(1.j * X_phase)
aggressiveness = {'value': args.aggressiveness, 'split_bin': mp.param['band'][1]['crop_stop']}
if data['tta']:
text_widget.write(base_text + "Running Inferences (TTA)...\n")
else:
text_widget.write(base_text + "Running Inference...\n")
pred, X_mag, X_phase = inference(X_spec_m,
device,
model, aggressiveness)
text_widget.write(base_text + 'Done!\n')
update_progress(**progress_kwargs,
step=0.9)
# Postprocess
if data['postprocess']:
text_widget.write(base_text + 'Post processing...\n')
pred_inv = np.clip(X_mag - pred, 0, np.inf)
pred = spec_utils.mask_silence(pred, pred_inv)
text_widget.write(base_text + 'Done!\n')
text_widget.write(base_text + "Running Inference...\n")
pred, X_mag, X_phase = inference(X_spec_m,
device,
model, aggressiveness)
update_progress(**progress_kwargs,
step=0.95)
step=0.9)
# Postprocess
if data['postprocess']:
text_widget.write(base_text + 'Post processing...')
pred_inv = np.clip(X_mag - pred, 0, np.inf)
pred = spec_utils.mask_silence(pred, pred_inv)
text_widget.write(' Done!\n')
# Inverse stft
text_widget.write(base_text + 'Inverse stft of instruments and vocals...\n') # nopep8
y_spec_m = pred * X_phase
v_spec_m = X_spec_m - y_spec_m
if args.high_end_process.startswith('mirroring'):
input_high_end_ = spec_utils.mirroring(args.high_end_process, y_spec_m, input_high_end, mp)
wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, mp, input_high_end_h, input_high_end_)
else:
wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, mp)
update_progress(**progress_kwargs,
step=0.95)
if args.high_end_process.startswith('mirroring'):
input_high_end_ = spec_utils.mirroring(args.high_end_process, v_spec_m, input_high_end, mp)
# Inverse stft
text_widget.write(base_text + 'Inverse stft of instruments and vocals...') # nopep8
y_spec_m = pred * X_phase
v_spec_m = X_spec_m - y_spec_m
if args.high_end_process.startswith('mirroring'):
input_high_end_ = spec_utils.mirroring(args.high_end_process, y_spec_m, input_high_end, mp)
wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, mp, input_high_end_h, input_high_end_)
else:
wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, mp)
text_widget.write(base_text + 'Done!\n')
wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, mp, input_high_end_h, input_high_end_)
else:
wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, mp)
update_progress(**progress_kwargs,
step=1)
# Save output music files
text_widget.write(base_text + 'Saving Files...\n')
save_files(wav_instrument, wav_vocals)
text_widget.write(base_text + 'Done!\n')
if args.high_end_process.startswith('mirroring'):
input_high_end_ = spec_utils.mirroring(args.high_end_process, v_spec_m, input_high_end, mp)
update_progress(**progress_kwargs,
step=1)
wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, mp, input_high_end_h, input_high_end_)
else:
wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, mp)
text_widget.write('Done!\n')
# Save output image
if data['output_image']:
with open('{}_Instruments.jpg'.format(base_name), mode='wb') as f:
image = spec_utils.spectrogram_to_image(y_spec_m)
_, bin_image = cv2.imencode('.jpg', image)
bin_image.tofile(f)
with open('{}_Vocals.jpg'.format(base_name), mode='wb') as f:
image = spec_utils.spectrogram_to_image(v_spec_m)
_, bin_image = cv2.imencode('.jpg', image)
bin_image.tofile(f)
update_progress(**progress_kwargs,
step=1)
# Save output music files
text_widget.write(base_text + 'Saving Files...')
save_files(wav_instrument, wav_vocals)
text_widget.write(' Done!\n')
text_widget.write(base_text + 'Completed Seperation!\n\n')
update_progress(**progress_kwargs,
step=1)
# Save output image
if data['output_image']:
with open('{}_Instruments.jpg'.format(base_name), mode='wb') as f:
image = spec_utils.spectrogram_to_image(y_spec_m)
_, bin_image = cv2.imencode('.jpg', image)
bin_image.tofile(f)
with open('{}_Vocals.jpg'.format(base_name), mode='wb') as f:
image = spec_utils.spectrogram_to_image(v_spec_m)
_, bin_image = cv2.imencode('.jpg', image)
bin_image.tofile(f)
text_widget.write(base_text + 'Completed Seperation!\n\n')
except Exception as e:
traceback_text = ''.join(traceback.format_tb(e.__traceback__))
message = f'Traceback Error: "{traceback_text}"\n{type(e).__name__}: "{e}"\nFile: {music_file}\nPlease contact the creator and attach a screenshot of this error with the file and settings that caused it!'
@@ -433,9 +436,9 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress
return
os.remove('temp.wav')
progress_var.set(0)
text_widget.write(f'Conversion(s) Completed and Saving all Files!\n')
text_widget.write(f'\nConversion(s) Completed!\n')
text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}') # nopep8
torch.cuda.empty_cache()
button_widget.configure(state=tk.NORMAL) # Enable Button