Add files via upload

2023-04-12 03:26:00 -05:00
parent 6ffd7a244e
commit 459b38b3b9
1 changed files with 77 additions and 103 deletions
--- a/lib_v5/spec_utils.py
+++ b/lib_v5/spec_utils.py
@@ -5,8 +5,9 @@ import math
 import random
 import math
 import platform
 import traceback
 from . import pyrb
-
+#cur
 OPERATING_SYSTEM = platform.system()
 SYSTEM_ARCH = platform.platform()
 SYSTEM_PROC = platform.processor()
@@ -18,7 +19,7 @@ else:
    from . import pyrb
 if OPERATING_SYSTEM == 'Darwin':
-    wav_resolution = "polyphase" if SYSTEM_PROC == ARM or ARM in SYSTEM_ARCH else 'sinc_fastest'
+    wav_resolution = "polyphase" if SYSTEM_PROC == ARM or ARM in SYSTEM_ARCH else "sinc_fastest" 
 else:
    wav_resolution = "sinc_fastest"
@@ -35,8 +36,6 @@ def crop_center(h1, h2):
    elif h1_shape[3] < h2_shape[3]:
        raise ValueError('h1_shape[3] must be greater than h2_shape[3]')
    # s_freq = (h2_shape[2] - h1_shape[2]) // 2
    # e_freq = s_freq + h1_shape[2]
    s_time = (h1_shape[3] - h2_shape[3]) // 2
    e_time = s_time + h2_shape[3]
    h1 = h1[:, :, :, s_time:e_time]
@@ -116,6 +115,8 @@ def normalize(wave, is_normalize=False):
        if is_normalize:
            print(f"The result was normalized.")
            wave /= maxv
        else:
            print(f"The result was not normalized.")
    else:
        print(f"\nNormalization Set {is_normalize}: Input not above threshold for clipping. Max:{maxv}")
@@ -128,11 +129,14 @@ def normalize_two_stem(wave, mix, is_normalize=False):
    max_mix = np.abs(mix).max()
    if maxv > 1.0:
-        print(f"\nNormalization Set {is_normalize}: Primary source above threshold for clipping. The result was normalized. Max:{maxv}")
+        print(f"\nNormalization Set {is_normalize}: Primary source above threshold for clipping. Max:{maxv}")
-        print(f"\nNormalization Set {is_normalize}: Mixture above threshold for clipping. The result was normalized. Max:{max_mix}")
+        print(f"\nNormalization Set {is_normalize}: Mixture above threshold for clipping. Max:{max_mix}")
        if is_normalize:
            print(f"The result was normalized.")
            wave /= maxv
            mix /= maxv
        else:
            print(f"The result was not normalized.")
    else:
        print(f"\nNormalization Set {is_normalize}: Input not above threshold for clipping. Max:{maxv}")
@@ -205,75 +209,51 @@ def reduce_vocal_aggressively(X, y, softmask):
    return y_mag * np.exp(1.j * np.angle(y))
 def merge_artifacts(y_mask, thres=0.01, min_range=64, fade_size=32):
-    if min_range < fade_size * 2:
+    mask = y_mask
        raise ValueError('min_range must be >= fade_size * 2')
    idx = np.where(y_mask.min(axis=(0, 1)) > thres)[0]
    start_idx = np.insert(idx[np.where(np.diff(idx) != 1)[0] + 1], 0, idx[0])
    end_idx = np.append(idx[np.where(np.diff(idx) != 1)[0]], idx[-1])
    artifact_idx = np.where(end_idx - start_idx > min_range)[0]
    weight = np.zeros_like(y_mask)
    if len(artifact_idx) > 0:
        start_idx = start_idx[artifact_idx]
        end_idx = end_idx[artifact_idx]
        old_e = None
        for s, e in zip(start_idx, end_idx):
            if old_e is not None and s - old_e < fade_size:
                s = old_e - fade_size * 2
            if s != 0:
                weight[:, :, s:s + fade_size] = np.linspace(0, 1, fade_size)
            else:
                s -= fade_size
            if e != y_mask.shape[2]:
                weight[:, :, e - fade_size:e] = np.linspace(1, 0, fade_size)
            else:
                e += fade_size
            weight[:, :, s + fade_size:e - fade_size] = 1
            old_e = e
    v_mask = 1 - y_mask
    y_mask += weight * v_mask
    return y_mask
 def mask_silence(mag, ref, thres=0.1, min_range=64, fade_size=32):
    if min_range < fade_size * 2:
        raise ValueError('min_range must be >= fade_area * 2')
    mag = mag.copy()
    idx = np.where(ref.mean(axis=(0, 1)) < thres)[0]
    starts = np.insert(idx[np.where(np.diff(idx) != 1)[0] + 1], 0, idx[0])
    ends = np.append(idx[np.where(np.diff(idx) != 1)[0]], idx[-1])
    uninformative = np.where(ends - starts > min_range)[0]
    if len(uninformative) > 0:
        starts = starts[uninformative]
        ends = ends[uninformative]
        old_e = None
        for s, e in zip(starts, ends):
            if old_e is not None and s - old_e < fade_size:
                s = old_e - fade_size * 2
            if s != 0:
                weight = np.linspace(0, 1, fade_size)
                mag[:, :, s:s + fade_size] += weight * ref[:, :, s:s + fade_size]
            else:
                s -= fade_size
            if e != mag.shape[2]:
                weight = np.linspace(1, 0, fade_size)
                mag[:, :, e - fade_size:e] += weight * ref[:, :, e - fade_size:e]
            else:
                e += fade_size
            mag[:, :, s + fade_size:e - fade_size] += ref[:, :, s + fade_size:e - fade_size]
            old_e = e
    return mag
    try:
        if min_range < fade_size * 2:
            raise ValueError('min_range must be >= fade_size * 2')
        idx = np.where(y_mask.min(axis=(0, 1)) > thres)[0]
        start_idx = np.insert(idx[np.where(np.diff(idx) != 1)[0] + 1], 0, idx[0])
        end_idx = np.append(idx[np.where(np.diff(idx) != 1)[0]], idx[-1])
        artifact_idx = np.where(end_idx - start_idx > min_range)[0]
        weight = np.zeros_like(y_mask)
        if len(artifact_idx) > 0:
            start_idx = start_idx[artifact_idx]
            end_idx = end_idx[artifact_idx]
            old_e = None
            for s, e in zip(start_idx, end_idx):
                if old_e is not None and s - old_e < fade_size:
                    s = old_e - fade_size * 2
                if s != 0:
                    weight[:, :, s:s + fade_size] = np.linspace(0, 1, fade_size)
                else:
                    s -= fade_size
                if e != y_mask.shape[2]:
                    weight[:, :, e - fade_size:e] = np.linspace(1, 0, fade_size)
                else:
                    e += fade_size
                weight[:, :, s + fade_size:e - fade_size] = 1
                old_e = e
        v_mask = 1 - y_mask
        y_mask += weight * v_mask
        mask = y_mask
    except Exception as e:
        error_name = f'{type(e).__name__}'
        traceback_text = ''.join(traceback.format_tb(e.__traceback__))
        message = f'{error_name}: "{e}"\n{traceback_text}"'
        print('Post Process Failed: ', message)
    return mask
 def align_wave_head_and_tail(a, b):
    l = min([a[0].size, b[0].size])  
@@ -386,11 +366,11 @@ def mirroring(a, spec_m, input_high_end, mp):
        return np.where(np.abs(input_high_end) <= np.abs(mi), input_high_end, mi)
-def adjust_aggr(mask, is_vocal_model, aggressiveness):
+def adjust_aggr(mask, is_non_accom_stem, aggressiveness):
-    aggr = aggressiveness.get('value', 0.0) * 4
+    aggr = aggressiveness['value']
    if aggr != 0:
-        if is_vocal_model:
+        if is_non_accom_stem:
            aggr = 1 - aggr
        aggr = [aggr, aggr]
@@ -403,6 +383,9 @@ def adjust_aggr(mask, is_vocal_model, aggressiveness):
            mask[ch, :aggressiveness['split_bin']] = np.power(mask[ch, :aggressiveness['split_bin']], 1 + aggr[ch] / 3)
            mask[ch, aggressiveness['split_bin']:] = np.power(mask[ch, aggressiveness['split_bin']:], 1 + aggr[ch])
        # if is_non_accom_stem:
        #     mask = (1.0 - mask)
    return mask
 def stft(wave, nfft, hl):
@@ -442,36 +425,20 @@ def spec_effects(wave, algorithm='Default', value=None):
    return wave      
-def spectrogram_to_wave_bare(spec, hop_length=1024):
+def spectrogram_to_wave_no_mp(spec, n_fft=2048, hop_length=1024):
-    spec_left = np.asfortranarray(spec[0])
+    wave = librosa.istft(spec, n_fft=n_fft, hop_length=hop_length)
-    spec_right = np.asfortranarray(spec[1])
+    
-    wave_left = librosa.istft(spec_left, hop_length=hop_length)
+    if wave.ndim == 1:
-    wave_right = librosa.istft(spec_right, hop_length=hop_length)
+        wave = np.asfortranarray([wave,wave])
    wave = np.asfortranarray([wave_left, wave_right])
    return wave
 def spectrogram_to_wave_no_mp(spec, hop_length=1024):
    if spec.ndim == 2:
        wave = librosa.istft(spec, hop_length=hop_length)
    elif spec.ndim == 3:
        spec_left = np.asfortranarray(spec[0])
        spec_right = np.asfortranarray(spec[1])
        wave_left = librosa.istft(spec_left, hop_length=hop_length)
        wave_right = librosa.istft(spec_right, hop_length=hop_length)
        wave = np.asfortranarray([wave_left, wave_right])
    return wave
 def wave_to_spectrogram_no_mp(wave):
-    wave_left = np.asfortranarray(wave[0])
+    spec = librosa.stft(wave, n_fft=2048, hop_length=1024)
-    wave_right = np.asfortranarray(wave[1])
+    
-
+    if spec.ndim == 1:
-    spec_left = librosa.stft(wave_left, n_fft=2048, hop_length=1024)
+        spec = np.asfortranarray([spec,spec])
    spec_right = librosa.stft(wave_right, n_fft=2048, hop_length=1024)
    spec = np.asfortranarray([spec_left, spec_right])
    return spec
@@ -519,6 +486,8 @@ def ensembling(a, specs):
    return spec
 def ensemble_inputs(audio_input, algorithm, is_normalization, wav_type_set, save_path):
    wavs_ = []
    if algorithm == AVERAGE:
        output = average_audio(audio_input)
@@ -528,10 +497,15 @@ def ensemble_inputs(audio_input, algorithm, is_normalization, wav_type_set, save
        for i in range(len(audio_input)):  
            wave, samplerate = librosa.load(audio_input[i], mono=False, sr=44100)
            wavs_.append(wave)
            spec = wave_to_spectrogram_no_mp(wave)
            specs.append(spec)
        wave_shapes = [w.shape[1] for w in wavs_]
        target_shape = wavs_[wave_shapes.index(max(wave_shapes))]
        output = spectrogram_to_wave_no_mp(ensembling(algorithm, specs))
        output = to_shape(output, target_shape.shape)
    sf.write(save_path, normalize(output.T, is_normalization), samplerate, subtype=wav_type_set)
@@ -555,7 +529,7 @@ def to_shape_minimize(x: np.ndarray, target_shape):
    return np.pad(x, tuple(padding_list), mode='constant')
 def augment_audio(export_path, audio_file, rate, is_normalization, wav_type_set, save_format=None, is_pitch=False):
-    print('Rate: ', rate)
+
    wav, sr = librosa.load(audio_file, sr=44100, mono=False)
    if wav.ndim == 1: