From 76e2c497f75ab8086158b533ddb65145eec24492 Mon Sep 17 00:00:00 2001 From: James Betker Date: Thu, 9 Sep 2021 23:34:56 -0600 Subject: [PATCH] Improvements to splitter --- codes/models/spleeter/separator.py | 2 +- .../spleeter_split_voice_and_background_2.py | 27 ++++++++++--------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/codes/models/spleeter/separator.py b/codes/models/spleeter/separator.py index ceafd17f..bda3e25e 100644 --- a/codes/models/spleeter/separator.py +++ b/codes/models/spleeter/separator.py @@ -29,4 +29,4 @@ class Separator: return { 'vocals': res[0].cpu().numpy(), 'accompaniment': res[1].cpu().numpy() - } \ No newline at end of file + } diff --git a/codes/scripts/audio/preparation/spleeter_split_voice_and_background_2.py b/codes/scripts/audio/preparation/spleeter_split_voice_and_background_2.py index a647d405..2e101e33 100644 --- a/codes/scripts/audio/preparation/spleeter_split_voice_and_background_2.py +++ b/codes/scripts/audio/preparation/spleeter_split_voice_and_background_2.py @@ -14,6 +14,7 @@ def main(): src_dir = 'F:\\split\\podcast-dump0' output_dir = 'F:\\tmp\\out' output_dir_bg = 'F:\\tmp\\bg' + output_dir_reject = 'F:\\tmp\\rejected' output_sample_rate=22050 batch_size=24 @@ -28,34 +29,34 @@ def main(): sep = separator.separate(waves) for j in range(sep['vocals'].shape[0]): - vocals = sep['vocals'][j] - bg = sep['accompaniment'][j] - vmax = np.abs(vocals).mean() - bmax = np.abs(bg).mean() + wave = waves[j].cpu().numpy()[:durations[j]] + vocals = sep['vocals'][j][:durations[j]] + bg = sep['accompaniment'][j][:durations[j]] + vmax = np.abs(vocals[output_sample_rate:-output_sample_rate]).mean() + bmax = np.abs(bg[output_sample_rate:-output_sample_rate]).mean() # Only output to the "good" sample dir if the ratio of background noise to vocal noise is high enough. ratio = vmax / (bmax+.0000001) - if ratio >= 25: # These values were derived empirically + if ratio >= 4: # These values were derived empirically od = output_dir - out_sound = waves[j].cpu().numpy() - elif ratio <= 1: + out_sound = wave + elif ratio <= 2: od = output_dir_bg out_sound = bg else: - continue + print(f"Reject {paths[j]}: {ratio}") + od = output_dir_reject + out_sound = wave # Strip out channels. if len(out_sound.shape) > 1: out_sound = out_sound[:, 0] # Just use the first channel. - # Resize to true duration - out_sound = out_sound[:durations[j]] # Compile an output path. path = paths[j] - reld = os.path.relpath(os.path.dirname(path), src_dir) + reld = str(os.path.relpath(os.path.dirname(path), src_dir)).strip() os.makedirs(os.path.join(od, reld), exist_ok=True) - relp = os.path.relpath(path, src_dir) - output_path = os.path.join(od, relp) + output_path = os.path.join(od, reld, os.path.basename(path)) wavfile.write(output_path, output_sample_rate, out_sound)