From 97ea329a5926d13802733194c97f88d565efbe6f Mon Sep 17 00:00:00 2001 From: James Betker Date: Fri, 17 Sep 2021 15:29:42 -0600 Subject: [PATCH] Make spleeter filter simpler (and hopefully much faster) --- .../spleeter_split_voice_and_background_2.py | 29 ++++--------------- 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/codes/scripts/audio/preparation/spleeter_split_voice_and_background_2.py b/codes/scripts/audio/preparation/spleeter_split_voice_and_background_2.py index 5edf0b01..ca0fea53 100644 --- a/codes/scripts/audio/preparation/spleeter_split_voice_and_background_2.py +++ b/codes/scripts/audio/preparation/spleeter_split_voice_and_background_2.py @@ -12,13 +12,12 @@ from scripts.audio.preparation.spleeter_dataset import SpleeterDataset def main(): src_dir = 'F:\\split\\joe_rogan' - output_dir = 'F:\\split\\cleaned\\joe_rogan' - output_dir_bg = 'F:\\split\\background-noise\\joe_rogan' output_sample_rate=22050 batch_size=16 dl = DataLoader(SpleeterDataset(src_dir, output_sample_rate, skip=batch_size*33000), batch_size=batch_size, shuffle=False, num_workers=1, pin_memory=True) separator = Separator('pretrained_models/2stems', input_sr=output_sample_rate) + unacceptable_files = open('unacceptable.txt', 'a') for batch in tqdm(dl): waves = batch['wave'] paths = batch['path'] @@ -26,7 +25,6 @@ def main(): sep = separator.separate(waves) for j in range(sep['vocals'].shape[0]): - wave = waves[j].cpu().numpy()[:durations[j]] vocals = sep['vocals'][j][:durations[j]] bg = sep['accompaniment'][j][:durations[j]] vmax = np.abs(vocals[output_sample_rate:-output_sample_rate]).mean() @@ -34,27 +32,10 @@ def main(): # Only output to the "good" sample dir if the ratio of background noise to vocal noise is high enough. ratio = vmax / (bmax+.0000001) - if ratio >= 4: # These values were derived empirically - od = output_dir - out_sound = wave - elif ratio <= 2: - od = output_dir_bg - out_sound = bg - else: - print(f"Reject {paths[j]}: {ratio}") - continue - - # Strip out channels. - if len(out_sound.shape) > 1: - out_sound = out_sound[:, 0] # Just use the first channel. - - # Compile an output path. - path = paths[j] - reld = str(os.path.relpath(os.path.dirname(path), src_dir)).strip() - os.makedirs(os.path.join(od, reld), exist_ok=True) - output_path = os.path.join(od, reld, os.path.basename(path)) - - wavfile.write(output_path, output_sample_rate, out_sound) + if ratio < 4: # These values were derived empirically + unacceptable_files.write(f'{paths[j]}\n') + unacceptable_files.flush() + unacceptable_files.close() # Uses torch spleeter to divide audio clips into one of two bins: