From 97ea329a5926d13802733194c97f88d565efbe6f Mon Sep 17 00:00:00 2001
From: James Betker <jbetker@gmail.com>
Date: Fri, 17 Sep 2021 15:29:42 -0600
Subject: [PATCH] Make spleeter filter simpler (and hopefully much faster)

---
 .../spleeter_split_voice_and_background_2.py  | 29 ++++---------------
 1 file changed, 5 insertions(+), 24 deletions(-)

diff --git a/codes/scripts/audio/preparation/spleeter_split_voice_and_background_2.py b/codes/scripts/audio/preparation/spleeter_split_voice_and_background_2.py
index 5edf0b01..ca0fea53 100644
--- a/codes/scripts/audio/preparation/spleeter_split_voice_and_background_2.py
+++ b/codes/scripts/audio/preparation/spleeter_split_voice_and_background_2.py
@@ -12,13 +12,12 @@ from scripts.audio.preparation.spleeter_dataset import SpleeterDataset
 
 def main():
     src_dir = 'F:\\split\\joe_rogan'
-    output_dir = 'F:\\split\\cleaned\\joe_rogan'
-    output_dir_bg = 'F:\\split\\background-noise\\joe_rogan'
     output_sample_rate=22050
     batch_size=16
 
     dl = DataLoader(SpleeterDataset(src_dir, output_sample_rate, skip=batch_size*33000), batch_size=batch_size, shuffle=False, num_workers=1, pin_memory=True)
     separator = Separator('pretrained_models/2stems', input_sr=output_sample_rate)
+    unacceptable_files = open('unacceptable.txt', 'a')
     for batch in tqdm(dl):
         waves = batch['wave']
         paths = batch['path']
@@ -26,7 +25,6 @@ def main():
 
         sep = separator.separate(waves)
         for j in range(sep['vocals'].shape[0]):
-            wave = waves[j].cpu().numpy()[:durations[j]]
             vocals = sep['vocals'][j][:durations[j]]
             bg = sep['accompaniment'][j][:durations[j]]
             vmax = np.abs(vocals[output_sample_rate:-output_sample_rate]).mean()
@@ -34,27 +32,10 @@ def main():
 
             # Only output to the "good" sample dir if the ratio of background noise to vocal noise is high enough.
             ratio = vmax / (bmax+.0000001)
-            if ratio >= 4:  # These values were derived empirically
-                od = output_dir
-                out_sound = wave
-            elif ratio <= 2:
-                od = output_dir_bg
-                out_sound = bg
-            else:
-                print(f"Reject {paths[j]}: {ratio}")
-                continue
-
-            # Strip out channels.
-            if len(out_sound.shape) > 1:
-                out_sound = out_sound[:, 0]  # Just use the first channel.
-
-            # Compile an output path.
-            path = paths[j]
-            reld = str(os.path.relpath(os.path.dirname(path), src_dir)).strip()
-            os.makedirs(os.path.join(od, reld), exist_ok=True)
-            output_path = os.path.join(od, reld, os.path.basename(path))
-
-            wavfile.write(output_path, output_sample_rate, out_sound)
+            if ratio < 4:  # These values were derived empirically
+                unacceptable_files.write(f'{paths[j]}\n')
+        unacceptable_files.flush()
+    unacceptable_files.close()
 
 
 # Uses torch spleeter to divide audio clips into one of two bins: