From d4e33bf15f4fbbfc1950385764e9cd77300b127b Mon Sep 17 00:00:00 2001
From: James Betker <jbetker@gmail.com>
Date: Mon, 9 Aug 2021 11:55:46 -0600
Subject: [PATCH] Fixes to the mp3 splitter

---
 codes/data/audio/random_mp3_splitter.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/codes/data/audio/random_mp3_splitter.py b/codes/data/audio/random_mp3_splitter.py
index 4170f0a1..2785e57b 100644
--- a/codes/data/audio/random_mp3_splitter.py
+++ b/codes/data/audio/random_mp3_splitter.py
@@ -9,17 +9,27 @@ import torch.nn.functional as F
 import os.path as osp
 
 if __name__ == '__main__':
-    src_dir = 'E:\\audio\\books'
+    src_dir = 'O:\\podcast_dumps'
+    #src_dir = 'E:\\audio\\books'
+    output_dir = 'D:\\data\\audio\\podcasts-split'
+    #output_dir = 'E:\\audio\\books-clips'
     clip_length = 5  # In seconds
     sparsity = .05  # Only this proportion of the total clips are extracted as wavs.
     output_sample_rate=22050
-    output_dir = 'E:\\audio\\books-clips'
 
     files = find_audio_files(src_dir, include_nonwav=True)
     for e, file in enumerate(tqdm(files)):
-        if e < 7250:
+        if e < 1486:
             continue
-        file_basis = osp.relpath(file, src_dir).replace('/', '_').replace('\\', '_')
+        file_basis = osp.relpath(file, src_dir)\
+            .replace('/', '_')\
+            .replace('\\', '_')\
+            .replace('.', '_')\
+            .replace(' ', '_')\
+            .replace('!', '_')\
+            .replace(',', '_')
+        if len(file_basis) > 100:
+            file_basis = file_basis[:100]
         try:
             wave, sample_rate = audio2numpy.open_audio(file)
         except:
@@ -28,14 +38,16 @@ if __name__ == '__main__':
         wave = torch.tensor(wave)
         # Strip out channels.
         if len(wave.shape) > 1:
-            wave = wave[0]  # Just use the first channel.
+            wave = wave[:, 1]  # Keep a single channel: index 1 along the last axis (not index 0).
 
         # Calculate how much data we need to extract for each clip.
         clip_sz = sample_rate * clip_length
         interval = int(sample_rate * (clip_length / sparsity))
         i = 0
+        if wave.shape[-1] == 0:
+            print("Something went wrong: wave shape is 0.")
         while (i+clip_sz) < wave.shape[-1]:
             clip = wave[i:i+clip_sz]
             clip = F.interpolate(clip.view(1,1,clip_sz), scale_factor=output_sample_rate/sample_rate).squeeze()
-            wavfile.write(osp.join(output_dir, f'{file_basis}_{i}.wav'), output_sample_rate, clip.numpy())
+            wavfile.write(osp.join(output_dir, f'{e}_{file_basis}_{i}.wav'), output_sample_rate, clip.numpy())
             i = i + interval