From d4e33bf15f4fbbfc1950385764e9cd77300b127b Mon Sep 17 00:00:00 2001
From: James Betker
Date: Mon, 9 Aug 2021 11:55:46 -0600
Subject: [PATCH] Fixes to the mp3 splitter

---
Review notes (free text in this position is ignored by git am / git apply):
- The patch had been collapsed onto two lines; line breaks and indentation
  were reconstructed to match the hunk headers and the diffstat. Exact
  whitespace of context lines is inferred, so use
  `git apply --ignore-whitespace` if a context line fails to match.
- The channel selection now uses index 0 so that the code agrees with its
  "first channel" comment (index 1 picks the second channel). This assumes
  multi-channel audio is shaped (samples, channels) - confirm against
  audio2numpy before applying.

 codes/data/audio/random_mp3_splitter.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/codes/data/audio/random_mp3_splitter.py b/codes/data/audio/random_mp3_splitter.py
index 4170f0a1..2785e57b 100644
--- a/codes/data/audio/random_mp3_splitter.py
+++ b/codes/data/audio/random_mp3_splitter.py
@@ -9,17 +9,27 @@ import torch.nn.functional as F
 import os.path as osp
 
 if __name__ == '__main__':
-    src_dir = 'E:\\audio\\books'
+    src_dir = 'O:\\podcast_dumps'
+    #src_dir = 'E:\\audio\\books'
+    output_dir = 'D:\\data\\audio\\podcasts-split'
+    #output_dir = 'E:\\audio\\books-clips'
     clip_length = 5  # In seconds
     sparsity = .05  # Only this proportion of the total clips are extracted as wavs.
     output_sample_rate=22050
-    output_dir = 'E:\\audio\\books-clips'
 
     files = find_audio_files(src_dir, include_nonwav=True)
     for e, file in enumerate(tqdm(files)):
-        if e < 7250:
+        if e < 1486:
             continue
-        file_basis = osp.relpath(file, src_dir).replace('/', '_').replace('\\', '_')
+        file_basis = osp.relpath(file, src_dir)\
+            .replace('/', '_')\
+            .replace('\\', '_')\
+            .replace('.', '_')\
+            .replace(' ', '_')\
+            .replace('!', '_')\
+            .replace(',', '_')
+        if len(file_basis) > 100:
+            file_basis = file_basis[:100]
         try:
             wave, sample_rate = audio2numpy.open_audio(file)
         except:
@@ -28,14 +38,16 @@ if __name__ == '__main__':
         wave = torch.tensor(wave)
         # Strip out channels.
         if len(wave.shape) > 1:
-            wave = wave[0]  # Just use the first channel.
+            wave = wave[:, 0]  # Just use the first channel.
 
         # Calculate how much data we need to extract for each clip.
         clip_sz = sample_rate * clip_length
         interval = int(sample_rate * (clip_length / sparsity))
         i = 0
+        if wave.shape[-1] == 0:
+            print("Something went wrong: wave shape is 0.")
         while (i+clip_sz) < wave.shape[-1]:
             clip = wave[i:i+clip_sz]
             clip = F.interpolate(clip.view(1,1,clip_sz), scale_factor=output_sample_rate/sample_rate).squeeze()
-            wavfile.write(osp.join(output_dir, f'{file_basis}_{i}.wav'), output_sample_rate, clip.numpy())
+            wavfile.write(osp.join(output_dir, f'{e}_{file_basis}_{i}.wav'), output_sample_rate, clip.numpy())
             i = i + interval