From bc603c32317636ae4fb4cb734e42dee954be04a3 Mon Sep 17 00:00:00 2001 From: James Betker Date: Sun, 12 Sep 2021 21:26:45 -0600 Subject: [PATCH] Script adjustments and fixes --- .../spleeter_split_voice_and_background_2.py | 12 +++++------- .../audio/{ => preparation}/split_on_silence.py | 4 ++-- .../audio/spleeter_split_voice_and_background.py | 8 ++++---- 3 files changed, 11 insertions(+), 13 deletions(-) rename codes/scripts/audio/{ => preparation}/split_on_silence.py (96%) diff --git a/codes/scripts/audio/preparation/spleeter_split_voice_and_background_2.py b/codes/scripts/audio/preparation/spleeter_split_voice_and_background_2.py index 2e101e33..0166aebd 100644 --- a/codes/scripts/audio/preparation/spleeter_split_voice_and_background_2.py +++ b/codes/scripts/audio/preparation/spleeter_split_voice_and_background_2.py @@ -11,12 +11,11 @@ from scripts.audio.preparation.spleeter_dataset import SpleeterDataset def main(): - src_dir = 'F:\\split\\podcast-dump0' - output_dir = 'F:\\tmp\\out' - output_dir_bg = 'F:\\tmp\\bg' - output_dir_reject = 'F:\\tmp\\rejected' + src_dir = 'F:\\split\\joe_rogan' + output_dir = 'F:\\split\\cleaned\\joe_rogan' + output_dir_bg = 'F:\\split\\background-noise\\joe_rogan' output_sample_rate=22050 - batch_size=24 + batch_size=16 dl = DataLoader(SpleeterDataset(src_dir, output_sample_rate), batch_size=batch_size, shuffle=False, num_workers=1, pin_memory=True) separator = Separator('pretrained_models/2stems', input_sr=output_sample_rate) @@ -45,8 +44,7 @@ def main(): out_sound = bg else: print(f"Reject {paths[j]}: {ratio}") - od = output_dir_reject - out_sound = wave + continue # Strip out channels. if len(out_sound.shape) > 1: diff --git a/codes/scripts/audio/split_on_silence.py b/codes/scripts/audio/preparation/split_on_silence.py similarity index 96% rename from codes/scripts/audio/split_on_silence.py rename to codes/scripts/audio/preparation/split_on_silence.py index b64374e9..38b7859a 100644 --- a/codes/scripts/audio/split_on_silence.py +++ b/codes/scripts/audio/preparation/split_on_silence.py @@ -15,11 +15,11 @@ def main(): parser.add_argument('--path') parser.add_argument('--out') args = parser.parse_args() - minimum_duration = 5 + minimum_duration = 2 maximum_duration = 20 files = find_audio_files(args.path, include_nonwav=True) for e, wav_file in enumerate(tqdm(files)): - #if e < 4197: + #if e < 1326: # continue print(f"Processing {wav_file}..") outdir = os.path.join(args.out, f'{e}_{os.path.basename(wav_file[:-4])}').replace('.', '').strip() diff --git a/codes/scripts/audio/spleeter_split_voice_and_background.py b/codes/scripts/audio/spleeter_split_voice_and_background.py index 4cfa77d2..cad9b525 100644 --- a/codes/scripts/audio/spleeter_split_voice_and_background.py +++ b/codes/scripts/audio/spleeter_split_voice_and_background.py @@ -13,9 +13,9 @@ import numpy as np # 1. Audio has little to no background noise, saved to "output_dir" # 2. Audio has a lot of background noise, bg noise split off and saved to "output_dir_bg" if __name__ == '__main__': - src_dir = 'F:\\split\\books1' - output_dir = 'F:\\split\\cleaned\\books1' - output_dir_bg = 'F:\\split\\background-noise\\books1' + src_dir = 'F:\\split\\joe_rogan' + output_dir = 'F:\\split\\cleaned\\joe_rogan' + output_dir_bg = 'F:\\split\\background-noise\\joe_rogan' output_sample_rate=22050 os.makedirs(output_dir_bg, exist_ok=True) @@ -63,4 +63,4 @@ if __name__ == '__main__': if len(os.shape) > 1: os = os[:, 0] # Just use the first channel. - wavfile.write(osp.join(od, f'{e}_{file_basis}.wav'), output_sample_rate, os) + wavfile.write(osp.join(od, file_basis, f'{e}.wav'), output_sample_rate, os)