From 64c7582bf55d1c0b7ae92f68dae5fdfbe34fdbe7 Mon Sep 17 00:00:00 2001
From: James Betker
Date: Thu, 28 Apr 2022 22:47:26 -0600
Subject: [PATCH] full pipeline

Give the three audio-preparation phase scripts a common command line
(double-dash long options, and --path instead of --root_path in phase 3)
and add pipeline.py, which runs the three phases in sequence.

pipeline.py launches each phase from an argument list with the current
interpreter and stops at the first phase that exits non-zero, so the
intermediate directory is only removed after a fully successful run.
---
 .../audio/preparation/phase_1_split_files.py  |  8 ++++----
 .../preparation/phase_2_sample_and_filter.py  | 12 ++++++------
 .../phase_3_generate_similarities.py          |  4 ++--
 codes/scripts/audio/preparation/pipeline.py   | 42 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 54 insertions(+), 12 deletions(-)
 create mode 100644 codes/scripts/audio/preparation/pipeline.py

diff --git a/codes/scripts/audio/preparation/phase_1_split_files.py b/codes/scripts/audio/preparation/phase_1_split_files.py
index b403e71f..812353d7 100644
--- a/codes/scripts/audio/preparation/phase_1_split_files.py
+++ b/codes/scripts/audio/preparation/phase_1_split_files.py
@@ -47,10 +47,10 @@ def process_file(file, base_path, output_path, progress_file):
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('-path', type=str, help='Path to search for files', default='Y:\\clips\\red_rising')
-    parser.add_argument('-progress_file', type=str, help='Place to store all files that have already been processed', default='Y:\\clips\\red_rising\\already_processed.txt')
-    parser.add_argument('-output_path', type=str, help='Path for output files', default='Y:\\clips\\red_rising_split')
-    parser.add_argument('-num_threads', type=int, help='Number of concurrent workers processing files.', default=4)
+    parser.add_argument('--path', type=str, help='Path to search for files', default='Y:\\clips\\red_rising')
+    parser.add_argument('--progress_file', type=str, help='Place to store all files that have already been processed', default='Y:\\clips\\red_rising\\already_processed.txt')
+    parser.add_argument('--output_path', type=str, help='Path for output files', default='Y:\\clips\\red_rising_split')
+    parser.add_argument('--num_threads', type=int, help='Number of concurrent workers processing files.', default=4)
     args = parser.parse_args()
 
     processed_files = set()
diff --git a/codes/scripts/audio/preparation/phase_2_sample_and_filter.py b/codes/scripts/audio/preparation/phase_2_sample_and_filter.py
index 91fef310..0528d7a3 100644
--- a/codes/scripts/audio/preparation/phase_2_sample_and_filter.py
+++ b/codes/scripts/audio/preparation/phase_2_sample_and_filter.py
@@ -103,13 +103,13 @@ def process_folder(folder, output_path, base_path, progress_file, max_files):
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('-path', type=str, help='Path to search for split files (should be the direct output of phase 1)',
+    parser.add_argument('--path', type=str, help='Path to search for split files (should be the direct output of phase 1)',
                         default='Y:\\clips\\red_rising_split')
-    parser.add_argument('-progress_file', type=str, help='Place to store all folders that have already been processed', default='Y:\\clips\\red_rising_filtered\\already_processed.txt')
-    parser.add_argument('-output_path', type=str, help='Path where sampled&filtered files are sent', default='Y:\\clips\\red_rising_filtered')
-    parser.add_argument('-num_threads', type=int, help='Number of concurrent workers processing files.', default=6)
-    parser.add_argument('-max_samples_per_folder', type=int, help='Maximum number of clips that can be extracted from each folder.', default=999999)
-    parser.add_argument('-classifier_model_opt', type=str, help='Train/test options file that configures the model used to classify the audio clips.',
+    parser.add_argument('--progress_file', type=str, help='Place to store all folders that have already been processed', default='Y:\\clips\\red_rising_filtered\\already_processed.txt')
+    parser.add_argument('--output_path', type=str, help='Path where sampled&filtered files are sent', default='Y:\\clips\\red_rising_filtered')
+    parser.add_argument('--num_threads', type=int, help='Number of concurrent workers processing files.', default=6)
+    parser.add_argument('--max_samples_per_folder', type=int, help='Maximum number of clips that can be extracted from each folder.', default=999999)
+    parser.add_argument('--classifier_model_opt', type=str, help='Train/test options file that configures the model used to classify the audio clips.',
                         default='../options/test_noisy_audio_clips_classifier.yml')
     args = parser.parse_args()
 
diff --git a/codes/scripts/audio/preparation/phase_3_generate_similarities.py b/codes/scripts/audio/preparation/phase_3_generate_similarities.py
index ad564bda..435d5ea4 100644
--- a/codes/scripts/audio/preparation/phase_3_generate_similarities.py
+++ b/codes/scripts/audio/preparation/phase_3_generate_similarities.py
@@ -107,7 +107,7 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('-o', type=str, help='Path to the options YAML file used to train the CLIP model', default='../options/train_voice_voice_clip.yml')
     parser.add_argument('--num_workers', type=int, help='Number concurrent processes to use', default=4)
-    parser.add_argument('--root_path', type=str, help='Root path to search for audio directories from', default='Y:\\clips\\red_rising_filtered')
+    parser.add_argument('--path', type=str, help='Root path to search for audio directories from', default='Y:\\clips\\red_rising_filtered')
     parser.add_argument('--clip_size', type=int, help='Amount of audio samples to pull from each file', default=22050)
     args = parser.parse_args()
 
@@ -115,7 +115,7 @@ if __name__ == '__main__':
         opt = yaml.load(f, Loader=Loader)
 
     print("Finding applicable files..")
-    all_files = recursively_find_audio_directories(args.root_path)
+    all_files = recursively_find_audio_directories(args.path)
     print(f"Found {len(all_files)}. Processing.")
     fn = functools.partial(process_subdir, options=opt, clip_sz=args.clip_size)
     if args.num_workers > 1:
diff --git a/codes/scripts/audio/preparation/pipeline.py b/codes/scripts/audio/preparation/pipeline.py
new file mode 100644
--- /dev/null
+++ b/codes/scripts/audio/preparation/pipeline.py
@@ -0,0 +1,42 @@
+"""Run the three audio-preparation phase scripts back to back.
+
+phase_1_split_files.py writes into a scratch directory (<output_path>_t1),
+phase_2_sample_and_filter.py reads that directory and writes into --output_path,
+and phase_3_generate_similarities.py then runs over --output_path. The scratch
+directory is deleted only after all three phases have succeeded.
+
+The phase scripts are addressed relative to the current working directory.
+"""
+import argparse
+import os
+import shutil
+import subprocess
+import sys
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--path', type=str, required=True, help='Path to search for files')
+    parser.add_argument('--output_path', type=str, required=True, help='Path for output files')
+    args = parser.parse_args()
+
+    # Scratch directory: phase 1 output, phase 2 input.
+    split_path = args.output_path + "_t1"
+
+    # Argument lists rather than one command string: paths containing spaces stay
+    # intact and the commands also launch on POSIX, where a string needs a shell.
+    cmds = [
+        ["scripts/audio/preparation/phase_1_split_files.py", f"--path={args.path}",
+         f"--progress_file={split_path}/progress.txt", "--num_threads=6", f"--output_path={split_path}"],
+        ["scripts/audio/preparation/phase_2_sample_and_filter.py", f"--path={split_path}",
+         f"--progress_file={args.output_path}/progress.txt", "--num_threads=6", f"--output_path={args.output_path}"],
+        ["scripts/audio/preparation/phase_3_generate_similarities.py", f"--path={args.output_path}", "--num_workers=4"],
+    ]
+    os.makedirs(args.output_path, exist_ok=True)
+    os.makedirs(split_path, exist_ok=True)
+
+    for cmd in cmds:
+        # check=True stops at the first failing phase, so later phases never run on
+        # incomplete data and the scratch directory is kept for inspection.
+        subprocess.run([sys.executable] + cmd, check=True)
+
+    shutil.rmtree(split_path)