full pipeline
This commit is contained in:
parent
8aa6651fc7
commit
64c7582bf5
|
@ -47,10 +47,10 @@ def process_file(file, base_path, output_path, progress_file):
|
|||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-path', type=str, help='Path to search for files', default='Y:\\clips\\red_rising')
|
||||
parser.add_argument('-progress_file', type=str, help='Place to store all files that have already been processed', default='Y:\\clips\\red_rising\\already_processed.txt')
|
||||
parser.add_argument('-output_path', type=str, help='Path for output files', default='Y:\\clips\\red_rising_split')
|
||||
parser.add_argument('-num_threads', type=int, help='Number of concurrent workers processing files.', default=4)
|
||||
parser.add_argument('--path', type=str, help='Path to search for files', default='Y:\\clips\\red_rising')
|
||||
parser.add_argument('--progress_file', type=str, help='Place to store all files that have already been processed', default='Y:\\clips\\red_rising\\already_processed.txt')
|
||||
parser.add_argument('--output_path', type=str, help='Path for output files', default='Y:\\clips\\red_rising_split')
|
||||
parser.add_argument('--num_threads', type=int, help='Number of concurrent workers processing files.', default=4)
|
||||
args = parser.parse_args()
|
||||
|
||||
processed_files = set()
|
||||
|
|
|
@ -103,13 +103,13 @@ def process_folder(folder, output_path, base_path, progress_file, max_files):
|
|||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-path', type=str, help='Path to search for split files (should be the direct output of phase 1)',
|
||||
parser.add_argument('--path', type=str, help='Path to search for split files (should be the direct output of phase 1)',
|
||||
default='Y:\\clips\\red_rising_split')
|
||||
parser.add_argument('-progress_file', type=str, help='Place to store all folders that have already been processed', default='Y:\\clips\\red_rising_filtered\\already_processed.txt')
|
||||
parser.add_argument('-output_path', type=str, help='Path where sampled&filtered files are sent', default='Y:\\clips\\red_rising_filtered')
|
||||
parser.add_argument('-num_threads', type=int, help='Number of concurrent workers processing files.', default=6)
|
||||
parser.add_argument('-max_samples_per_folder', type=int, help='Maximum number of clips that can be extracted from each folder.', default=999999)
|
||||
parser.add_argument('-classifier_model_opt', type=str, help='Train/test options file that configures the model used to classify the audio clips.',
|
||||
parser.add_argument('--progress_file', type=str, help='Place to store all folders that have already been processed', default='Y:\\clips\\red_rising_filtered\\already_processed.txt')
|
||||
parser.add_argument('--output_path', type=str, help='Path where sampled&filtered files are sent', default='Y:\\clips\\red_rising_filtered')
|
||||
parser.add_argument('--num_threads', type=int, help='Number of concurrent workers processing files.', default=6)
|
||||
parser.add_argument('--max_samples_per_folder', type=int, help='Maximum number of clips that can be extracted from each folder.', default=999999)
|
||||
parser.add_argument('--classifier_model_opt', type=str, help='Train/test options file that configures the model used to classify the audio clips.',
|
||||
default='../options/test_noisy_audio_clips_classifier.yml')
|
||||
args = parser.parse_args()
|
||||
|
||||
|
|
|
@ -107,7 +107,7 @@ if __name__ == '__main__':
|
|||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-o', type=str, help='Path to the options YAML file used to train the CLIP model', default='../options/train_voice_voice_clip.yml')
|
||||
parser.add_argument('--num_workers', type=int, help='Number concurrent processes to use', default=4)
|
||||
parser.add_argument('--root_path', type=str, help='Root path to search for audio directories from', default='Y:\\clips\\red_rising_filtered')
|
||||
parser.add_argument('--path', type=str, help='Root path to search for audio directories from', default='Y:\\clips\\red_rising_filtered')
|
||||
parser.add_argument('--clip_size', type=int, help='Amount of audio samples to pull from each file', default=22050)
|
||||
args = parser.parse_args()
|
||||
|
||||
|
@ -115,7 +115,7 @@ if __name__ == '__main__':
|
|||
opt = yaml.load(f, Loader=Loader)
|
||||
|
||||
print("Finding applicable files..")
|
||||
all_files = recursively_find_audio_directories(args.root_path)
|
||||
all_files = recursively_find_audio_directories(args.path)
|
||||
print(f"Found {len(all_files)}. Processing.")
|
||||
fn = functools.partial(process_subdir, options=opt, clip_sz=args.clip_size)
|
||||
if args.num_workers > 1:
|
||||
|
|
24
codes/scripts/audio/preparation/pipeline.py
Normal file
24
codes/scripts/audio/preparation/pipeline.py
Normal file
|
@ -0,0 +1,24 @@
|
|||
import argparse
|
||||
import os
|
||||
import shutil
|
||||
from subprocess import Popen
|
||||
|
||||
def build_phase_commands(path, output_path):
    """Return the argv list for each preparation phase, in execution order.

    Each command is a list of arguments (not a single string) so that paths
    containing spaces reach the child script as one argument and no shell
    parsing is involved.

    Args:
        path: Directory handed to phase 1 as its ``--path``.
        output_path: Final output directory. Phase 1 writes to the sibling
            directory ``<output_path>_t1``; phase 2 reads that directory and
            writes to ``output_path``; phase 3 reads ``output_path``.

    Returns:
        A list of three argv lists, one per phase script.
    """
    # Local import: the file's top-level import block does not provide `sys`.
    import sys

    intermediate = f"{output_path}_t1"
    # Launch the phases with the interpreter running this script rather than
    # whatever "python" happens to resolve to on PATH.
    python = sys.executable
    return [
        [
            python,
            "scripts/audio/preparation/phase_1_split_files.py",
            f"--path={path}",
            f"--progress_file={intermediate}/progress.txt",
            "--num_threads=6",
            f"--output_path={intermediate}",
        ],
        [
            python,
            "scripts/audio/preparation/phase_2_sample_and_filter.py",
            f"--path={intermediate}",
            f"--progress_file={output_path}/progress.txt",
            "--num_threads=6",
            f"--output_path={output_path}",
        ],
        [
            python,
            "scripts/audio/preparation/phase_3_generate_similarities.py",
            f"--path={output_path}",
            "--num_workers=4",
        ],
    ]


def main(argv=None):
    """Run the three audio-preparation phases back to back.

    Args:
        argv: Optional argument list to parse instead of ``sys.argv[1:]``.

    Raises:
        SystemExit: If ``--path`` or ``--output_path`` is missing.
        subprocess.CalledProcessError: If any phase exits with a non-zero
            status. The remaining phases are not run and the intermediate
            directory is left in place.
    """
    # Local import: only `Popen` is imported at file level.
    import subprocess

    parser = argparse.ArgumentParser()
    # Both paths are mandatory: without them the phase scripts would receive
    # the literal string "None" as a path.
    parser.add_argument('--path', type=str, required=True, help='Path to search for files')
    parser.add_argument('--output_path', type=str, required=True, help='Path for output files')
    args = parser.parse_args(argv)

    intermediate_path = args.output_path + "_t1"
    os.makedirs(args.output_path, exist_ok=True)
    os.makedirs(intermediate_path, exist_ok=True)

    for cmd in build_phase_commands(args.path, args.output_path):
        # check=True stops the pipeline at the first failing phase, so later
        # phases never run on incomplete input.
        subprocess.run(cmd, check=True)

    # Reached only when every phase succeeded; on failure the intermediate
    # output (and its progress file) is kept.
    shutil.rmtree(intermediate_path)


if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue
Block a user