From 596c2df11ca1ea5918abb845489be72ad8e74b76 Mon Sep 17 00:00:00 2001 From: mrq Date: Tue, 18 Feb 2025 10:49:21 -0600 Subject: [PATCH] added arg to skip processing speakers with not enough utterances for whenever I get around to processing my subset of Emilia for nvidia/audio-codec-44khz (because Emilia has a ton of low-utterance speaker counts and right now my focus with the nemo model is on getting it to actually speak without much problems rather than feed it a gorillion speakers) --- vall_e/emb/process.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vall_e/emb/process.py b/vall_e/emb/process.py index 600f9e2..99c56c1 100644 --- a/vall_e/emb/process.py +++ b/vall_e/emb/process.py @@ -178,6 +178,7 @@ def process( batch_size=1, max_duration=None, max_samples=None, + min_utterances=None, skip_existing_folders=False, low_memory=False, strict_languages=False, @@ -334,6 +335,9 @@ def process( i = 0 presliced = not inpath.exists() + if min_utterances and len(metadata[filename]["segments"]) < min_utterances: + continue + for segment in metadata[filename]["segments"]: id = pad(i, 4) i = i + 1 @@ -410,6 +414,7 @@ def main(): parser.add_argument("--batch-size", type=int, default=0) parser.add_argument("--max-duration", type=int, default=0) parser.add_argument("--max-samples", type=int, default=0) + parser.add_argument("--min-utterances", type=int, default=0) parser.add_argument("--device", type=str, default="cuda") parser.add_argument("--dtype", type=str, default="bfloat16") @@ -442,6 +447,7 @@ def main(): batch_size=args.batch_size, max_duration=args.max_duration, max_samples=args.max_samples, + min_utterances=args.min_utterances, skip_existing_folders=args.skip_existing_folders, strict_languages=args.strict_languages,