From 8331eee6fa9c5e70f37d841ac835b2c025d1d06f Mon Sep 17 00:00:00 2001 From: mrq Date: Tue, 18 Feb 2025 10:19:17 -0600 Subject: [PATCH] added arg to limit vall_e.emb.process batch size since there are some speaker groups in LibriLight/Speech/whatever that have 10K utterances and I'm growing impatient --- vall_e/emb/process.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/vall_e/emb/process.py b/vall_e/emb/process.py index d22592d..600f9e2 100644 --- a/vall_e/emb/process.py +++ b/vall_e/emb/process.py @@ -177,6 +177,7 @@ def process( slice="auto", batch_size=1, max_duration=None, + max_samples=None, skip_existing_folders=False, low_memory=False, strict_languages=False, @@ -373,6 +374,10 @@ def process( continue jobs.append(( outpath, waveform if presliced else waveform[:, start:end], sample_rate, text, language )) + if max_samples and len(jobs) >= max_samples: + break + if not low_memory and max_samples and len(jobs) >= max_samples: + break # processes audio files one at a time if low_memory: @@ -404,6 +409,7 @@ def main(): parser.add_argument("--slice", type=str, default="auto") parser.add_argument("--batch-size", type=int, default=0) parser.add_argument("--max-duration", type=int, default=0) + parser.add_argument("--max-samples", type=int, default=0) parser.add_argument("--device", type=str, default="cuda") parser.add_argument("--dtype", type=str, default="bfloat16") @@ -435,6 +441,7 @@ def main(): slice=args.slice, batch_size=args.batch_size, max_duration=args.max_duration, + max_samples=args.max_samples, skip_existing_folders=args.skip_existing_folders, strict_languages=args.strict_languages,