From 596c2df11ca1ea5918abb845489be72ad8e74b76 Mon Sep 17 00:00:00 2001
From: mrq <mrq@ecker.tech>
Date: Tue, 18 Feb 2025 10:49:21 -0600
Subject: [PATCH] added arg to skip processing speakers with not enough
 utterances for whenever I get around to processing my subset of Emilia for
 nvidia/audio-codec-44khz (because Emilia has a ton of low-utterance speaker
 counts and right now my focus with the nemo model is on getting it to
 actually speak without many problems rather than feed it a gorillion
 speakers)

---
 vall_e/emb/process.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/vall_e/emb/process.py b/vall_e/emb/process.py
index 600f9e2..99c56c1 100644
--- a/vall_e/emb/process.py
+++ b/vall_e/emb/process.py
@@ -178,6 +178,7 @@ def process(
 	batch_size=1,
 	max_duration=None,
 	max_samples=None,
+	min_utterances=None,
 	skip_existing_folders=False,
 	low_memory=False,
 	strict_languages=False,
@@ -334,6 +335,9 @@ def process(
 					i = 0
 					presliced = not inpath.exists()
 					
+					if min_utterances and len(metadata[filename]["segments"]) < min_utterances:
+						continue
+
 					for segment in metadata[filename]["segments"]:
 						id = pad(i, 4)
 						i = i + 1
@@ -410,6 +414,7 @@ def main():
 	parser.add_argument("--batch-size", type=int, default=0)
 	parser.add_argument("--max-duration", type=int, default=0)
 	parser.add_argument("--max-samples", type=int, default=0)
+	parser.add_argument("--min-utterances", type=int, default=0)
 	
 	parser.add_argument("--device", type=str, default="cuda")
 	parser.add_argument("--dtype", type=str, default="bfloat16")
@@ -442,6 +447,7 @@ def main():
 		batch_size=args.batch_size,
 		max_duration=args.max_duration,
 		max_samples=args.max_samples,
+		min_utterances=args.min_utterances,
 		skip_existing_folders=args.skip_existing_folders,
 		strict_languages=args.strict_languages,