From f88097ccf659c41f796abd85756d492098e46bef Mon Sep 17 00:00:00 2001
From: mrq <mrq@ecker.tech>
Date: Wed, 16 Oct 2024 14:27:58 -0500
Subject: [PATCH] add config option to set the rate of sampling randomly vs
 similar speakers during training

---
 vall_e/config.py | 5 +++--
 vall_e/data.py   | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/vall_e/config.py b/vall_e/config.py
index 5a1ff98..cfdef7e 100755
--- a/vall_e/config.py
+++ b/vall_e/config.py
@@ -158,8 +158,9 @@ class Dataset:
 	max_resps: int = 1 # number of samples to target for training
 	p_resp_append: float = 1.0 # probability to append another sample to the training target
 	p_resp_pad_silence: float = 0.0 # probability to pad resp with silence to fit within the next window
-	prompt_similar_top_k: int = 1
-	prompt_similar_top_k_offset: int = 0
+	prompt_similar_p: float = 0.75 # probability of sampling a similar prompt instead of a random prompt
+	prompt_similar_top_k: int = 1 # number of top similar candidates to sample from
+	prompt_similar_top_k_offset: int = 0 # offset from the top-k to sample from
 
 	sample_type: str = "path" # path | speaker
 	sample_order: str = "interleaved" # duration
diff --git a/vall_e/data.py b/vall_e/data.py
index 8fa6ae1..67a8c55 100755
--- a/vall_e/data.py
+++ b/vall_e/data.py
@@ -1014,7 +1014,7 @@ class Dataset(_Dataset):
 
 		for _ in range(cfg.dataset.max_prompts):
 			if reference is not None and cfg.dataset.prom_sample_similar:
-				path = self.get_similar_utterance( reference, offset = len(prom_list) )
+				path = self.get_similar_utterance( reference, offset = len(prom_list) ) if random.random() < cfg.dataset.prompt_similar_p else random.choice(choices)
 				# yuck
 				if not path:
 					path = random.choice(choices)