add config option to set the rate of sampling randomly vs similar speakers during training

2024-10-16 14:27:58 -05:00 · 2024-10-16 14:27:58 -05:00 · f88097ccf6
commit f88097ccf6
parent 48461833c2
2 changed files with 4 additions and 3 deletions
--- a/vall_e/config.py
+++ b/vall_e/config.py
@@ -158,8 +158,9 @@ class Dataset:
 	max_resps: int = 1 # number of samples to target for training
 	p_resp_append: float = 1.0 # probability to append another sample to the training target
 	p_resp_pad_silence: float = 0.0 # probability to pad resp with silence to fit within the next window
-	prompt_similar_top_k: int = 1
-	prompt_similar_top_k_offset: int = 0
+	prompt_similar_p: float = 0.75 # odds of sampling for a similar prompt instead of a random prompt
+	prompt_similar_top_k: int = 1 # top-k similar candidates to sample from 
+	prompt_similar_top_k_offset: int = 0 # offset from the top-k to sample from

 	sample_type: str = "path" # path | speaker
 	sample_order: str = "interleaved" # duration
--- a/vall_e/data.py
+++ b/vall_e/data.py
@@ -1014,7 +1014,7 @@ class Dataset(_Dataset):

 		for _ in range(cfg.dataset.max_prompts):
 			if reference is not None and cfg.dataset.prom_sample_similar:
-				path = self.get_similar_utterance( reference, offset = len(prom_list) )
+				path = self.get_similar_utterance( reference, offset = len(prom_list) ) if random.random() < cfg.dataset.prompt_similar_p else random.choice(choices)
 				# yuck
 				if not path:
 					path = random.choice(choices)