From f88097ccf659c41f796abd85756d492098e46bef Mon Sep 17 00:00:00 2001 From: mrq Date: Wed, 16 Oct 2024 14:27:58 -0500 Subject: [PATCH] add config option to set the rate of sampling randomly vs similar speakers during training --- vall_e/config.py | 5 +++-- vall_e/data.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/vall_e/config.py b/vall_e/config.py index 5a1ff98..cfdef7e 100755 --- a/vall_e/config.py +++ b/vall_e/config.py @@ -158,8 +158,9 @@ class Dataset: max_resps: int = 1 # number of samples to target for training p_resp_append: float = 1.0 # probability to append another sample to the training target p_resp_pad_silence: float = 0.0 # probability to pad resp with silence to fit within the next window - prompt_similar_top_k: int = 1 - prompt_similar_top_k_offset: int = 0 + prompt_similar_p: float = 0.75 # odds of sampling for a similar prompt instead of a random prompt + prompt_similar_top_k: int = 1 # top-k similar candidates to sample from + prompt_similar_top_k_offset: int = 0 # offset from the top-k to sample from sample_type: str = "path" # path | speaker sample_order: str = "interleaved" # duration diff --git a/vall_e/data.py b/vall_e/data.py index 8fa6ae1..67a8c55 100755 --- a/vall_e/data.py +++ b/vall_e/data.py @@ -1014,7 +1014,7 @@ class Dataset(_Dataset): for _ in range(cfg.dataset.max_prompts): if reference is not None and cfg.dataset.prom_sample_similar: - path = self.get_similar_utterance( reference, offset = len(prom_list) ) + path = self.get_similar_utterance( reference, offset = len(prom_list) ) if random.random() < cfg.dataset.prompt_similar_p else random.choice(choices) # yuck if not path: path = random.choice(choices)