moved duration padding for NAR-len to be a scalar instead (since longer utterances seem to need it much more than shorter utterances)
parent 6aee08f9c0
commit 2a084544e8
@@ -272,13 +272,13 @@ class TTS():
 		with torch.autocast("cuda", dtype=self.dtype, enabled=self.amp):
 			if model_len is not None:
 				# extra kwargs
-				duration_padding = sampling_kwargs.pop("duration_padding", 1)
+				duration_padding = sampling_kwargs.pop("duration_padding", 1.05)
 				nar_len_prefix_length = sampling_kwargs.pop("nar_len_prefix_length", 0)
 
 				len_list = model_len( text_list=[phns], proms_list=[prom], task_list=["len"], disable_tqdm=not tqdm, **{"max_duration": 5} ) # don't need more than that
 
 				# add an additional X seconds
-				len_list = [ l + duration_padding * cfg.dataset.frames_per_second for l in len_list ]
+				len_list = [ l * duration_padding * cfg.dataset.frames_per_second for l in len_list ]
 
 				kwargs = {}
 				# nasty hardcode to load a reference file and have that as the input target
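
To illustrate the motivation (a minimal sketch with an assumed 75 frames-per-second codec rate and hypothetical helper names, not code from this repository): a fixed additive pad gives every utterance the same extra headroom, while a scalar pad grows with the predicted duration, so longer utterances get proportionally more.

# Sketch only: assumed frame rate and hypothetical helpers, not the repo's API.
FRAMES_PER_SECOND = 75  # assumed codec frame rate

def pad_additive(frames: int, seconds: float = 1.0) -> int:
    # old behavior: add a fixed number of seconds' worth of frames
    return int(frames + seconds * FRAMES_PER_SECOND)

def pad_scalar(frames: int, scale: float = 1.05) -> int:
    # new behavior: scale the predicted length, so padding grows with duration
    return int(frames * scale)

for frames in (150, 3000):  # roughly a 2-second vs. a 40-second prediction
    print(frames, "->", pad_additive(frames), "vs", pad_scalar(frames))

Under the additive scheme both predictions gain the same 75 frames; under the scalar scheme the 40-second prediction gains 150 frames while the 2-second one gains only about 7, matching the observation in the commit message.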