moved duration padding for NAR-len to be a scalar instead (since it seems longer utterances need it much more than shorter utterances do)
commit 2a084544e8
parent 6aee08f9c0
@@ -272,13 +272,13 @@ class TTS():
 		with torch.autocast("cuda", dtype=self.dtype, enabled=self.amp):
 			if model_len is not None:
 				# extra kwargs
-				duration_padding = sampling_kwargs.pop("duration_padding", 1)
+				duration_padding = sampling_kwargs.pop("duration_padding", 1.05)
 				nar_len_prefix_length = sampling_kwargs.pop("nar_len_prefix_length", 0)
 
 				len_list = model_len( text_list=[phns], proms_list=[prom], task_list=["len"], disable_tqdm=not tqdm, **{"max_duration": 5} ) # don't need more than that
 
 				# add an additional X seconds
-				len_list = [ l + duration_padding * cfg.dataset.frames_per_second for l in len_list ]
+				len_list = [ l * duration_padding * cfg.dataset.frames_per_second for l in len_list ]
 
 				kwargs = {}
 				# nasty hardcode to load a reference file and have that as the input target
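The change swaps a fixed per-utterance offset for a proportional one: instead of always adding the same number of padding frames on top of the predicted length, the prediction is scaled, so longer utterances get more slack. A minimal sketch of the two behaviours, with assumed values (the frame rate and predicted lengths below are illustrative, not taken from the repository's config):

# Hypothetical values chosen only to illustrate the behaviour change.
frames_per_second = 75          # assumed codec frame rate
predicted_frames = [150, 1500]  # roughly a 2-second and a 20-second utterance

# Old behaviour: add a fixed amount of padding (duration_padding in seconds).
additive = [l + 1 * frames_per_second for l in predicted_frames]
# -> [225, 1575]; both utterances receive the same 75 extra frames

# New behaviour: treat duration_padding as a scalar on the predicted length.
scaled = [int(l * 1.05) for l in predicted_frames]
# -> [157, 1575]; the longer utterance receives proportionally more headroom

print(additive, scaled)

This only sketches the additive-versus-proportional intent stated in the commit message; the changed line itself still includes the cfg.dataset.frames_per_second factor, as shown in the diff above.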