kludge fix for an oversight in the model when trying to train for longer input prompt durations......
parent 84005c5b00
commit eea70f5698
@@ -11,7 +11,7 @@ from einops import rearrange
 from pathlib import Path
 
 from .emb import g2p, qnt
-from .emb.qnt import trim, trim_random, unload_model
+from .emb.qnt import trim, trim_random, unload_model, repeat_extend_audio
 from .utils import to_device, set_seed, wrapper as ml
 
 from .config import cfg, Config
@@ -103,7 +103,7 @@ class TTS():
 		return torch.tensor([ id ])
 
 	# to-do: trim before quantizing, instead of after
-	def encode_audio( self, paths, trim_length=0.0 ):
+	def encode_audio( self, paths, trim_length=9.0 ):
 		# already a tensor, return it
 		if isinstance( paths, Tensor ):
 			return paths
@@ -126,8 +126,14 @@ class TTS():
 
 		res = torch.cat(proms)
 
+		# kludge, but it's to correct an oversight in training
+		if trim_length:
+			res = repeat_extend_audio( res, cfg.dataset.frames_per_second * trim_length )
+
+		"""
 		if trim_length:
 			res = trim( res, int( cfg.dataset.frames_per_second * trim_length ) )
+		"""
 
 		return res
 
@@ -346,7 +346,7 @@ with ui:
 			with gr.Row():
 				layout["inference_tts"]["inputs"]["max-seconds"] = gr.Slider(value=12, minimum=1, maximum=32, step=0.1, label="Maximum Seconds", info="Limits how many steps to perform in the AR pass.")
 				#layout["inference_tts"]["inputs"]["max-nar-levels"] = gr.Slider(value=7, minimum=0, maximum=7, step=1, label="Max NAR Levels", info="Limits how many steps to perform in the NAR pass.")
-				layout["inference_tts"]["inputs"]["input-prompt-length"] = gr.Slider(value=3.0, minimum=0.0, maximum=12.0, step=0.05, label="Input Prompt Trim Length", info="Trims the input prompt down to X seconds. Set 0 to disable.")
+				layout["inference_tts"]["inputs"]["input-prompt-length"] = gr.Slider(value=9.0, minimum=0.0, maximum=12.0, step=0.05, label="Input Prompt Trim Length", info="Trims the input prompt down to X seconds. Set 0 to disable.")
 			with gr.Row():
 				layout["inference_tts"]["inputs"]["ar-temp"] = gr.Slider(value=1.0, minimum=0.0, maximum=1.5, step=0.05, label="Temperature (AR)", info="Modifies the randomness from the samples in the AR. (0 to greedy sample)")
 				layout["inference_tts"]["inputs"]["nar-temp"] = gr.Slider(value=0.0, minimum=0.0, maximum=1.5, step=0.05, label="Temperature (NAR)", info="Modifies the randomness from the samples in the NAR. (0 to greedy sample)")
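For context on the kludge: rather than trimming the encoded reference prompt down to trim_length seconds, encode_audio now repeat-extends it out to cfg.dataset.frames_per_second * trim_length frames, so inference-time prompts match the longer input prompt durations the model was trained on; the old trim() path is kept but disabled inside a docstring. The snippet below is only a rough sketch of what such a repeat-extend helper might do, not the actual implementation in .emb.qnt; the helper name, the (frames, quant_levels) layout, and the rounding are assumptions for illustration.

import torch

def repeat_extend_audio_sketch( qnt: torch.Tensor, target_frames: float ) -> torch.Tensor:
	# Hypothetical stand-in for .emb.qnt.repeat_extend_audio, for illustration only:
	# tile the quantized prompt along its time axis until it covers the requested
	# number of frames, then cut off the excess.
	# Assumes shape (frames, quant_levels); the real helper may differ.
	target = int( target_frames )
	frames = qnt.shape[0]
	if frames >= target:
		return qnt[:target]
	repeats = -(-target // frames)  # ceiling division
	return qnt.repeat( repeats, *([1] * (qnt.dim() - 1)) )[:target]

For example, with trim_length=9.0 a short prompt would be tiled until it spans frames_per_second * 9 frames before being handed to the model, instead of being passed through at its original length.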