From 8b6095f68186821435b5399def50f3e191fac7c2 Mon Sep 17 00:00:00 2001 From: mrq Date: Thu, 17 Oct 2024 14:37:21 -0500 Subject: [PATCH] saner defaults, maybe --- vall_e/inference.py | 9 ++------- vall_e/webui.py | 4 ++-- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/vall_e/inference.py b/vall_e/inference.py index 150e128..44d2e1e 100755 --- a/vall_e/inference.py +++ b/vall_e/inference.py @@ -103,7 +103,7 @@ class TTS(): return torch.tensor([ id ]) # to-do: trim before quantizing, instead of after - def encode_audio( self, paths, trim_length=9.0 ): + def encode_audio( self, paths, trim_length=5.0 ): # already a tensor, return it if isinstance( paths, Tensor ): return paths @@ -126,14 +126,9 @@ class TTS(): res = torch.cat(proms) - # kludge, but it's to correct an oversight in training if trim_length: res = repeat_extend_audio( res, int( cfg.dataset.frames_per_second * trim_length ) ) - - """ - if trim_length: - res = trim( res, int( cfg.dataset.frames_per_second * trim_length ) ) - """ + #res = trim( res, int( cfg.dataset.frames_per_second * trim_length ) ) return res diff --git a/vall_e/webui.py b/vall_e/webui.py index 97f166e..8ce9285 100644 --- a/vall_e/webui.py +++ b/vall_e/webui.py @@ -346,9 +346,9 @@ with ui: with gr.Row(): layout["inference_tts"]["inputs"]["max-seconds"] = gr.Slider(value=12, minimum=1, maximum=32, step=0.1, label="Maximum Seconds", info="Limits how many steps to perform in the AR pass.") #layout["inference_tts"]["inputs"]["max-nar-levels"] = gr.Slider(value=7, minimum=0, maximum=7, step=1, label="Max NAR Levels", info="Limits how many steps to perform in the NAR pass.") - layout["inference_tts"]["inputs"]["input-prompt-length"] = gr.Slider(value=9.0, minimum=0.0, maximum=12.0, step=0.05, label="Input Prompt Trim Length", info="Trims the input prompt down to X seconds. Set 0 to disable.") + layout["inference_tts"]["inputs"]["input-prompt-length"] = gr.Slider(value=5.0, minimum=0.0, maximum=12.0, step=0.05, label="Input Prompt Trim Length", info="Trims the input prompt down to X seconds. Set 0 to disable.") with gr.Row(): - layout["inference_tts"]["inputs"]["ar-temp"] = gr.Slider(value=1.0, minimum=0.0, maximum=1.5, step=0.05, label="Temperature (AR)", info="Modifies the randomness from the samples in the AR. (0 to greedy sample)") + layout["inference_tts"]["inputs"]["ar-temp"] = gr.Slider(value=0.9, minimum=0.0, maximum=1.5, step=0.05, label="Temperature (AR)", info="Modifies the randomness from the samples in the AR. (0 to greedy sample)") layout["inference_tts"]["inputs"]["nar-temp"] = gr.Slider(value=0.0, minimum=0.0, maximum=1.5, step=0.05, label="Temperature (NAR)", info="Modifies the randomness from the samples in the NAR. (0 to greedy sample)") with gr.Row(): #layout["inference_tts"]["inputs"]["input-prompt-prefix"] = gr.Checkbox(label="Input Prompt as Prefix", info="Treats the input prompt clip as the prefix of the generated sequence.")