diff --git a/tortoise/api.py b/tortoise/api.py
index 4d499c2..00582b1 100755
--- a/tortoise/api.py
+++ b/tortoise/api.py
@@ -351,7 +351,7 @@ class TextToSpeech:
                         chunk_size = int(chunk_size / 2)
                     print(f"Using method 1: size of best fit: {chunk_size}")
 
-                    chunks = torch.chunk(concat, int(concat.shape[-1] / chunk_size) + 1, dim=1)
+                    chunks = torch.chunk(concat, int(concat.shape[-1] / chunk_size), dim=1)
                 # default new behavior: use the smallest voice sample as a common chunk size
                 else:
                     if chunk_size is None:
diff --git a/webui.py b/webui.py
index 5d09ace..47d04a7 100755
--- a/webui.py
+++ b/webui.py
@@ -65,7 +65,7 @@ def generate(
 		mic = load_audio(mic_audio, tts.input_sample_rate)
 		voice_samples, conditioning_latents = [mic], None
 	elif voice == "random":
-		voice_samples, conditioning_latents = None, None
+		voice_samples, conditioning_latents = None, tts.get_random_conditioning_latents()
 	else:
 		progress(0, desc="Loading voice...")
 		voice_samples, conditioning_latents = load_voice(voice)
@@ -469,9 +469,12 @@ def reload_tts():
 def cancel_generate():
 	tortoise.api.STOP_SIGNAL = True
 
-def get_voice_list():
-	voice_dir = get_voice_dir()
-	return sorted([d for d in os.listdir(voice_dir) if os.path.isdir(os.path.join(voice_dir, d))]) + ["microphone", "random"]
+def get_voice_list(dir=None):
+	"""Return the sorted names of non-empty sub-directories of `dir`, followed by "microphone" and "random"."""
+	# Resolve the default at call time: a `dir=get_voice_dir()` default is evaluated only once, when the module is imported.
+	if dir is None:
+		dir = get_voice_dir()
+	return sorted([d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d)) and len(os.listdir(os.path.join(dir, d))) > 0 ]) + ["microphone", "random"]
 
 def update_voices():
 	return gr.Dropdown.update(choices=get_voice_list())
@@ -732,7 +735,7 @@ def setup_gradio():
 		with gr.Row():
 			with gr.Column():
 				history_voices = gr.Dropdown(
-					get_voice_list(),
+					get_voice_list("./results/"),
 					label="Voice",
 					type="value",
 				)