From 31da215c5f0f77a249dd5131bf8c3a4f149299a3 Mon Sep 17 00:00:00 2001 From: mrq Date: Sun, 21 May 2023 01:47:48 +0000 Subject: [PATCH] added checkboxes to use the original method for calculating latents (ignores the voice chunk field) --- modules/tortoise-tts | 2 +- src/utils.py | 5 ++--- src/webui.py | 10 ++++++++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/modules/tortoise-tts b/modules/tortoise-tts index c90ee7c..5ff00bf 160000 --- a/modules/tortoise-tts +++ b/modules/tortoise-tts @@ -1 +1 @@ -Subproject commit c90ee7c5296992ad96c8790b5b7cc3737062e1e6 +Subproject commit 5ff00bf3bfa97e2c8e9f166b920273f83ac9d8f0 diff --git a/src/utils.py b/src/utils.py index 3d42e72..fba96f7 100755 --- a/src/utils.py +++ b/src/utils.py @@ -875,7 +875,6 @@ def generate_tortoise(**kwargs): 'k': parameters['candidates'], 'diffusion_sampler': parameters['diffusion_sampler'], 'breathing_room': parameters['breathing_room'], - 'progress': parameters['progress'], 'half_p': "Half Precision" in parameters['experimentals'], 'cond_free': "Conditioning-Free" in parameters['experimentals'], 'cvvp_amount': parameters['cvvp_weight'], @@ -1256,7 +1255,7 @@ def update_baseline_for_latents_chunks( voice ): return int(total_duration / total) if total > 0 else 1 return int(total_duration / args.autocalculate_voice_chunk_duration_size) if total_duration > 0 else 1 -def compute_latents(voice=None, voice_samples=None, voice_latents_chunks=0, progress=None): +def compute_latents(voice=None, voice_samples=None, voice_latents_chunks=0, original_ar=False, original_diffusion=False): global tts global args @@ -1309,7 +1308,7 @@ def compute_latents(voice=None, voice_samples=None, voice_latents_chunks=0, prog if voice_samples is None: return - conditioning_latents = tts.get_conditioning_latents(voice_samples, return_mels=not args.latents_lean_and_mean, slices=voice_latents_chunks, force_cpu=args.force_cpu_for_conditioning_latents) + conditioning_latents = tts.get_conditioning_latents(voice_samples, return_mels=not args.latents_lean_and_mean, slices=voice_latents_chunks, force_cpu=args.force_cpu_for_conditioning_latents, original_ar=original_ar, original_diffusion=original_diffusion) if len(conditioning_latents) == 4: conditioning_latents = (conditioning_latents[0], conditioning_latents[1], conditioning_latents[2], None) diff --git a/src/webui.py b/src/webui.py index deefe20..fc4ce78 100755 --- a/src/webui.py +++ b/src/webui.py @@ -78,6 +78,8 @@ def generate_proxy( repetition_penalty, cond_free_k, experimentals, + voice_latents_original_ar, + voice_latents_original_diffusion, progress=gr.Progress(track_tqdm=True) ): kwargs = locals() @@ -166,12 +168,12 @@ def reset_generate_settings_proxy(): return tuple(res) -def compute_latents_proxy(voice, voice_latents_chunks, progress=gr.Progress(track_tqdm=True)): +def compute_latents_proxy(voice, voice_latents_chunks, original_ar, original_diffusion, progress=gr.Progress(track_tqdm=True)): if args.tts_backend == "bark": global tts tts.create_voice( voice ) return voice - compute_latents( voice=voice, voice_latents_chunks=voice_latents_chunks, progress=progress ) + compute_latents( voice=voice, voice_latents_chunks=voice_latents_chunks, original_ar=original_ar, original_diffusion=original_diffusion ) return voice @@ -387,6 +389,8 @@ def setup_gradio(): GENERATE_SETTINGS["voice"] = gr.Dropdown(choices=voice_list_with_defaults, label="Voice", type="value", value=voice_list_with_defaults[0]) # it'd be very cash money if gradio was able to default to the first value in the list without this shit GENERATE_SETTINGS["mic_audio"] = gr.Audio( label="Microphone Source", source="microphone", type="filepath", visible=False ) GENERATE_SETTINGS["voice_latents_chunks"] = gr.Number(label="Voice Chunks", precision=0, value=0, visible=args.tts_backend=="tortoise") + GENERATE_SETTINGS["voice_latents_original_ar"] = gr.Checkbox(label="Use Original Latents Method (AR)", visible=args.tts_backend=="tortoise") + GENERATE_SETTINGS["voice_latents_original_diffusion"] = gr.Checkbox(label="Use Original Latents Method (Diffusion)", visible=args.tts_backend=="tortoise") with gr.Row(): refresh_voices = gr.Button(value="Refresh Voice List") recompute_voice_latents = gr.Button(value="(Re)Compute Voice Latents") @@ -783,6 +787,8 @@ def setup_gradio(): inputs=[ GENERATE_SETTINGS['voice'], GENERATE_SETTINGS['voice_latents_chunks'], + GENERATE_SETTINGS['voice_latents_original_ar'], + GENERATE_SETTINGS['voice_latents_original_diffusion'], ], outputs=GENERATE_SETTINGS['voice'], )