added checkboxes to use the original method for calculating latents (ignores the voice chunk field)

This commit is contained in:
mrq 2023-05-21 01:47:48 +00:00
parent 9e3eca2261
commit 31da215c5f
3 changed files with 11 additions and 6 deletions

@ -1 +1 @@
Subproject commit c90ee7c5296992ad96c8790b5b7cc3737062e1e6 Subproject commit 5ff00bf3bfa97e2c8e9f166b920273f83ac9d8f0

View File

@ -875,7 +875,6 @@ def generate_tortoise(**kwargs):
'k': parameters['candidates'], 'k': parameters['candidates'],
'diffusion_sampler': parameters['diffusion_sampler'], 'diffusion_sampler': parameters['diffusion_sampler'],
'breathing_room': parameters['breathing_room'], 'breathing_room': parameters['breathing_room'],
'progress': parameters['progress'],
'half_p': "Half Precision" in parameters['experimentals'], 'half_p': "Half Precision" in parameters['experimentals'],
'cond_free': "Conditioning-Free" in parameters['experimentals'], 'cond_free': "Conditioning-Free" in parameters['experimentals'],
'cvvp_amount': parameters['cvvp_weight'], 'cvvp_amount': parameters['cvvp_weight'],
@ -1256,7 +1255,7 @@ def update_baseline_for_latents_chunks( voice ):
return int(total_duration / total) if total > 0 else 1 return int(total_duration / total) if total > 0 else 1
return int(total_duration / args.autocalculate_voice_chunk_duration_size) if total_duration > 0 else 1 return int(total_duration / args.autocalculate_voice_chunk_duration_size) if total_duration > 0 else 1
def compute_latents(voice=None, voice_samples=None, voice_latents_chunks=0, progress=None): def compute_latents(voice=None, voice_samples=None, voice_latents_chunks=0, original_ar=False, original_diffusion=False):
global tts global tts
global args global args
@ -1309,7 +1308,7 @@ def compute_latents(voice=None, voice_samples=None, voice_latents_chunks=0, prog
if voice_samples is None: if voice_samples is None:
return return
conditioning_latents = tts.get_conditioning_latents(voice_samples, return_mels=not args.latents_lean_and_mean, slices=voice_latents_chunks, force_cpu=args.force_cpu_for_conditioning_latents) conditioning_latents = tts.get_conditioning_latents(voice_samples, return_mels=not args.latents_lean_and_mean, slices=voice_latents_chunks, force_cpu=args.force_cpu_for_conditioning_latents, original_ar=original_ar, original_diffusion=original_diffusion)
if len(conditioning_latents) == 4: if len(conditioning_latents) == 4:
conditioning_latents = (conditioning_latents[0], conditioning_latents[1], conditioning_latents[2], None) conditioning_latents = (conditioning_latents[0], conditioning_latents[1], conditioning_latents[2], None)

View File

@ -78,6 +78,8 @@ def generate_proxy(
repetition_penalty, repetition_penalty,
cond_free_k, cond_free_k,
experimentals, experimentals,
voice_latents_original_ar,
voice_latents_original_diffusion,
progress=gr.Progress(track_tqdm=True) progress=gr.Progress(track_tqdm=True)
): ):
kwargs = locals() kwargs = locals()
@ -166,12 +168,12 @@ def reset_generate_settings_proxy():
return tuple(res) return tuple(res)
def compute_latents_proxy(voice, voice_latents_chunks, progress=gr.Progress(track_tqdm=True)): def compute_latents_proxy(voice, voice_latents_chunks, original_ar, original_diffusion, progress=gr.Progress(track_tqdm=True)):
if args.tts_backend == "bark": if args.tts_backend == "bark":
global tts global tts
tts.create_voice( voice ) tts.create_voice( voice )
return voice return voice
compute_latents( voice=voice, voice_latents_chunks=voice_latents_chunks, progress=progress ) compute_latents( voice=voice, voice_latents_chunks=voice_latents_chunks, original_ar=original_ar, original_diffusion=original_diffusion )
return voice return voice
@ -387,6 +389,8 @@ def setup_gradio():
GENERATE_SETTINGS["voice"] = gr.Dropdown(choices=voice_list_with_defaults, label="Voice", type="value", value=voice_list_with_defaults[0]) # it'd be very cash money if gradio was able to default to the first value in the list without this shit GENERATE_SETTINGS["voice"] = gr.Dropdown(choices=voice_list_with_defaults, label="Voice", type="value", value=voice_list_with_defaults[0]) # it'd be very cash money if gradio was able to default to the first value in the list without this shit
GENERATE_SETTINGS["mic_audio"] = gr.Audio( label="Microphone Source", source="microphone", type="filepath", visible=False ) GENERATE_SETTINGS["mic_audio"] = gr.Audio( label="Microphone Source", source="microphone", type="filepath", visible=False )
GENERATE_SETTINGS["voice_latents_chunks"] = gr.Number(label="Voice Chunks", precision=0, value=0, visible=args.tts_backend=="tortoise") GENERATE_SETTINGS["voice_latents_chunks"] = gr.Number(label="Voice Chunks", precision=0, value=0, visible=args.tts_backend=="tortoise")
GENERATE_SETTINGS["voice_latents_original_ar"] = gr.Checkbox(label="Use Original Latents Method (AR)", visible=args.tts_backend=="tortoise")
GENERATE_SETTINGS["voice_latents_original_diffusion"] = gr.Checkbox(label="Use Original Latents Method (Diffusion)", visible=args.tts_backend=="tortoise")
with gr.Row(): with gr.Row():
refresh_voices = gr.Button(value="Refresh Voice List") refresh_voices = gr.Button(value="Refresh Voice List")
recompute_voice_latents = gr.Button(value="(Re)Compute Voice Latents") recompute_voice_latents = gr.Button(value="(Re)Compute Voice Latents")
@ -783,6 +787,8 @@ def setup_gradio():
inputs=[ inputs=[
GENERATE_SETTINGS['voice'], GENERATE_SETTINGS['voice'],
GENERATE_SETTINGS['voice_latents_chunks'], GENERATE_SETTINGS['voice_latents_chunks'],
GENERATE_SETTINGS['voice_latents_original_ar'],
GENERATE_SETTINGS['voice_latents_original_diffusion'],
], ],
outputs=GENERATE_SETTINGS['voice'], outputs=GENERATE_SETTINGS['voice'],
) )