forked from camenduru/ai-voice-cloning
Added checkboxes to use the original method for calculating latents (the original method ignores the voice chunk field).
This commit is contained in:
parent
9e3eca2261
commit
31da215c5f
|
@ -1 +1 @@
|
|||
Subproject commit c90ee7c5296992ad96c8790b5b7cc3737062e1e6
|
||||
Subproject commit 5ff00bf3bfa97e2c8e9f166b920273f83ac9d8f0
|
|
@ -875,7 +875,6 @@ def generate_tortoise(**kwargs):
|
|||
'k': parameters['candidates'],
|
||||
'diffusion_sampler': parameters['diffusion_sampler'],
|
||||
'breathing_room': parameters['breathing_room'],
|
||||
'progress': parameters['progress'],
|
||||
'half_p': "Half Precision" in parameters['experimentals'],
|
||||
'cond_free': "Conditioning-Free" in parameters['experimentals'],
|
||||
'cvvp_amount': parameters['cvvp_weight'],
|
||||
|
@ -1256,7 +1255,7 @@ def update_baseline_for_latents_chunks( voice ):
|
|||
return int(total_duration / total) if total > 0 else 1
|
||||
return int(total_duration / args.autocalculate_voice_chunk_duration_size) if total_duration > 0 else 1
|
||||
|
||||
def compute_latents(voice=None, voice_samples=None, voice_latents_chunks=0, progress=None):
|
||||
def compute_latents(voice=None, voice_samples=None, voice_latents_chunks=0, original_ar=False, original_diffusion=False):
|
||||
global tts
|
||||
global args
|
||||
|
||||
|
@ -1309,7 +1308,7 @@ def compute_latents(voice=None, voice_samples=None, voice_latents_chunks=0, prog
|
|||
if voice_samples is None:
|
||||
return
|
||||
|
||||
conditioning_latents = tts.get_conditioning_latents(voice_samples, return_mels=not args.latents_lean_and_mean, slices=voice_latents_chunks, force_cpu=args.force_cpu_for_conditioning_latents)
|
||||
conditioning_latents = tts.get_conditioning_latents(voice_samples, return_mels=not args.latents_lean_and_mean, slices=voice_latents_chunks, force_cpu=args.force_cpu_for_conditioning_latents, original_ar=original_ar, original_diffusion=original_diffusion)
|
||||
|
||||
if len(conditioning_latents) == 4:
|
||||
conditioning_latents = (conditioning_latents[0], conditioning_latents[1], conditioning_latents[2], None)
|
||||
|
|
10
src/webui.py
10
src/webui.py
|
@ -78,6 +78,8 @@ def generate_proxy(
|
|||
repetition_penalty,
|
||||
cond_free_k,
|
||||
experimentals,
|
||||
voice_latents_original_ar,
|
||||
voice_latents_original_diffusion,
|
||||
progress=gr.Progress(track_tqdm=True)
|
||||
):
|
||||
kwargs = locals()
|
||||
|
@ -166,12 +168,12 @@ def reset_generate_settings_proxy():
|
|||
|
||||
return tuple(res)
|
||||
|
||||
def compute_latents_proxy(voice, voice_latents_chunks, progress=gr.Progress(track_tqdm=True)):
|
||||
def compute_latents_proxy(voice, voice_latents_chunks, original_ar, original_diffusion, progress=gr.Progress(track_tqdm=True)):
|
||||
if args.tts_backend == "bark":
|
||||
global tts
|
||||
tts.create_voice( voice )
|
||||
return voice
|
||||
compute_latents( voice=voice, voice_latents_chunks=voice_latents_chunks, progress=progress )
|
||||
compute_latents( voice=voice, voice_latents_chunks=voice_latents_chunks, original_ar=original_ar, original_diffusion=original_diffusion )
|
||||
return voice
|
||||
|
||||
|
||||
|
@ -387,6 +389,8 @@ def setup_gradio():
|
|||
GENERATE_SETTINGS["voice"] = gr.Dropdown(choices=voice_list_with_defaults, label="Voice", type="value", value=voice_list_with_defaults[0]) # it'd be very cash money if gradio was able to default to the first value in the list without this shit
|
||||
GENERATE_SETTINGS["mic_audio"] = gr.Audio( label="Microphone Source", source="microphone", type="filepath", visible=False )
|
||||
GENERATE_SETTINGS["voice_latents_chunks"] = gr.Number(label="Voice Chunks", precision=0, value=0, visible=args.tts_backend=="tortoise")
|
||||
GENERATE_SETTINGS["voice_latents_original_ar"] = gr.Checkbox(label="Use Original Latents Method (AR)", visible=args.tts_backend=="tortoise")
|
||||
GENERATE_SETTINGS["voice_latents_original_diffusion"] = gr.Checkbox(label="Use Original Latents Method (Diffusion)", visible=args.tts_backend=="tortoise")
|
||||
with gr.Row():
|
||||
refresh_voices = gr.Button(value="Refresh Voice List")
|
||||
recompute_voice_latents = gr.Button(value="(Re)Compute Voice Latents")
|
||||
|
@ -783,6 +787,8 @@ def setup_gradio():
|
|||
inputs=[
|
||||
GENERATE_SETTINGS['voice'],
|
||||
GENERATE_SETTINGS['voice_latents_chunks'],
|
||||
GENERATE_SETTINGS['voice_latents_original_ar'],
|
||||
GENERATE_SETTINGS['voice_latents_original_diffusion'],
|
||||
],
|
||||
outputs=GENERATE_SETTINGS['voice'],
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue
Block a user