Compare commits

...

3 Commits

2 changed files with 13 additions and 4 deletions

View File

@ -483,8 +483,11 @@ class TextToSpeech:
auto_conditioning, diffusion_conditioning, auto_conds, _ = conditioning_latents
else:
auto_conditioning, diffusion_conditioning = self.get_random_conditioning_latents()
auto_conditioning = auto_conditioning.to(self.device)
diffusion_conditioning = diffusion_conditioning.to(self.device)
if auto_conds is not None:
auto_conds = auto_conds.to(self.device)
diffuser = load_discrete_vocoder_diffuser(desired_diffusion_steps=diffusion_iterations, cond_free=cond_free, cond_free_k=cond_free_k)
@ -539,8 +542,10 @@ class TextToSpeech:
for batch in tqdm_override(samples, verbose=verbose, progress=progress, desc=desc):
for i in range(batch.shape[0]):
batch[i] = fix_autoregressive_output(batch[i], stop_mel_token)
if cvvp_amount != 1:
clvp = self.clvp(text_tokens.repeat(batch.shape[0], 1), batch, return_loss=False)
if auto_conds is not None and cvvp_amount > 0:
cvvp_accumulator = 0
for cl in range(auto_conds.shape[1]):

View File

@ -265,7 +265,7 @@ def generate(
with open(f'{get_voice_dir()}/{voice}/cond_latents.pth', 'rb') as f:
info['latents'] = base64.b64encode(f.read()).decode("ascii")
if voicefixer:
if args.voice_fixer and voicefixer:
# we could do this on the pieces before they get stiched up anyways to save some compute
# but the stitching would need to read back from disk, defeating the point of caching the waveform
for path in progress.tqdm(audio_cache, desc="Running voicefix..."):
@ -443,8 +443,12 @@ def reload_tts():
def cancel_generate():
    """Signal the in-flight TTS generation to stop.

    The tortoise API polls this module-level flag between steps; raising it
    asks the current generation loop to bail out at its next checkpoint.
    """
    setattr(tortoise.api, "STOP_SIGNAL", True)
def get_voice_list():
    """Return the available voice names.

    Each subdirectory of the voice directory counts as one voice; plain files
    living alongside them (e.g. cached latents) are excluded.
    """
    base = get_voice_dir()
    voices = []
    for entry in os.listdir(base):
        if os.path.isdir(os.path.join(base, entry)):
            voices.append(entry)
    return voices
def update_voices():
    """Refresh the voice dropdown's choices from disk.

    Returns a gradio Dropdown update whose choices are the sorted voice
    directory names plus the special "microphone" pseudo-voice.
    """
    # Use get_voice_list() rather than a raw os.listdir() of the voice dir:
    # only directories are voices, and stray files (cached latents, etc.)
    # must not show up in the dropdown. The stale os.listdir()-based return
    # that preceded this line was unreachable-shadowing dead code and has
    # been removed.
    return gr.Dropdown.update(choices=sorted(get_voice_list()) + ["microphone"])
def export_exec_settings( share, listen, check_for_updates, models_from_local_only, low_vram, embed_output_metadata, latents_lean_and_mean, voice_fixer, cond_latent_max_chunk_size, sample_batch_size, concurrency_count, output_sample_rate, output_volume ):
args.share = share
@ -592,7 +596,7 @@ def setup_gradio():
)
prompt = gr.Textbox(lines=1, label="Custom Emotion + Prompt (if selected)")
voice = gr.Dropdown(
sorted(os.listdir(get_voice_dir())) + ["microphone"],
sorted(get_voice_list()) + ["microphone"],
label="Voice",
type="value",
)
@ -692,7 +696,7 @@ def setup_gradio():
with gr.Row():
with gr.Column():
history_voices = gr.Dropdown(
sorted(os.listdir(get_voice_dir())) + ["microphone"],
sorted(get_voice_list()) + ["microphone"],
label="Voice",
type="value",
)