From 4f359bffa454f17571d66b71abb85f771889026b Mon Sep 17 00:00:00 2001 From: mrq Date: Fri, 3 Feb 2023 04:56:30 +0000 Subject: [PATCH] Added progress for transforming to audio, changed number inputs to sliders instead --- app.py | 6 +++--- tortoise/api.py | 15 ++++++++------- tortoise/utils/diffusion.py | 34 +++++++++++++++++++++++++--------- 3 files changed, 36 insertions(+), 19 deletions(-) mode change 100644 => 100755 tortoise/utils/diffusion.py diff --git a/app.py b/app.py index fa53470..b94df9f 100755 --- a/app.py +++ b/app.py @@ -121,9 +121,9 @@ def main(): label="Preset", type="value", ) - candidates = gr.Number(value=1, precision=0, label="Candidates") - num_autoregressive_samples = gr.Number(value=128, precision=0, label="Samples") - diffusion_iterations = gr.Number(value=128, precision=0, label="Iterations") + candidates = gr.Slider(value=1, minimum=1, maximum=6, label="Candidates") + num_autoregressive_samples = gr.Slider(value=128, minimum=0, maximum=512, step=1, label="Samples") + diffusion_iterations = gr.Slider(value=128, minimum=0, maximum=512, step=1, label="Iterations") temperature = gr.Slider(value=0.2, minimum=0, maximum=1, step=0.1, label="Temperature") voice = gr.Dropdown( diff --git a/tortoise/api.py b/tortoise/api.py index 6f88e5d..3b63549 100755 --- a/tortoise/api.py +++ b/tortoise/api.py @@ -40,11 +40,12 @@ MODELS = { } def tqdm_override(arr, verbose=False, progress=None, desc=None): + if verbose and desc is not None: + print(desc) + if progress is None: - if verbose and desc is not None: - print(desc) return tqdm(arr, disable=not verbose) - return progress.tqdm(arr, desc=desc) + return progress.tqdm(arr, desc=desc, track_tqdm=True) def download_models(specific_models=None): """ @@ -152,7 +153,7 @@ def fix_autoregressive_output(codes, stop_token, complain=True): return codes -def do_spectrogram_diffusion(diffusion_model, diffuser, latents, conditioning_latents, temperature=1, verbose=True): +def do_spectrogram_diffusion(diffusion_model, diffuser, latents, conditioning_latents, temperature=1, verbose=True, progress=None, desc=None): """ Uses the specified diffusion model to convert discrete codes into a spectrogram. """ @@ -164,7 +165,7 @@ def do_spectrogram_diffusion(diffusion_model, diffuser, latents, conditioning_la noise = torch.randn(output_shape, device=latents.device) * temperature mel = diffuser.p_sample_loop(diffusion_model, output_shape, noise=noise, model_kwargs={'precomputed_aligned_embeddings': precomputed_embeddings}, - progress=verbose) + verbose=verbose, progress=progress, desc=desc) return denormalize_tacotron_mel(mel)[:,:,:output_seq_len] @@ -471,7 +472,7 @@ class TextToSpeech: del auto_conditioning wav_candidates = [] - for b in tqdm_override(range(best_results.shape[0]), verbose=verbose, progress=progress, desc="Transforming autoregressive outputs into audio.."): + for b in range(best_results.shape[0]): codes = best_results[b].unsqueeze(0) latents = best_latents[b].unsqueeze(0) @@ -487,7 +488,7 @@ class TextToSpeech: break mel = do_spectrogram_diffusion(self.diffusion, diffuser, latents, diffusion_conditioning, - temperature=diffusion_temperature, verbose=verbose) + temperature=diffusion_temperature, verbose=verbose, progress=progress, desc="Transforming autoregressive outputs into audio..") wav = self.vocoder.inference(mel) wav_candidates.append(wav.cpu()) diff --git a/tortoise/utils/diffusion.py b/tortoise/utils/diffusion.py old mode 100644 new mode 100755 index e877ff2..74b8086 --- a/tortoise/utils/diffusion.py +++ b/tortoise/utils/diffusion.py @@ -15,6 +15,13 @@ import torch import torch as th from tqdm import tqdm +def tqdm_override(arr, verbose=False, progress=None, desc=None): + if verbose and desc is not None: + print(desc) + + if progress is None: + return tqdm(arr, disable=not verbose) + return progress.tqdm(arr, desc=desc, track_tqdm=True) def normal_kl(mean1, logvar1, mean2, logvar2): """ @@ -540,7 +547,9 @@ class GaussianDiffusion: cond_fn=None, model_kwargs=None, device=None, - progress=False, + verbose=False, + progress=None, + desc=None ): """ Generate samples from the model. @@ -558,7 +567,7 @@ class GaussianDiffusion: pass to the model. This can be used for conditioning. :param device: if specified, the device to create the samples on. If not specified, use a model parameter's device. - :param progress: if True, show a tqdm progress bar. + :param verbose: if True, show a tqdm progress bar. :return: a non-differentiable batch of samples. """ final = None @@ -571,7 +580,9 @@ class GaussianDiffusion: cond_fn=cond_fn, model_kwargs=model_kwargs, device=device, + verbose=verbose, progress=progress, + desc=desc ): final = sample return final["sample"] @@ -586,7 +597,9 @@ class GaussianDiffusion: cond_fn=None, model_kwargs=None, device=None, - progress=False, + verbose=False, + progress=None, + desc=None ): """ Generate samples from the model and yield intermediate samples from @@ -605,7 +618,7 @@ class GaussianDiffusion: img = th.randn(*shape, device=device) indices = list(range(self.num_timesteps))[::-1] - for i in tqdm(indices, disable=not progress): + for i in tqdm_override(indices, verbose=verbose, desc=desc, progress=progress): t = th.tensor([i] * shape[0], device=device) with th.no_grad(): out = self.p_sample( @@ -718,8 +731,9 @@ class GaussianDiffusion: cond_fn=None, model_kwargs=None, device=None, - progress=False, + verbose=False, eta=0.0, + progress=None, ): """ Generate samples from the model using DDIM. @@ -736,8 +750,9 @@ class GaussianDiffusion: cond_fn=cond_fn, model_kwargs=model_kwargs, device=device, - progress=progress, + verbose=verbose, eta=eta, + progress=progress, ): final = sample return final["sample"] @@ -752,8 +767,9 @@ class GaussianDiffusion: cond_fn=None, model_kwargs=None, device=None, - progress=False, + verbose=False, eta=0.0, + progress=None, ): """ Use DDIM to sample from the model and yield intermediate samples from @@ -770,11 +786,11 @@ class GaussianDiffusion: img = th.randn(*shape, device=device) indices = list(range(self.num_timesteps))[::-1] - if progress: + if verbose: # Lazy import so that we don't depend on tqdm. from tqdm.auto import tqdm - indices = tqdm(indices, disable=not progress) + indices = tqdm_override(indices, verbose=verbose, desc="DDIM Sample Loop Progressive", progress=progress) for i in indices: t = th.tensor([i] * shape[0], device=device)