removed kludgy wrappers for passing progress when I was a pythonlet and didn't know gradio can hook into tqdm outputs anyways

This commit is contained in:
mrq 2023-05-04 23:39:39 +00:00
parent 086aad5b49
commit c90ee7c529
2 changed files with 11 additions and 40 deletions
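For context on the mechanism this commit leans on: gradio's Progress helper can track tqdm bars on its own, so no progress object has to be threaded through the call stack. A minimal sketch of that mechanism (assuming gradio 3.x or later; the handler below is illustrative, not code from this repo):

    import time

    import gradio as gr
    from tqdm import tqdm

    # track_tqdm=True tells gradio to mirror any tqdm bar created while
    # this handler runs into the web UI's progress display.
    def generate(text, progress=gr.Progress(track_tqdm=True)):
        for _ in tqdm(range(10), desc="Generating"):
            time.sleep(0.1)  # stand-in for real work
        return text

    demo = gr.Interface(fn=generate, inputs="text", outputs="text")

    if __name__ == "__main__":
        demo.launch()

With that in place, the tqdm_override() shims below can be deleted and every call site can use plain tqdm().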

View File

@@ -83,16 +83,6 @@ def check_for_kill_signal():
         STOP_SIGNAL = False
         raise Exception("Kill signal detected")
 
-def tqdm_override(arr, verbose=False, progress=None, desc=None):
-    check_for_kill_signal()
-
-    if verbose and desc is not None:
-        print(desc)
-
-    if progress is None:
-        return tqdm(arr, disable=not verbose)
-    return progress.tqdm(arr, desc=f'{progress.msg_prefix} {desc}' if hasattr(progress, 'msg_prefix') else desc)
-
 def download_models(specific_models=None):
     """
     Call to download all the models that Tortoise uses.
@@ -205,7 +195,7 @@ def fix_autoregressive_output(codes, stop_token, complain=True):
     return codes
 
-def do_spectrogram_diffusion(diffusion_model, diffuser, latents, conditioning_latents, temperature=1, verbose=True, progress=None, desc=None, sampler="P", input_sample_rate=22050, output_sample_rate=24000):
+def do_spectrogram_diffusion(diffusion_model, diffuser, latents, conditioning_latents, temperature=1, verbose=True, desc=None, sampler="P", input_sample_rate=22050, output_sample_rate=24000):
     """
     Uses the specified diffusion model to convert discrete codes into a spectrogram.
     """
@@ -218,8 +208,7 @@ def do_spectrogram_diffusion(diffusion_model, diffuser, latents, conditioning_la
     diffuser.sampler = sampler.lower()
     mel = diffuser.sample_loop(diffusion_model, output_shape, noise=noise,
-                               model_kwargs={'precomputed_aligned_embeddings': precomputed_embeddings},
-                               verbose=verbose, progress=progress, desc=desc)
+                               model_kwargs={'precomputed_aligned_embeddings': precomputed_embeddings}, desc=desc)
     mel = denormalize_tacotron_mel(mel)[:,:,:output_seq_len]
 
     if get_device_name() == "dml":
@@ -459,7 +448,7 @@ class TextToSpeech:
         if self.preloaded_tensors:
             self.cvvp = migrate_to_device( self.cvvp, self.device )
 
-    def get_conditioning_latents(self, voice_samples, return_mels=False, verbose=False, progress=None, slices=1, max_chunk_size=None, force_cpu=False):
+    def get_conditioning_latents(self, voice_samples, return_mels=False, verbose=False, slices=1, max_chunk_size=None, force_cpu=False):
         """
         Transforms one or more voice_samples into a tuple (autoregressive_conditioning_latent, diffusion_conditioning_latent).
         These are expressive learned latents that encode aspects of the provided clips like voice, intonation, and acoustic
@@ -503,7 +492,7 @@ class TextToSpeech:
             chunk_size = chunks[0].shape[-1]
 
         auto_conds = []
-        for chunk in tqdm_override(chunks, verbose=verbose, progress=progress, desc="Computing AR conditioning latents..."):
+        for chunk in tqdm(chunks, desc="Computing AR conditioning latents..."):
             auto_conds.append(format_conditioning(chunk, device=device, sampling_rate=self.input_sample_rate, cond_length=chunk_size))
         auto_conds = torch.stack(auto_conds, dim=1)
@@ -512,7 +501,7 @@ class TextToSpeech:
             self.autoregressive = migrate_to_device( self.autoregressive, self.device if self.preloaded_tensors else 'cpu' )
 
         diffusion_conds = []
-        for chunk in tqdm_override(chunks, verbose=verbose, progress=progress, desc="Computing diffusion conditioning latents..."):
+        for chunk in tqdm(chunks, desc="Computing diffusion conditioning latents..."):
             check_for_kill_signal()
             chunk = pad_or_truncate(chunk, chunk_size)
             cond_mel = wav_to_univnet_mel(migrate_to_device( chunk, device ), do_normalization=False, device=device)
@@ -576,7 +565,6 @@ class TextToSpeech:
             diffusion_sampler="P",
             breathing_room=8,
             half_p=False,
-            progress=None,
             **hf_generate_kwargs):
         """
         Produces an audio clip of the given text being spoken with the given reference voice.
@@ -681,7 +669,7 @@ class TextToSpeech:
             text_tokens = migrate_to_device( text_tokens, self.device )
 
         with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=half_p):
-            for b in tqdm_override(range(num_batches), verbose=verbose, progress=progress, desc="Generating autoregressive samples"):
+            for b in tqdm(range(num_batches), desc="Generating autoregressive samples"):
                 check_for_kill_signal()
                 codes = self.autoregressive.inference_speech(auto_conditioning, text_tokens,
                     do_sample=True,
@@ -730,7 +718,7 @@ class TextToSpeech:
             desc = f"Computing best candidates using CLVP {((1-cvvp_amount) * 100):2.0f}% and CVVP {(cvvp_amount * 100):2.0f}%"
 
-        for batch in tqdm_override(samples, verbose=verbose, progress=progress, desc=desc):
+        for batch in tqdm(samples, desc=desc):
             check_for_kill_signal()
             for i in range(batch.shape[0]):
                 batch[i] = fix_autoregressive_output(batch[i], stop_mel_token)
@@ -815,7 +803,7 @@ class TextToSpeech:
                     break
 
                 mel = do_spectrogram_diffusion(self.diffusion, diffuser, latents, diffusion_conditioning,
-                    temperature=diffusion_temperature, verbose=verbose, progress=progress, desc="Transforming autoregressive outputs into audio..", sampler=diffusion_sampler,
+                    temperature=diffusion_temperature, desc="Transforming autoregressive outputs into audio..", sampler=diffusion_sampler,
                     input_sample_rate=self.input_sample_rate, output_sample_rate=self.output_sample_rate)
                 wav = self.vocoder.inference(mel)

View File

@@ -13,15 +13,7 @@ import math
 
 import numpy as np
 import torch
 import torch as th
-from tqdm import tqdm
-
-def tqdm_override(arr, verbose=False, progress=None, desc=None):
-    if verbose and desc is not None:
-        print(desc)
-
-    if progress is None:
-        return tqdm(arr, disable=not verbose)
-    return progress.tqdm(arr, desc=f'{progress.msg_prefix} {desc}' if hasattr(progress, 'msg_prefix') else desc)
+from tqdm.auto import tqdm
 
 def normal_kl(mean1, logvar1, mean2, logvar2):
     """
@@ -556,7 +548,6 @@ class GaussianDiffusion:
         model_kwargs=None,
         device=None,
         verbose=False,
-        progress=None,
         desc=None
     ):
         """
@@ -589,7 +580,6 @@ class GaussianDiffusion:
             model_kwargs=model_kwargs,
             device=device,
             verbose=verbose,
-            progress=progress,
             desc=desc
         ):
             final = sample
@@ -606,7 +596,6 @@ class GaussianDiffusion:
         model_kwargs=None,
         device=None,
         verbose=False,
-        progress=None,
         desc=None
     ):
         """
@@ -626,7 +615,7 @@ class GaussianDiffusion:
             img = th.randn(*shape, device=device)
         indices = list(range(self.num_timesteps))[::-1]
 
-        for i in tqdm_override(indices, verbose=verbose, desc=desc, progress=progress):
+        for i in tqdm(indices, desc=desc):
             t = th.tensor([i] * shape[0], device=device)
             with th.no_grad():
                 out = self.p_sample(
@@ -741,7 +730,6 @@ class GaussianDiffusion:
         device=None,
         verbose=False,
         eta=0.0,
-        progress=None,
         desc=None,
     ):
         """
@@ -761,7 +749,6 @@ class GaussianDiffusion:
             device=device,
             verbose=verbose,
             eta=eta,
-            progress=progress,
             desc=desc
         ):
             final = sample
@@ -779,7 +766,6 @@ class GaussianDiffusion:
         device=None,
         verbose=False,
         eta=0.0,
-        progress=None,
         desc=None,
     ):
         """
@@ -798,10 +784,7 @@ class GaussianDiffusion:
         indices = list(range(self.num_timesteps))[::-1]
 
         if verbose:
-            # Lazy import so that we don't depend on tqdm.
-            from tqdm.auto import tqdm
-
-            indices = tqdm_override(indices, verbose=verbose, desc=desc, progress=progress)
+            indices = tqdm(indices, desc=desc)
 
         for i in indices:
             t = th.tensor([i] * shape[0], device=device)
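End to end, callers no longer pass progress= into these APIs; wrapping the call in a gradio handler is enough. A hypothetical usage sketch (the import path follows the upstream tortoise-tts layout, and the tts() arguments and output handling are abbreviated assumptions, not code from this commit):

    import gradio as gr

    from tortoise.api import TextToSpeech  # assumed module path

    tts = TextToSpeech()

    def synthesize(text, progress=gr.Progress(track_tqdm=True)):
        # The tqdm bars inside tts() (AR sampling, candidate ranking,
        # diffusion) surface in the web UI automatically.
        wav = tts.tts(text, verbose=True)
        return 24000, wav.squeeze().cpu().numpy()  # assumes 24kHz output

    demo = gr.Interface(fn=synthesize, inputs="text", outputs="audio")
    demo.launch()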