forked from mrq/tortoise-tts
Compare commits
3 Commits
84316d8f80
...
5f1c032312
Author | SHA1 | Date | |
---|---|---|---|
5f1c032312 | |||
|
2f86565969 | ||
|
192a510ee1 |
|
@ -483,8 +483,11 @@ class TextToSpeech:
|
|||
auto_conditioning, diffusion_conditioning, auto_conds, _ = conditioning_latents
|
||||
else:
|
||||
auto_conditioning, diffusion_conditioning = self.get_random_conditioning_latents()
|
||||
|
||||
auto_conditioning = auto_conditioning.to(self.device)
|
||||
diffusion_conditioning = diffusion_conditioning.to(self.device)
|
||||
if auto_conds is not None:
|
||||
auto_conds = auto_conds.to(self.device)
|
||||
|
||||
diffuser = load_discrete_vocoder_diffuser(desired_diffusion_steps=diffusion_iterations, cond_free=cond_free, cond_free_k=cond_free_k)
|
||||
|
||||
|
@ -539,8 +542,10 @@ class TextToSpeech:
|
|||
for batch in tqdm_override(samples, verbose=verbose, progress=progress, desc=desc):
|
||||
for i in range(batch.shape[0]):
|
||||
batch[i] = fix_autoregressive_output(batch[i], stop_mel_token)
|
||||
|
||||
if cvvp_amount != 1:
|
||||
clvp = self.clvp(text_tokens.repeat(batch.shape[0], 1), batch, return_loss=False)
|
||||
|
||||
if auto_conds is not None and cvvp_amount > 0:
|
||||
cvvp_accumulator = 0
|
||||
for cl in range(auto_conds.shape[1]):
|
||||
|
|
12
webui.py
12
webui.py
|
@ -265,7 +265,7 @@ def generate(
|
|||
with open(f'{get_voice_dir()}/{voice}/cond_latents.pth', 'rb') as f:
|
||||
info['latents'] = base64.b64encode(f.read()).decode("ascii")
|
||||
|
||||
if voicefixer:
|
||||
if args.voice_fixer and voicefixer:
|
||||
# we could do this on the pieces before they get stitched up anyways to save some compute
|
||||
# but the stitching would need to read back from disk, defeating the point of caching the waveform
|
||||
for path in progress.tqdm(audio_cache, desc="Running voicefix..."):
|
||||
|
@ -443,8 +443,12 @@ def reload_tts():
|
|||
def cancel_generate():
|
||||
tortoise.api.STOP_SIGNAL = True
|
||||
|
||||
def get_voice_list():
|
||||
voice_dir = get_voice_dir()
|
||||
return [d for d in os.listdir(voice_dir) if os.path.isdir(os.path.join(voice_dir, d))]
|
||||
|
||||
def update_voices():
|
||||
return gr.Dropdown.update(choices=sorted(os.listdir(get_voice_dir())) + ["microphone"])
|
||||
return gr.Dropdown.update(choices=sorted(get_voice_list()) + ["microphone"])
|
||||
|
||||
def export_exec_settings( share, listen, check_for_updates, models_from_local_only, low_vram, embed_output_metadata, latents_lean_and_mean, voice_fixer, cond_latent_max_chunk_size, sample_batch_size, concurrency_count, output_sample_rate, output_volume ):
|
||||
args.share = share
|
||||
|
@ -592,7 +596,7 @@ def setup_gradio():
|
|||
)
|
||||
prompt = gr.Textbox(lines=1, label="Custom Emotion + Prompt (if selected)")
|
||||
voice = gr.Dropdown(
|
||||
sorted(os.listdir(get_voice_dir())) + ["microphone"],
|
||||
sorted(get_voice_list()) + ["microphone"],
|
||||
label="Voice",
|
||||
type="value",
|
||||
)
|
||||
|
@ -692,7 +696,7 @@ def setup_gradio():
|
|||
with gr.Row():
|
||||
with gr.Column():
|
||||
history_voices = gr.Dropdown(
|
||||
sorted(os.listdir(get_voice_dir())) + ["microphone"],
|
||||
sorted(get_voice_list()) + ["microphone"],
|
||||
label="Voice",
|
||||
type="value",
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue
Block a user