From 776e5634fd3bfa7dc310f26fa486ca38130fb2f2 Mon Sep 17 00:00:00 2001 From: James Betker Date: Wed, 13 Apr 2022 17:04:19 -0600 Subject: [PATCH] Remove intelligibility refinement It's no longer a concern. :) --- api.py | 26 -------------------------- requirements.txt | 3 +-- 2 files changed, 1 insertion(+), 28 deletions(-) diff --git a/api.py b/api.py index 978358e..2c4c336 100644 --- a/api.py +++ b/api.py @@ -5,9 +5,7 @@ from urllib import request import torch import torch.nn.functional as F -import torchaudio import progressbar -import ocotillo from models.diffusion_decoder import DiffusionTts from models.autoregressive import UnifiedVoice @@ -262,27 +260,3 @@ class TextToSpeech: if len(wav_candidates) > 1: return wav_candidates return wav_candidates[0] - - def refine_for_intellibility(self, wav_candidates, corresponding_codes, output_path): - """ - Further refine the remaining candidates using a ASR model to pick out the ones that are the most understandable. - TODO: finish this function - :param wav_candidates: - :return: - """ - transcriber = ocotillo.Transcriber(on_cuda=True) - transcriptions = transcriber.transcribe_batch(torch.cat(wav_candidates, dim=0).squeeze(1), 24000) - best = 99999999 - for i, transcription in enumerate(transcriptions): - dist = lev_distance(transcription, args.text.lower()) - if dist < best: - best = dist - best_codes = corresponding_codes[i].unsqueeze(0) - best_wav = wav_candidates[i] - del transcriber - torchaudio.save(os.path.join(output_path, f'{voice}_poor.wav'), best_wav.squeeze(0).cpu(), 24000) - - # Perform diffusion again with the high-quality diffuser. 
- mel = do_spectrogram_diffusion(diffusion, final_diffuser, best_codes, cond_diffusion, mean=False) - wav = vocoder.inference(mel) - torchaudio.save(os.path.join(args.output_path, f'{voice}.wav'), wav.squeeze(0).cpu(), 24000) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 1e695a5..568575c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,5 +7,4 @@ inflect progressbar einops unidecode -x-transformers -ocotillo \ No newline at end of file +x-transformers \ No newline at end of file