From bfe70e9d56a73620567c151dcd3c32385294a43e Mon Sep 17 00:00:00 2001 From: mrq Date: Thu, 3 Apr 2025 23:26:00 -0500 Subject: [PATCH] ugh --- vall_e/config.py | 2 ++ vall_e/emb/qnt.py | 16 ++++++++-------- vall_e/inference.py | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/vall_e/config.py b/vall_e/config.py index 8dd53be..6430ab7 100644 --- a/vall_e/config.py +++ b/vall_e/config.py @@ -839,6 +839,8 @@ class Inference: batch_size: int = 16 # I don't know what would be a good batch size + audio_backends: dict = field(default_factory=lambda: {}) + @property def dtype(self): return coerce_dtype(self.weight_dtype) diff --git a/vall_e/emb/qnt.py b/vall_e/emb/qnt.py index ee306fc..c6f712a 100755 --- a/vall_e/emb/qnt.py +++ b/vall_e/emb/qnt.py @@ -22,30 +22,30 @@ from torch.nn.utils.rnn import pad_sequence try: from .codecs.encodec import * + cfg.inference.audio_backends["encodec"] = True except Exception as e: - cfg.inference.use_encodec = False - #raise e + cfg.inference.audio_backends["encodec"] = False # e _logger.warning(str(e)) try: from .codecs.vocos import * + cfg.inference.audio_backends["vocos"] = True except Exception as e: - cfg.inference.use_vocos = False - #raise e + cfg.inference.audio_backends["vocos"] = False # e _logger.warning(str(e)) try: from .codecs.dac import * + cfg.inference.audio_backends["dac"] = True except Exception as e: - cfg.inference.use_dac = False - #raise e + cfg.inference.audio_backends["dac"] = False # e _logger.warning(str(e)) try: from .codecs.nemo import * + cfg.inference.audio_backends["nemo"] = True except Exception as e: - cfg.inference.use_nemo = False - #raise e + cfg.inference.audio_backends["nemo"] = False # e _logger.warning(str(e)) @cache diff --git a/vall_e/inference.py b/vall_e/inference.py index 2fed0d0..6cdf09f 100644 --- a/vall_e/inference.py +++ b/vall_e/inference.py @@ -62,7 +62,7 @@ class TTS(): cfg.dataset.use_hdf5 = False # could use cfg.load_hdf5(), but why would it ever need to be loaded for inferencing # fallback to encodec if no vocos - if cfg.audio_backend == "vocos" and not cfg.inference.use_vocos: + if cfg.audio_backend == "vocos" and not cfg.inference.audio_backends.get("vocos", False): _logger.warning("Vocos requested but not available, falling back to Encodec...") cfg.set_audio_backend(cfg.audio_backend)