diff --git a/vall_e/emb/qnt.py b/vall_e/emb/qnt.py index c6f712a..845c0b1 100755 --- a/vall_e/emb/qnt.py +++ b/vall_e/emb/qnt.py @@ -20,32 +20,29 @@ from tqdm import tqdm from torch.nn.utils.rnn import pad_sequence +AVAILABLE_AUDIO_BACKENDS = [] try: from .codecs.encodec import * - cfg.inference.audio_backends["encodec"] = True + AVAILABLE_AUDIO_BACKENDS.append("encodec") except Exception as e: - cfg.inference.audio_backends["encodec"] = False # e _logger.warning(str(e)) try: from .codecs.vocos import * - cfg.inference.audio_backends["vocos"] = True + AVAILABLE_AUDIO_BACKENDS.append("vocos") except Exception as e: - cfg.inference.audio_backends["vocos"] = False # e _logger.warning(str(e)) try: from .codecs.dac import * - cfg.inference.audio_backends["dac"] = True + AVAILABLE_AUDIO_BACKENDS.append("dac") except Exception as e: - cfg.inference.audio_backends["dac"] = False # e _logger.warning(str(e)) try: from .codecs.nemo import * - cfg.inference.audio_backends["nemo"] = True + AVAILABLE_AUDIO_BACKENDS.append("nemo") except Exception as e: - cfg.inference.audio_backends["nemo"] = False # e _logger.warning(str(e)) @cache diff --git a/vall_e/inference.py b/vall_e/inference.py index 6cdf09f..1639b4a 100644 --- a/vall_e/inference.py +++ b/vall_e/inference.py @@ -14,7 +14,7 @@ from pathlib import Path from tqdm import tqdm, trange from .emb import g2p, qnt -from .emb.qnt import trim, trim_random, unload_model, repeat_extend_audio +from .emb.qnt import trim, trim_random, unload_model, repeat_extend_audio, AVAILABLE_AUDIO_BACKENDS from .emb.transcribe import transcribe from .utils import to_device, set_seed, clamp, ml @@ -62,7 +62,7 @@ class TTS(): cfg.dataset.use_hdf5 = False # could use cfg.load_hdf5(), but why would it ever need to be loaded for inferencing # fallback to encodec if no vocos - if cfg.audio_backend == "vocos" and not cfg.inference.audio_backends.get("vocos", False): + if cfg.audio_backend == "vocos" and "vocos" not in AVAILABLE_AUDIO_BACKENDS: _logger.warning("Vocos requested but not available, falling back to Encodec...") cfg.set_audio_backend(cfg.audio_backend)