This commit is contained in:
mrq 2025-04-03 23:26:00 -05:00
parent 2e93438867
commit bfe70e9d56
3 changed files with 11 additions and 9 deletions

View File

@ -839,6 +839,8 @@ class Inference:
batch_size: int = 16 # I don't know what would be a good batch size
audio_backends: dict = field(default_factory=lambda: {})
@property
def dtype(self):
    """Return ``weight_dtype`` after passing it through ``coerce_dtype``."""
    configured_dtype = self.weight_dtype
    return coerce_dtype(configured_dtype)

View File

@ -22,30 +22,30 @@ from torch.nn.utils.rnn import pad_sequence
# Probe the optional codec backends (encodec, vocos, dac, nemo) one at a time.
# A successful import records True under the backend's name in
# cfg.inference.audio_backends; any exception records False there and is
# logged as a warning instead of propagating, so a missing backend does not
# abort the import of this module.
# NOTE(review): each handler also assigns a cfg.inference.use_<backend> flag.
# This listing looks like a rendered diff without +/- markers, so those
# assignments (and the `#raise e` lines) are presumably the removed side of
# the change — confirm against the repository before relying on them.
try:
from .codecs.encodec import *
cfg.inference.audio_backends["encodec"] = True
except Exception as e:
cfg.inference.use_encodec = False
#raise e
cfg.inference.audio_backends["encodec"] = False # e
_logger.warning(str(e))
# Same probe for the vocos backend.
try:
from .codecs.vocos import *
cfg.inference.audio_backends["vocos"] = True
except Exception as e:
cfg.inference.use_vocos = False
#raise e
cfg.inference.audio_backends["vocos"] = False # e
_logger.warning(str(e))
# Same probe for the dac backend.
try:
from .codecs.dac import *
cfg.inference.audio_backends["dac"] = True
except Exception as e:
cfg.inference.use_dac = False
#raise e
cfg.inference.audio_backends["dac"] = False # e
_logger.warning(str(e))
# Same probe for the nemo backend.
try:
from .codecs.nemo import *
cfg.inference.audio_backends["nemo"] = True
except Exception as e:
cfg.inference.use_nemo = False
#raise e
cfg.inference.audio_backends["nemo"] = False # e
_logger.warning(str(e))
@cache

View File

@ -62,7 +62,7 @@ class TTS():
cfg.dataset.use_hdf5 = False # could use cfg.load_hdf5(), but why would it ever need to be loaded for inferencing
# fallback to encodec if no vocos
if cfg.audio_backend == "vocos" and not cfg.inference.use_vocos:
if cfg.audio_backend == "vocos" and not cfg.inference.audio_backends.get("vocos", False):
_logger.warning("Vocos requested but not available, falling back to Encodec...")
cfg.set_audio_backend(cfg.audio_backend)