adjustments

mrq 2023-08-02 18:36:26 -05:00
parent 0f9b81de75
commit f6597e2dfe
5 changed files with 63 additions and 33 deletions

View File

@@ -259,6 +259,19 @@ class Trainer:
     zero_optimization_level: int = 0
     use_compression_training: bool = False
 
+@dataclass()
+class Inference:
+    use_vocos: bool = True
+
+@dataclass()
+class BitsAndBytes:
+    enabled: bool = False
+    injects: bool = False
+
+    linear: bool = False
+    embedding: bool = False
+
 @dataclass()
 class Config(_Config):
     device: str = "cuda"
@@ -268,8 +281,9 @@ class Config(_Config):
     hyperparameters: Hyperparameters = field(default_factory=lambda: Hyperparameters)
     evaluation: Evaluation = field(default_factory=lambda: Evaluation)
     trainer: Trainer = field(default_factory=lambda: Trainer)
+    inference: Inference = field(default_factory=lambda: Inference)
+    bitsandbytes: BitsAndBytes = field(default_factory=lambda: BitsAndBytes)
-    use_vocos: bool = True
 
     @property
     def sample_rate(self):
@@ -438,6 +452,8 @@ cfg.models = Models(**cfg.models)
 cfg.hyperparameters = Hyperparameters(**cfg.hyperparameters)
 cfg.evaluation = Evaluation(**cfg.evaluation)
 cfg.trainer = Trainer(**cfg.trainer)
+cfg.inference = Inference(**cfg.inference)
+cfg.bitsandbytes = BitsAndBytes(**cfg.bitsandbytes)
 
 # cached_property stopped working...
 if cfg.dataset.use_hdf5:
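A quick sketch of how the two new sections get populated, mirroring the splat-into-dataclass pattern the hunk above uses for the existing sections; the override dict here is an assumed example, with values matching the dataclass defaults introduced by this commit:

    # assumed example: a parsed user config provides one plain dict per section
    raw = {
        "inference": {"use_vocos": True},
        "bitsandbytes": {"enabled": False, "injects": False, "linear": False, "embedding": False},
    }
    cfg.inference = Inference(**raw["inference"])
    cfg.bitsandbytes = BitsAndBytes(**raw["bitsandbytes"])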

View File

@@ -15,12 +15,10 @@ from einops import rearrange
 from torch import Tensor
 from tqdm import tqdm
 
-USE_VOCOS = False
 try:
     from vocos import Vocos
-    USE_VOCOS = True
 except Exception as e:
-    USE_VOCOS = False
+    cfg.inference.use_vocos = False
 
 @cache
 def _load_encodec_model(device="cuda"):
@@ -35,9 +33,11 @@ def _load_encodec_model(device="cuda"):
     elif cfg.models.levels == 8:
         bandwidth_id = 6.0
 
-    model = EncodecModel.encodec_model_24khz()
+    model = EncodecModel.encodec_model_24khz().to(device)
     model.set_target_bandwidth(bandwidth_id)
-    model.to(device)
+    model.bandwidth_id = bandwidth_id
+    model.sample_rate = cfg.sample_rate
+    model.backend = "encodec"
 
     return model
@@ -58,11 +58,12 @@ def _load_vocos_model(device="cuda"):
     model.bandwidth_id = torch.tensor([bandwidth_id], device=device)
     model.sample_rate = cfg.sample_rate
+    model.backend = "vocos"
 
     return model
 
 @cache
-def _load_model(device="cuda", vocos=USE_VOCOS):
+def _load_model(device="cuda", vocos=cfg.inference.use_vocos):
     if vocos:
         model = _load_vocos_model(device)
     else:
@@ -99,7 +100,7 @@ def decode(codes: Tensor, device="cuda"):
     codes = codes.to(torch.int32)
 
     kwargs = {}
-    if USE_VOCOS:
+    if model.backend == "vocos":
         x = model.codes_to_features(codes[0])
         kwargs['bandwidth_id'] = model.bandwidth_id
     else:
@@ -107,7 +108,7 @@
     wav = model.decode(x, **kwargs)
 
-    if not USE_VOCOS:
+    if model.backend == "encodec":
         wav = wav[0]
 
     return wav, model.sample_rate
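The running change in this file is that the cached model now carries its own metadata (backend, sample_rate, bandwidth_id), so decode() keys off the object it actually loaded instead of a module-level USE_VOCOS flag, and forcing _load_model(device, vocos=False) stays consistent even when vocos imports fine. A minimal sketch of the resulting dispatch, with the helper name _finish_decode assumed purely for illustration:

    # illustrative helper (not part of this commit): dispatch on the metadata
    # attached at load time rather than on a global flag
    def _finish_decode(model, x, **kwargs):
        wav = model.decode(x, **kwargs)
        if model.backend == "encodec":
            # as in the hunk above, the encodec result is indexed to drop its
            # leading (batch) dimension; vocos output is returned as-is
            wav = wav[0]
        return wav, model.sample_rate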

View File

@@ -16,8 +16,6 @@ from torchmetrics.classification import BinaryAccuracy, MulticlassAccuracy, Mult
 from .retnet import RetNetDecoder, RetNetConfig
 from .transformer import SinusoidalEmbedding, Block as TransformerBlock
 
-from ..utils import wrapper as ml
-
 def _create_mask(l, device):
     """1 is valid region and 0 is invalid."""
     seq = torch.arange(max(l), device=device).unsqueeze(0)  # (1 t)

View File

@@ -5,7 +5,7 @@
 # to-do: replace this
 # to-do: swap out deepspeed
 
-from .config import Config
+from ..config import Config
 from .distributed import fix_unset_envs
 from .utils import dispatch_attribute, flatten_dict, gather_attribute, do_gc, to_device

View File

@ -1,21 +1,19 @@
# to-do: re-introduce bitsandbytes support
from contextlib import contextmanager from contextlib import contextmanager
import torch import torch
import torch.nn.functional as F import torch.nn.functional as F
from ..config import cfg
Embedding = torch.nn.Embedding Embedding = torch.nn.Embedding
Linear = torch.nn.Linear Linear = torch.nn.Linear
""" if cfg.bitsandbytes.enabled:
if cfg.bitsandbytes:
import bitsandbytes as bnb import bitsandbytes as bnb
if cfg.bitsandbytes_linear: if cfg.bitsandbytes.linear:
Linear = bnb.nn.Linear8bitLt Linear = bnb.nn.Linear8bitLt
if cfg.bitsandbytes_embedding: if cfg.bitsandbytes.embedding:
Embedding = bnb.nn.StableEmbedding Embedding = bnb.nn.StableEmbedding
Embedding.forward = lambda self, input: ( self.norm(F.embedding( Embedding.forward = lambda self, input: ( self.norm(F.embedding(
input, input,
@@ -26,28 +24,15 @@ if cfg.bitsandbytes:
             self.scale_grad_by_freq,
             self.sparse,
         )).to(self.weight.dtype) )
-"""
 
 Adam = torch.optim.Adam
 AdamW = torch.optim.AdamW
 
-"""
-if cfg.bitsandbytes:
+if cfg.bitsandbytes.enabled:
     import bitsandbytes as bnb
 
     Adam = bnb.optim.Adam
     AdamW = bnb.optim.AdamW
-"""
-
-# handles temporarily upcasting 'index tensors' so torch will stop bitching
-def autocast_forward( func ):
-    def wrapper( self, input, *args, **kwargs ):
-        if input.dtype == torch.int16 or input.dtype == torch.int8 or input.dtype == torch.uint8:
-            input = input.to(torch.int32)
-        return func( self, input, *args, **kwargs )
-    return wrapper
-Embedding.forward = autocast_forward(Embedding.forward)
-
 
 # handles generically converting to a specific tensor type and converting back (implemented solely for bfloat16)
 @contextmanager
@@ -58,3 +43,33 @@ def autocast(input, from_dtype, to_dtype):
         input = input.to(from_dtype)
     else:
         yield input
+
+@contextmanager
+def autocasts(input, from_dtype, to_dtype):
+    if input.dtype in from_dtype:
+        from_dtype = input.dtype
+        input = input.to(to_dtype)
+        yield input
+        input = input.to(from_dtype)
+    else:
+        yield input
+
+# handles temporarily upcasting 'index tensors' so torch will stop bitching
+def autocast_forward( func ):
+    def wrapper( self, input, *args, **kwargs ):
+        with autocasts( input, [torch.int16, torch.int8, torch.uint8], torch.int32 ) as k:
+            return func( self, k, *args, **kwargs )
+        """
+        if input.dtype == torch.int16 or input.dtype == torch.int8 or input.dtype == torch.uint8:
+            return func( self, input.to(torch.int32), *args, **kwargs )
+        return func( self, input, *args, **kwargs )
+        """
+    return wrapper
+Embedding.forward = autocast_forward(Embedding.forward)
+
+if cfg.bitsandbytes.injects and cfg.bitsandbytes.enabled:
+    torch.nn.Linear = Linear
+    torch.nn.Embedding = Embedding
+
+    torch.optim.Adam = Adam
+    torch.optim.AdamW = AdamW
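Since autocasts generalizes the old single-dtype check to a list of source dtypes, the patched Embedding.forward can accept int8/uint8/int16 index tensors without any work by the caller. A small standalone usage sketch, assuming autocasts is imported from this wrapper module; the tensor sizes are arbitrary examples:

    import torch
    # assumes: autocasts imported from the wrapper module above

    idx = torch.tensor([1, 2, 3], dtype=torch.uint8)   # an index dtype F.embedding rejects
    emb = torch.nn.Embedding(256, 16)                  # sizes chosen for illustration

    with autocasts(idx, [torch.int16, torch.int8, torch.uint8], torch.int32) as k:
        out = emb(k)           # k is an int32 copy; the original idx is left untouched
    print(out.shape)           # torch.Size([3, 16])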