maybe not

mrq 2024-08-09 11:38:08 -05:00
parent c658a7b440
commit ed373957e2
2 changed files with 3 additions and 5 deletions

@@ -614,7 +614,7 @@ class Trainer:
    amp: bool = False # automatic mixed precision
    ddp: bool = False # torch's internal DDP, automatically set if local backend is used and multiple GPUs are requested
    scale_loss: bool = False # whether to perform loss scaling (for FP16 training) (it actually seems more harmful than not for this specific workload)
    #scale_loss: bool = False # whether to perform loss scaling (for FP16 training) (it actually seems more harmful than not for this specific workload)
    load_webui: bool = False # not working, but loads the web UI to allow inferencing during training
    no_logger: bool = False # deprecated, but reroutes some logger calls to normal print statements for when logger broke because of BitNet
@@ -634,14 +634,12 @@ class Trainer:
        return torch.float8_e4m3fn
        return torch.float32
    """
    @cached_property
    def scale_loss(self):
        # currently cannot feasibly apply loss scaling with DeepSpeed backend (it can handle it itself anyways)
        if self.backend != "local":
            return False
        return self.dtype == torch.float16
    """
    """
@dataclass()
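For context, the scale_loss property above only enables loss scaling for FP16 training on the local backend, since DeepSpeed handles its own loss scaling. Below is a minimal sketch of how such a flag typically gates torch.cuda.amp.GradScaler in a local training step; the train_step helper and its arguments are illustrative assumptions, not this repo's trainer API:

    import torch

    def train_step(model, batch, optimizer, scaler=None):
        # Hypothetical helper: one FP16 step, using GradScaler only when loss scaling is enabled.
        optimizer.zero_grad()
        with torch.autocast(device_type="cuda", dtype=torch.float16):
            loss = model(batch)
        if scaler is not None:
            # scale_loss == True path: scale the loss before backward, then unscale/step/update.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            # Unscaled path, which this commit leans toward keeping.
            loss.backward()
            optimizer.step()
        return loss.detach()

    # The scaler would only be constructed when the flag/property is true, e.g.:
    # scaler = torch.cuda.amp.GradScaler() if cfg.trainer.scale_loss else None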

@@ -316,7 +316,7 @@ class AR_NAR(Base):
def example_usage():
    cfg.trainer.backend = "local"
    # cfg.trainer.backend = "local"
    cfg.hyperparameters.gradient_accumulation_steps = 1
    if cfg.audio_backend == "dac":
        cfg.sample_rate = 44_100