From ef05c951ffbd254dc2e791cb55775c0b897be2f3 Mon Sep 17 00:00:00 2001 From: mrq Date: Thu, 14 Nov 2024 09:23:52 -0600 Subject: [PATCH] adjust fp16 loss scaling since I fried a model overnight when it hit 8K scale --- vall_e/config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vall_e/config.py b/vall_e/config.py index d5eb5d1..56793dd 100755 --- a/vall_e/config.py +++ b/vall_e/config.py @@ -558,6 +558,8 @@ class DeepSpeed: "fp16": { "enabled": cfg.trainer.weight_dtype.lower() == "float16", "auto_cast": True, # ??? + "loss_scale_window": 100, # raise every 100 consecutive good steps + "min_loss_scale": 32768.0, # dynamic loss scale dropping to 8K destabilized training; 16K works, but floor at 32K to be safe "loss_scale": 0.0 if cfg.trainer.scale_loss else 1.0, }, "bf16": {