From 827cf632e70c315a98542a202d02a4cbe9abc70e Mon Sep 17 00:00:00 2001 From: mrq Date: Sat, 1 Jun 2024 10:44:32 -0500 Subject: [PATCH] report current loss scale and adjust grad norm by loss scale (for deepspeed) --- vall_e/engines/base.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/vall_e/engines/base.py b/vall_e/engines/base.py index 66bc8a3..6d3a871 100755 --- a/vall_e/engines/base.py +++ b/vall_e/engines/base.py @@ -491,6 +491,12 @@ class Engines(dict[str, Engine]): elapsed_time = time.time() - start_time total_elapsed_time += elapsed_time + grad_norm = engine.get_global_grad_norm() + loss_scale = 1 + if hasattr(engine.optimizer, "loss_scale"): + loss_scale = engine.optimizer.loss_scale + + grad_norm /= loss_scale stats.update( flatten_dict( @@ -498,7 +504,8 @@ class Engines(dict[str, Engine]): name.split("-")[0]: dict( **engine_stats, lr=engine.get_lr()[0], - grad_norm=engine.get_global_grad_norm(), + grad_norm=grad_norm, + loss_scale=loss_scale if loss_scale != 1 else None, elapsed_time=elapsed_time, engine_step=engine.global_step, samples_processed=engine.global_samples,