From 3081c893d4a0f2a830dce5b12480ee40bb0ce899 Mon Sep 17 00:00:00 2001
From: James Betker
Date: Fri, 17 Jun 2022 09:27:04 -0600
Subject: [PATCH] Don't augment grad scale when the grad don't exist!

---
 codes/models/audio/music/transformer_diffusion12.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/codes/models/audio/music/transformer_diffusion12.py b/codes/models/audio/music/transformer_diffusion12.py
index 0ca0c029..96c3f385 100644
--- a/codes/models/audio/music/transformer_diffusion12.py
+++ b/codes/models/audio/music/transformer_diffusion12.py
@@ -342,7 +342,8 @@ class TransformerDiffusionWithQuantizer(nn.Module):
         # higher gradients. Ideally we would use parameter groups, but ZeroRedundancyOptimizer makes this trickier than
         # directly fiddling with the gradients.
         for p in scaled_grad_parameters:
-            p.grad *= .2
+            if hasattr(p, 'grad') and p.grad is not None:
+                p.grad *= .2


 class TransformerDiffusionWithARPrior(nn.Module):
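For context, a minimal sketch of why the guard matters: parameters that take no part in a given backward pass (for example, an unused branch of the model) keep `p.grad` set to `None`, so unconditionally running `p.grad *= .2` raises a `TypeError`. The module and parameter names below are hypothetical stand-ins, not taken from the repository; only the guarded scaling loop mirrors the patch.

```python
import torch
import torch.nn as nn


# Hypothetical two-branch module; only one branch is used in forward(),
# so the other branch's parameters never receive a gradient.
class TwoBranch(nn.Module):
    def __init__(self):
        super().__init__()
        self.used = nn.Linear(4, 4)
        self.unused = nn.Linear(4, 4)  # its .grad stays None after backward()

    def forward(self, x):
        return self.used(x)


model = TwoBranch()
loss = model(torch.randn(2, 4)).sum()
loss.backward()

# Parameters whose gradients should be shrunk, analogous to
# scaled_grad_parameters in the patch (the name here is illustrative).
scaled_grad_parameters = list(model.parameters())

for p in scaled_grad_parameters:
    # Without this check, `p.grad *= .2` fails with a TypeError for the
    # unused branch, because p.grad is still None there.
    if hasattr(p, 'grad') and p.grad is not None:
        p.grad *= .2
```

The guard keeps the in-place scaling a no-op for gradient-less parameters while leaving the behaviour for all other parameters unchanged, which is why it can be applied inside the existing loop rather than by restructuring the optimizer's parameter groups.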