forked from mrq/DL-Art-School
Don't augment grad scale when the grad don't exist!
parent 3efd64ed7a
commit 3081c893d4
@@ -342,7 +342,8 @@ class TransformerDiffusionWithQuantizer(nn.Module):
         # higher gradients. Ideally we would use parameter groups, but ZeroRedundancyOptimizer makes this trickier than
         # directly fiddling with the gradients.
         for p in scaled_grad_parameters:
-            p.grad *= .2
+            if hasattr(p, 'grad') and p.grad is not None:
+                p.grad *= .2
 
 
 class TransformerDiffusionWithARPrior(nn.Module):
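For context: in PyTorch, a parameter's .grad stays None if it never received a gradient during backward(), so the unconditional p.grad *= .2 raised a TypeError whenever some of scaled_grad_parameters went unused in a given step. Below is a minimal sketch of the failure mode and the guarded fix; the parameter names are illustrative, not from the repository.

    import torch
    import torch.nn as nn

    # Two parameters; only one participates in the forward/backward pass,
    # so the other's .grad remains None after backward().
    used = nn.Parameter(torch.randn(4))
    unused = nn.Parameter(torch.randn(4))

    loss = (used * 2).sum()
    loss.backward()

    scaled_grad_parameters = [used, unused]
    for p in scaled_grad_parameters:
        # Guard against parameters whose gradient was never populated.
        if hasattr(p, 'grad') and p.grad is not None:
            p.grad *= .2

    print(used.grad)    # gradient tensor scaled by 0.2
    print(unused.grad)  # None, left untouched

The hasattr check is belt-and-braces: every tensor exposes a .grad attribute (defaulting to None), so the is not None test is the part doing the real work here.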