diff --git a/codes/models/audio/music/transformer_diffusion12.py b/codes/models/audio/music/transformer_diffusion12.py
index 96c3f385..b52639fe 100644
--- a/codes/models/audio/music/transformer_diffusion12.py
+++ b/codes/models/audio/music/transformer_diffusion12.py
@@ -511,7 +511,8 @@ class TransformerDiffusionWithMultiPretrainedVqvae(nn.Module):
         # higher gradients. Ideally we would use parameter groups, but ZeroRedundancyOptimizer makes this trickier than
         # directly fiddling with the gradients.
         for p in scaled_grad_parameters:
-            p.grad *= .2
+            if hasattr(p, 'grad') and p.grad is not None:
+                p.grad *= .2
 
 
 class TransformerDiffusionWithCheaterLatent(nn.Module):
@@ -566,7 +567,8 @@ class TransformerDiffusionWithCheaterLatent(nn.Module):
         # higher gradients. Ideally we would use parameter groups, but ZeroRedundancyOptimizer makes this trickier than
         # directly fiddling with the gradients.
         for p in scaled_grad_parameters:
-            p.grad *= .2
+            if hasattr(p, 'grad') and p.grad is not None:
+                p.grad *= .2
 
 
 @register_model