forked from mrq/DL-Art-School
Don't augment grad scale when the grad don't exist!
parent 3efd64ed7a
commit 3081c893d4
@@ -342,7 +342,8 @@ class TransformerDiffusionWithQuantizer(nn.Module):
         # higher gradients. Ideally we would use parameter groups, but ZeroRedundancyOptimizer makes this trickier than
         # directly fiddling with the gradients.
         for p in scaled_grad_parameters:
-            p.grad *= .2
+            if hasattr(p, 'grad') and p.grad is not None:
+                p.grad *= .2
 
 
 class TransformerDiffusionWithARPrior(nn.Module):
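For context: in PyTorch, a parameter's .grad stays None if it never received a gradient during backward(), so the unconditional p.grad *= .2 raised a TypeError whenever some of scaled_grad_parameters went unused in a given step. Below is a minimal sketch of the failure mode and the guarded fix; the parameter names are illustrative, not from the repository.

    import torch
    import torch.nn as nn

    # Two parameters; only one participates in the forward/backward pass,
    # so the other's .grad remains None after backward().
    used = nn.Parameter(torch.randn(4))
    unused = nn.Parameter(torch.randn(4))

    loss = (used * 2).sum()
    loss.backward()

    scaled_grad_parameters = [used, unused]
    for p in scaled_grad_parameters:
        # Guard against parameters whose gradient was never populated.
        if hasattr(p, 'grad') and p.grad is not None:
            p.grad *= .2

    print(used.grad)    # gradient tensor scaled by 0.2
    print(unused.grad)  # None, left untouched

The hasattr check is belt-and-braces: every tensor exposes a .grad attribute (defaulting to None), so the is not None test is the part doing the real work here.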