forked from mrq/DL-Art-School

commit 45afefabed
parent cc62ba9cba

fix booboo
@@ -273,7 +273,7 @@ class TransformerDiffusion(nn.Module):
         return groups
 
     def before_step(self, step):
-        scaled_grad_parameters = list(itertools.chain.from_iterable([lyr.out.parameters() for lyr in self.diff.layers]))
+        scaled_grad_parameters = list(itertools.chain.from_iterable([lyr.out.parameters() for lyr in self.layers]))
         # Scale back the gradients of the blkout and prenorm layers by a constant factor. These get two orders of magnitudes
         # higher gradients. Ideally we would use parameter groups, but ZeroRedundancyOptimizer makes this trickier than
         # directly fiddling with the gradients.
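For context, below is a minimal, runnable sketch of the pattern the hunk's comment describes: gathering the `out` projections of every layer and scaling their already-computed gradients in place before the optimizer step, rather than giving them a separate parameter group (which ZeroRedundancyOptimizer makes awkward). The `Block` skeleton, the 0.01 scaling factor, and the usage lines are assumptions for illustration only; the repository's actual constant and layer internals sit outside the quoted hunk.

import itertools
import torch
import torch.nn as nn

class Block(nn.Module):
    # Hypothetical stand-in for a transformer layer whose `out` projection
    # accumulates gradients roughly two orders of magnitude larger than
    # the rest of the network, per the comment in the diff.
    def __init__(self, dim):
        super().__init__()
        self.ff = nn.Linear(dim, dim)
        self.out = nn.Linear(dim, dim)

    def forward(self, x):
        return self.out(torch.relu(self.ff(x)))

class TransformerDiffusion(nn.Module):
    def __init__(self, dim=64, num_layers=4):
        super().__init__()
        self.layers = nn.ModuleList([Block(dim) for _ in range(num_layers)])

    def forward(self, x):
        for lyr in self.layers:
            x = lyr(x)
        return x

    def before_step(self, step):
        # Gather exactly the parameters named in the diff: the `out`
        # projection of every layer (note `self.layers`, the bug fix).
        scaled_grad_parameters = list(itertools.chain.from_iterable(
            [lyr.out.parameters() for lyr in self.layers]))
        # Scale the gradients down in place instead of using a separate
        # optimizer parameter group. The 0.01 factor is an assumed value
        # chosen to match the "two orders of magnitude" comment.
        for p in scaled_grad_parameters:
            if p.grad is not None:
                p.grad.mul_(0.01)

# Usage: call before_step() after backward() and before optimizer.step().
model = TransformerDiffusion()
x = torch.randn(2, 64)
model(x).sum().backward()
model.before_step(step=0)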