forked from mrq/DL-Art-School
fix booboo
parent cc62ba9cba
commit 45afefabed
@@ -273,7 +273,7 @@ class TransformerDiffusion(nn.Module):
         return groups
 
     def before_step(self, step):
-        scaled_grad_parameters = list(itertools.chain.from_iterable([lyr.out.parameters() for lyr in self.diff.layers]))
+        scaled_grad_parameters = list(itertools.chain.from_iterable([lyr.out.parameters() for lyr in self.layers]))
         # Scale back the gradients of the blkout and prenorm layers by a constant factor. These get two orders of magnitudes
         # higher gradients. Ideally we would use parameter groups, but ZeroRedundancyOptimizer makes this trickier than
         # directly fiddling with the gradients.
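The hunk cuts off before the scaling itself, so as a rough, self-contained sketch of the pattern the comments describe: gather the out-projection parameters of every layer and shrink their gradients in place between backward() and the optimizer step. The TinyLayer/TinyModel classes and the 0.01 factor below are illustrative assumptions, not code from this repository.

import itertools

import torch
import torch.nn as nn


class TinyLayer(nn.Module):
    # Stand-in for a transformer block; "out" plays the role of the
    # blkout/prenorm projections whose gradients run hot.
    def __init__(self, dim):
        super().__init__()
        self.ff = nn.Linear(dim, dim)
        self.out = nn.Linear(dim, dim)

    def forward(self, x):
        return self.out(torch.relu(self.ff(x)))


class TinyModel(nn.Module):
    def __init__(self, dim=16, n_layers=3):
        super().__init__()
        self.layers = nn.ModuleList(TinyLayer(dim) for _ in range(n_layers))

    def forward(self, x):
        for lyr in self.layers:
            x = lyr(x)
        return x

    def before_step(self, step):
        # Collect the parameters of every layer's output projection,
        # mirroring the line changed in this commit.
        scaled_grad_parameters = list(itertools.chain.from_iterable(
            [lyr.out.parameters() for lyr in self.layers]))
        # Scale their gradients down in place by an assumed constant factor
        # (0.01 here for illustration; the real factor may differ), instead of
        # using optimizer parameter groups.
        for p in scaled_grad_parameters:
            if p.grad is not None:
                p.grad.data.mul_(0.01)


# Usage: call before_step() after backward() and before optimizer.step().
model = TinyModel()
x = torch.randn(4, 16)
loss = model(x).pow(2).mean()
loss.backward()
model.before_step(step=0)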