fix booboo

commit 45afefabed
parent cc62ba9cba
Author: James Betker
Date:   2022-07-24 18:00:14 -06:00


@@ -273,7 +273,7 @@ class TransformerDiffusion(nn.Module):
         return groups
 
     def before_step(self, step):
-        scaled_grad_parameters = list(itertools.chain.from_iterable([lyr.out.parameters() for lyr in self.diff.layers]))
+        scaled_grad_parameters = list(itertools.chain.from_iterable([lyr.out.parameters() for lyr in self.layers]))
         # Scale back the gradients of the blkout and prenorm layers by a constant factor. These get two orders of magnitude
         # higher gradients. Ideally we would use parameter groups, but ZeroRedundancyOptimizer makes this trickier than
         # directly fiddling with the gradients.
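The hunk shows only where the scaled parameters are collected; the scaling itself happens further down in the same method, outside this diff. A minimal sketch of the pattern the comment describes, assuming a hypothetical constant GRAD_SCALE (the actual factor is not visible in this hunk):

import itertools
import torch

GRAD_SCALE = 0.2  # hypothetical value; the real constant is not shown in this hunk

def before_step(self, step):
    # Gather the parameters of each layer's output projection.
    scaled_grad_parameters = list(itertools.chain.from_iterable(
        [lyr.out.parameters() for lyr in self.layers]))
    # Multiply their gradients down by a constant factor; per the comment
    # above, these parameters see gradients roughly two orders of magnitude
    # larger than the rest of the network.
    with torch.no_grad():
        for p in scaled_grad_parameters:
            if p.grad is not None:
                p.grad.mul_(GRAD_SCALE)

This is the trade-off the comment alludes to: per-parameter-group learning rates would be the cleaner fix, but ZeroRedundancyOptimizer shards optimizer state across ranks, so rescaling gradients in-place before the optimizer step is the simpler workaround.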