Re-instate autocasting

James Betker 2022-02-25 11:06:18 -07:00
parent 34ee32a90e
commit c375287db9

@@ -431,6 +431,7 @@ class DiffusionTts(nn.Module):
            unaligned_h = self.unaligned_embedder(unaligned_input).permute(0,2,1)
            unaligned_h = self.unaligned_encoder(unaligned_h).permute(0,2,1)
        with autocast(x.device.type):
            orig_x_shape = x.shape[-1]
            cm = ceil_multiple(x.shape[-1], 2048)
            if cm != 0:
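
The first hunk wraps the length-padding logic in an autocast region. As context, a minimal sketch of what the ceil_multiple padding likely does, assuming the helper returns 0 when the length is already a multiple of the base (consistent with the "if cm != 0" guard) and the next multiple otherwise; the shapes and the F.pad call are illustrative, not taken from the model:

    import torch
    import torch.nn.functional as F

    def ceil_multiple(n, base):
        # Hypothetical helper, inferred from usage: returns 0 when n is already
        # a multiple of base, otherwise the next multiple of base above n.
        rem = n % base
        return 0 if rem == 0 else n + (base - rem)

    x = torch.randn(2, 100, 3000)            # (batch, channels, samples)
    orig_x_shape = x.shape[-1]
    cm = ceil_multiple(x.shape[-1], 2048)
    if cm != 0:
        x = F.pad(x, (0, cm - x.shape[-1]))  # right-pad the time axis to 4096
    # ... run the model, then trim the output back to orig_x_shape
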
@@ -460,6 +461,7 @@ class DiffusionTts(nn.Module):
            else:
                code_emb = self.conditioning_encoder(code_emb)
            first = True
            time_emb = time_emb.float()
            h = x
            for k, module in enumerate(self.input_blocks):
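
The second hunk promotes the timestep embedding back to full precision before the block loop. A small sketch of the behavior it guards against, assuming a generic torch.autocast region; the Linear layer and shapes are placeholders, not the model's real time embedder:

    import torch

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    embed = torch.nn.Linear(128, 512).to(device)
    t = torch.randn(4, 128, device=device)

    with torch.autocast(device):
        time_emb = embed(t)          # may come out in fp16 (CUDA) or bf16 (CPU) under autocast
        time_emb = time_emb.float()  # promote back to fp32 before it is fed to every block
        assert time_emb.dtype == torch.float32
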
@@ -467,15 +469,18 @@ class DiffusionTts(nn.Module):
                    h_tok = F.interpolate(module(code_emb), size=(h.shape[-1]), mode='nearest')
                    h = h + h_tok
                else:
                    with autocast(x.device.type, enabled=not first):
                        # First block has autocast disabled to allow a high precision signal to be properly vectorized.
                        h = module(h, time_emb)
                    hs.append(h)
                    first = False
            h = self.middle_block(h, time_emb)
            for module in self.output_blocks:
                h = torch.cat([h, hs.pop()], dim=1)
                h = module(h, time_emb)
        # Last block also has autocast disabled for high-precision outputs.
        h = h.float()
        out = self.out(h)
        return out[:, :, :orig_x_shape]
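
Taken together, the change re-instates selective autocasting: most of the network runs under autocast, the first input block runs with autocast disabled so the raw signal is handled in full precision, and the final projection operates on an explicitly fp32 tensor. A condensed, self-contained sketch of that control flow, with simple Conv1d stand-ins for the real blocks and no timestep embedding (names and shapes are illustrative, not the model's actual layers):

    import torch
    import torch.nn as nn

    class SelectiveAutocastNet(nn.Module):
        # Toy stand-in for the block structure above: the first input block and
        # the output projection stay in fp32; everything else may run in reduced
        # precision under autocast.
        def __init__(self, ch=64):
            super().__init__()
            self.input_blocks = nn.ModuleList([nn.Conv1d(ch, ch, 3, padding=1) for _ in range(3)])
            self.middle_block = nn.Conv1d(ch, ch, 3, padding=1)
            self.output_blocks = nn.ModuleList([nn.Conv1d(ch * 2, ch, 3, padding=1) for _ in range(3)])
            self.out = nn.Conv1d(ch, ch, 1)

        def forward(self, x):
            hs = []
            h = x
            first = True
            for module in self.input_blocks:
                # Autocast stays off for the first block so the raw input signal
                # is processed in full precision.
                with torch.autocast(x.device.type, enabled=not first):
                    h = module(h)
                hs.append(h)
                first = False
            with torch.autocast(x.device.type):
                h = self.middle_block(h)
                for module in self.output_blocks:
                    # Skip connections can mix fp32 (first block) and reduced-precision
                    # activations, so match dtypes before concatenating.
                    h = torch.cat([h, hs.pop().to(h.dtype)], dim=1)
                    h = module(h)
            # The output projection runs outside autocast on an fp32 tensor.
            h = h.float()
            return self.out(h)

    net = SelectiveAutocastNet()
    x = torch.randn(1, 64, 256)
    print(net(x).shape)  # torch.Size([1, 64, 256])
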