From 23da073037fab3ca2a1f6b139b1b79cdb5f560f2 Mon Sep 17 00:00:00 2001
From: James Betker <jbetker@gmail.com>
Date: Sat, 16 Oct 2021 09:07:10 -0600
Subject: [PATCH] Norm decoder outputs now

---
 .../models/gpt_voice/unet_diffusion_vocoder_with_ref.py  | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/codes/models/gpt_voice/unet_diffusion_vocoder_with_ref.py b/codes/models/gpt_voice/unet_diffusion_vocoder_with_ref.py
index 140afb89..1713154f 100644
--- a/codes/models/gpt_voice/unet_diffusion_vocoder_with_ref.py
+++ b/codes/models/gpt_voice/unet_diffusion_vocoder_with_ref.py
@@ -14,9 +14,10 @@ class DiscreteSpectrogramConditioningBlock(nn.Module):
     def __init__(self, dvae_channels, channels):
         super().__init__()
         self.emb = nn.Conv1d(dvae_channels, channels, kernel_size=1)
-        self.norm = normalization(channels)
-        self.act = nn.SiLU()
-        self.intg = nn.Sequential(nn.Conv1d(channels*2, channels*2, kernel_size=1),
+        self.intg = nn.Sequential(
+                                  normalization(channels*2),
+                                  nn.SiLU(),
+                                  nn.Conv1d(channels*2, channels*2, kernel_size=1),
                                   normalization(channels*2),
                                   nn.SiLU(),
                                   nn.Conv1d(channels*2, channels, kernel_size=3, padding=1),
@@ -35,7 +36,7 @@ class DiscreteSpectrogramConditioningBlock(nn.Module):
         _, q, N = dvae_in.shape
         emb = self.emb(dvae_in)
         emb = nn.functional.interpolate(emb, size=(S,), mode='nearest')
-        together = torch.cat([self.act(self.norm(x)), emb], dim=1)
+        together = torch.cat([x, emb], dim=1)
         together = self.intg(together)
         return together + x