diff --git a/codes/models/gpt_voice/unet_diffusion_vocoder_with_ref.py b/codes/models/gpt_voice/unet_diffusion_vocoder_with_ref.py
index 9be0c264..8e2909fa 100644
--- a/codes/models/gpt_voice/unet_diffusion_vocoder_with_ref.py
+++ b/codes/models/gpt_voice/unet_diffusion_vocoder_with_ref.py
@@ -19,7 +19,10 @@ class DiscreteSpectrogramConditioningBlock(nn.Module):
         self.intg = nn.Sequential(nn.Conv1d(channels*2, channels*2, kernel_size=1),
                                   normalization(channels*2),
                                   nn.SiLU(),
-                                  nn.Conv1d(channels*2, channels, kernel_size=3, padding=1))
+                                  nn.Conv1d(channels*2, channels, kernel_size=3, padding=1),
+                                  normalization(channels),
+                                  nn.SiLU(),
+                                  zero_module(nn.Conv1d(channels, channels, kernel_size=1)))
 
     """
     Embeds the given codes and concatenates them onto x. Return shape is the same as x.shape.