From 23da073037fab3ca2a1f6b139b1b79cdb5f560f2 Mon Sep 17 00:00:00 2001 From: James Betker Date: Sat, 16 Oct 2021 09:07:10 -0600 Subject: [PATCH] Norm decoder outputs now --- .../models/gpt_voice/unet_diffusion_vocoder_with_ref.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/codes/models/gpt_voice/unet_diffusion_vocoder_with_ref.py b/codes/models/gpt_voice/unet_diffusion_vocoder_with_ref.py index 140afb89..1713154f 100644 --- a/codes/models/gpt_voice/unet_diffusion_vocoder_with_ref.py +++ b/codes/models/gpt_voice/unet_diffusion_vocoder_with_ref.py @@ -14,9 +14,10 @@ class DiscreteSpectrogramConditioningBlock(nn.Module): def __init__(self, dvae_channels, channels): super().__init__() self.emb = nn.Conv1d(dvae_channels, channels, kernel_size=1) - self.norm = normalization(channels) - self.act = nn.SiLU() - self.intg = nn.Sequential(nn.Conv1d(channels*2, channels*2, kernel_size=1), + self.intg = nn.Sequential( + normalization(channels*2), + nn.SiLU(), + nn.Conv1d(channels*2, channels*2, kernel_size=1), normalization(channels*2), nn.SiLU(), nn.Conv1d(channels*2, channels, kernel_size=3, padding=1), @@ -35,7 +36,7 @@ class DiscreteSpectrogramConditioningBlock(nn.Module): _, q, N = dvae_in.shape emb = self.emb(dvae_in) emb = nn.functional.interpolate(emb, size=(S,), mode='nearest') - together = torch.cat([self.act(self.norm(x)), emb], dim=1) + together = torch.cat([x, emb], dim=1) together = self.intg(together) return together + x