From d0b2f931bf9140239606e3e8b5cb6b3f554c9ec8 Mon Sep 17 00:00:00 2001 From: James Betker Date: Tue, 7 Dec 2021 09:22:30 -0700 Subject: [PATCH] Add feature to diffusion vocoder where the spectrogram conditioning layers can be re-trained apart from the rest of the model --- .../gpt_voice/unet_diffusion_vocoder_with_ref.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/codes/models/gpt_voice/unet_diffusion_vocoder_with_ref.py b/codes/models/gpt_voice/unet_diffusion_vocoder_with_ref.py index 93a7d496..8b6acb88 100644 --- a/codes/models/gpt_voice/unet_diffusion_vocoder_with_ref.py +++ b/codes/models/gpt_voice/unet_diffusion_vocoder_with_ref.py @@ -91,6 +91,7 @@ class DiffusionVocoderWithRef(nn.Module): conditioning_inputs_provided=True, conditioning_input_dim=80, time_embed_dim_multiplier=4, + only_train_dvae_connection_layers=False, ): super().__init__() @@ -131,6 +132,7 @@ class DiffusionVocoderWithRef(nn.Module): ) ] ) + spectrogram_blocks = [] self._feature_size = model_channels input_block_chans = [model_channels] ch = model_channels @@ -138,7 +140,9 @@ class DiffusionVocoderWithRef(nn.Module): for level, (mult, num_blocks) in enumerate(zip(channel_mult, num_res_blocks)): if ds in spectrogram_conditioning_resolutions: - self.input_blocks.append(DiscreteSpectrogramConditioningBlock(discrete_codes, ch)) + spec_cond_block = DiscreteSpectrogramConditioningBlock(discrete_codes, ch) + self.input_blocks.append(spec_cond_block) + spectrogram_blocks.append(spec_cond_block) ch *= 2 for _ in range(num_blocks): @@ -268,6 +272,15 @@ class DiffusionVocoderWithRef(nn.Module): zero_module(conv_nd(dims, model_channels, out_channels, kernel_size, padding=padding)), ) + if only_train_dvae_connection_layers: + for p in self.parameters(): + p.DO_NOT_TRAIN = True + p.requires_grad = False + for sb in spectrogram_blocks: + for p in sb.parameters(): + del p.DO_NOT_TRAIN + p.requires_grad = True + def convert_to_fp16(self): """ Convert the torso of the model to float16.