diff --git a/codes/models/audio/tts/unet_diffusion_tts_flat0.py b/codes/models/audio/tts/unet_diffusion_tts_flat0.py
index 4ee4cd35..77fc0260 100644
--- a/codes/models/audio/tts/unet_diffusion_tts_flat0.py
+++ b/codes/models/audio/tts/unet_diffusion_tts_flat0.py
@@ -182,7 +182,10 @@ class DiffusionTtsFlat(nn.Module):
     def get_grad_norm_parameter_groups(self):
         groups = {
             'minicoder': list(self.contextual_embedder.parameters()),
-            'layers': list(self.layers),
+            'layers': list(self.layers.parameters()),
+            'code_converters': list(self.code_embedding.parameters()) + list(self.code_converter.parameters()) + list(self.latent_converter.parameters()),
+            'timestep_integrator': list(self.conditioning_timestep_integrator.parameters()) + list(self.integrating_conv.parameters()),
+            'time_embed': list(self.time_embed.parameters()),
         }
         return groups
 
diff --git a/codes/trainer/injectors/audio_injectors.py b/codes/trainer/injectors/audio_injectors.py
index ccde6503..cd9adaaf 100644
--- a/codes/trainer/injectors/audio_injectors.py
+++ b/codes/trainer/injectors/audio_injectors.py
@@ -132,7 +132,7 @@ class DiscreteTokenInjector(Injector):
         super().__init__(opt, env)
         cfg = opt_get(opt, ['dvae_config'], "../experiments/train_diffusion_vocoder_22k_level.yml")
         dvae_name = opt_get(opt, ['dvae_name'], 'dvae')
-        self.dvae = load_model_from_config(cfg, dvae_name).cuda().eval()
+        self.dvae = load_model_from_config(cfg, dvae_name, device=env['device']).eval()
 
     def forward(self, state):
         inp = state[self.input]