From 729c1fd5a93166609f1b2063a8a590f871f93553 Mon Sep 17 00:00:00 2001 From: James Betker Date: Sun, 15 Aug 2021 21:29:28 -0600 Subject: [PATCH] Fix up max lengths to save memory --- codes/models/gpt_voice/gpt_audio_segmentor.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/codes/models/gpt_voice/gpt_audio_segmentor.py b/codes/models/gpt_voice/gpt_audio_segmentor.py index 847a545d..5ae57e71 100644 --- a/codes/models/gpt_voice/gpt_audio_segmentor.py +++ b/codes/models/gpt_voice/gpt_audio_segmentor.py @@ -66,6 +66,9 @@ class GptSegmentor(nn.Module): self.stop_head = nn.Linear(model_dim, 1) def forward(self, mel_inputs, mel_lengths): + max_len = mel_lengths.max() # This can be done in the dataset layer, but it is easier to do here. + mel_inputs = mel_inputs[:, :, :max_len] + mel_emb = self.mel_encoder(mel_inputs) mel_lengths = mel_lengths // 4 # The encoder decimates the mel by a factor of 4. mel_emb = mel_emb.permute(0,2,1).contiguous()