From eecbc0e6785278d1c2925ad018906e13cfa9dff1 Mon Sep 17 00:00:00 2001
From: James Betker
Date: Tue, 15 Mar 2022 10:35:11 -0600
Subject: [PATCH] Use wider spectrogram when asked

---
 codes/models/gpt_voice/unet_diffusion_tts9.py |  4 ++--
 codes/scripts/audio/gen_mel.py                | 21 +++++++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)
 create mode 100644 codes/scripts/audio/gen_mel.py

diff --git a/codes/models/gpt_voice/unet_diffusion_tts9.py b/codes/models/gpt_voice/unet_diffusion_tts9.py
index 3e6b4256..d0ac61fa 100644
--- a/codes/models/gpt_voice/unet_diffusion_tts9.py
+++ b/codes/models/gpt_voice/unet_diffusion_tts9.py
@@ -223,8 +223,8 @@ class DiffusionTts(nn.Module):
         ))
         self.latent_converter = nn.Conv1d(in_latent_channels, conditioning_dim, 1)
         self.aligned_latent_padding_embedding = nn.Parameter(torch.randn(1,in_latent_channels,1))
-        if in_channels == 80:
-            self.contextual_embedder = nn.Sequential(nn.Conv1d(80,conditioning_dim,3,padding=1,stride=2),
+        if in_channels > 60:  # It's a spectrogram.
+            self.contextual_embedder = nn.Sequential(nn.Conv1d(in_channels,conditioning_dim,3,padding=1,stride=2),
                                                      CheckpointedXTransformerEncoder(
                                                          needs_permute=True,
                                                          max_seq_len=-1,
diff --git a/codes/scripts/audio/gen_mel.py b/codes/scripts/audio/gen_mel.py
new file mode 100644
index 00000000..f9458edf
--- /dev/null
+++ b/codes/scripts/audio/gen_mel.py
@@ -0,0 +1,21 @@
+import os
+
+import torch
+
+from data.util import find_files_of_type, is_audio_file
+from trainer.injectors.audio_injectors import MelSpectrogramInjector
+from utils.util import load_audio
+
+if __name__ == '__main__':
+    path = 'C:\\Users\\jbetk\\Documents\\tmp\\some_audio'
+
+    inj = MelSpectrogramInjector({'in': 'wav', 'out': 'mel',
+                                  'mel_fmax': 12000, 'sampling_rate': 22050, 'n_mel_channels': 100
+                                  },{})
+    audio = find_files_of_type('img', path, qualifier=is_audio_file)[0]
+    for clip in audio:
+        if not clip.endswith('.wav'):
+            continue
+        wav = load_audio(clip, 24000)
+        mel = inj({'wav': wav.unsqueeze(0)})['mel']
+        torch.save(mel, clip.replace('.wav', '.mel'))
\ No newline at end of file