Add support for mel norms across the channel dim

This commit is contained in:
James Betker 2021-12-12 19:52:08 -07:00
parent 8917c02a4d
commit aa7cfd1edf
2 changed files with 3 additions and 3 deletions

View File

@@ -11,11 +11,11 @@ from trainer.injectors.base_injectors import TorchMelSpectrogramInjector
from utils.audio import plot_spectrogram from utils.audio import plot_spectrogram
def wav_to_mel(wav): def wav_to_mel(wav, mel_norms_file='../experiments/clips_mel_norms.pth'):
""" """
Converts an audio clip into a MEL tensor that the vocoder, DVAE and GptTts models use whenever a MEL is called for. Converts an audio clip into a MEL tensor that the vocoder, DVAE and GptTts models use whenever a MEL is called for.
""" """
return TorchMelSpectrogramInjector({'in': 'wav', 'out': 'mel'},{})({'wav': wav})['mel'] return TorchMelSpectrogramInjector({'in': 'wav', 'out': 'mel', 'mel_norm_file': mel_norms_file},{})({'wav': wav})['mel']
def convert_mel_to_codes(dvae_model, mel): def convert_mel_to_codes(dvae_model, mel):

View File

@@ -632,7 +632,7 @@ class TorchMelSpectrogramInjector(Injector):
def test_torch_mel_injector(): def test_torch_mel_injector():
a = load_audio('D:\\data\\audio\\libritts\\train-clean-100\\19\\198\\19_198_000000_000000.wav', 22050) a = load_audio('D:\\data\\audio\\libritts\\train-clean-100\\19\\198\\19_198_000000_000000.wav', 22050)
inj = TorchMelSpectrogramInjector({'in': 'in', 'out': 'out'}, {}) inj = TorchMelSpectrogramInjector({'in': 'in', 'out': 'out', 'mel_norm_file': '../experiments/clips_mel_norms.pth'}, {})
f = inj({'in': a.unsqueeze(0)})['out'] f = inj({'in': a.unsqueeze(0)})['out']
plot_spectrogram(f[0]) plot_spectrogram(f[0])
inj = MelSpectrogramInjector({'in': 'in', 'out': 'out'}, {}) inj = MelSpectrogramInjector({'in': 'in', 'out': 'out'}, {})