forked from mrq/DL-Art-School
norm mel
This commit is contained in:
parent
b13d983c24
commit
b83b53cf84
|
@ -11,10 +11,10 @@ from utils.util import opt_get, load_model_from_config, pad_or_truncate
|
||||||
TACOTRON_MEL_MAX = 2.3143386840820312
|
TACOTRON_MEL_MAX = 2.3143386840820312
|
||||||
TACOTRON_MEL_MIN = -11.512925148010254
|
TACOTRON_MEL_MIN = -11.512925148010254
|
||||||
|
|
||||||
def normalize_tacotron_mel(mel):
|
def normalize_mel(mel):
|
||||||
return 2 * ((mel - TACOTRON_MEL_MIN) / (TACOTRON_MEL_MAX - TACOTRON_MEL_MIN)) - 1
|
return 2 * ((mel - TACOTRON_MEL_MIN) / (TACOTRON_MEL_MAX - TACOTRON_MEL_MIN)) - 1
|
||||||
|
|
||||||
def denormalize_tacotron_mel(norm_mel):
|
def denormalize_mel(norm_mel):
|
||||||
return ((norm_mel+1)/2)*(TACOTRON_MEL_MAX-TACOTRON_MEL_MIN)+TACOTRON_MEL_MIN
|
return ((norm_mel+1)/2)*(TACOTRON_MEL_MAX-TACOTRON_MEL_MIN)+TACOTRON_MEL_MIN
|
||||||
|
|
||||||
class MelSpectrogramInjector(Injector):
|
class MelSpectrogramInjector(Injector):
|
||||||
|
@ -40,7 +40,7 @@ class MelSpectrogramInjector(Injector):
|
||||||
self.stft = self.stft.to(inp.device)
|
self.stft = self.stft.to(inp.device)
|
||||||
mel = self.stft.mel_spectrogram(inp)
|
mel = self.stft.mel_spectrogram(inp)
|
||||||
if self.do_normalization:
|
if self.do_normalization:
|
||||||
mel = normalize_tacotron_mel(mel)
|
mel = normalize_mel(mel)
|
||||||
return {self.output: mel}
|
return {self.output: mel}
|
||||||
|
|
||||||
|
|
||||||
|
@ -56,6 +56,7 @@ class TorchMelSpectrogramInjector(Injector):
|
||||||
self.mel_fmax = opt_get(opt, ['mel_fmax'], 8000)
|
self.mel_fmax = opt_get(opt, ['mel_fmax'], 8000)
|
||||||
self.sampling_rate = opt_get(opt, ['sampling_rate'], 22050)
|
self.sampling_rate = opt_get(opt, ['sampling_rate'], 22050)
|
||||||
norm = opt_get(opt, ['normalize'], False)
|
norm = opt_get(opt, ['normalize'], False)
|
||||||
|
self.true_norm = opt_get(opt, ['true_normalization'], False)
|
||||||
self.mel_stft = torchaudio.transforms.MelSpectrogram(n_fft=self.filter_length, hop_length=self.hop_length,
|
self.mel_stft = torchaudio.transforms.MelSpectrogram(n_fft=self.filter_length, hop_length=self.hop_length,
|
||||||
win_length=self.win_length, power=2, normalized=norm,
|
win_length=self.win_length, power=2, normalized=norm,
|
||||||
sample_rate=self.sampling_rate, f_min=self.mel_fmin,
|
sample_rate=self.sampling_rate, f_min=self.mel_fmin,
|
||||||
|
@ -79,6 +80,8 @@ class TorchMelSpectrogramInjector(Injector):
|
||||||
if self.mel_norms is not None:
|
if self.mel_norms is not None:
|
||||||
self.mel_norms = self.mel_norms.to(mel.device)
|
self.mel_norms = self.mel_norms.to(mel.device)
|
||||||
mel = mel / self.mel_norms.unsqueeze(0).unsqueeze(-1)
|
mel = mel / self.mel_norms.unsqueeze(0).unsqueeze(-1)
|
||||||
|
if self.true_norm:
|
||||||
|
mel = normalize_mel(mel)
|
||||||
return {self.output: mel}
|
return {self.output: mel}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user