forked from mrq/DL-Art-School
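Allow use of pre-rendered mel spectrograms saved to .npy files (loaded from `<audio path>_mel.npy` when `load_mel_from_disk` is set and the file exists)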
This commit is contained in:
parent
ad3391bd96
commit
c28f657ab8
|
@ -51,7 +51,7 @@ class TextMelLoader(torch.utils.data.Dataset):
|
||||||
self.text_cleaners = hparams.text_cleaners
|
self.text_cleaners = hparams.text_cleaners
|
||||||
self.max_wav_value = hparams.max_wav_value
|
self.max_wav_value = hparams.max_wav_value
|
||||||
self.sampling_rate = hparams.sampling_rate
|
self.sampling_rate = hparams.sampling_rate
|
||||||
self.load_mel_from_disk = hparams.load_mel_from_disk
|
self.load_mel_from_disk = opt_get(hparams, ['load_mel_from_disk'], False)
|
||||||
self.return_wavs = opt_get(hparams, ['return_wavs'], False)
|
self.return_wavs = opt_get(hparams, ['return_wavs'], False)
|
||||||
self.input_sample_rate = opt_get(hparams, ['input_sample_rate'], self.sampling_rate)
|
self.input_sample_rate = opt_get(hparams, ['input_sample_rate'], self.sampling_rate)
|
||||||
assert not (self.load_mel_from_disk and self.return_wavs)
|
assert not (self.load_mel_from_disk and self.return_wavs)
|
||||||
|
@ -76,7 +76,11 @@ class TextMelLoader(torch.utils.data.Dataset):
|
||||||
return (text_seq, mel, text, audiopath_and_text[0])
|
return (text_seq, mel, text, audiopath_and_text[0])
|
||||||
|
|
||||||
def get_mel(self, filename):
|
def get_mel(self, filename):
|
||||||
if not self.load_mel_from_disk:
|
if self.load_mel_from_disk and os.path.exists(f'{filename}_mel.npy'):
|
||||||
|
melspec = torch.from_numpy(np.load(f'{filename}_mel.npy'))
|
||||||
|
assert melspec.size(0) == self.stft.n_mel_channels, (
|
||||||
|
'Mel dimension mismatch: given {}, expected {}'.format(melspec.size(0), self.stft.n_mel_channels))
|
||||||
|
else:
|
||||||
if filename.endswith('.wav'):
|
if filename.endswith('.wav'):
|
||||||
audio, sampling_rate = load_wav_to_torch(filename)
|
audio, sampling_rate = load_wav_to_torch(filename)
|
||||||
audio = (audio / self.max_wav_value)
|
audio = (audio / self.max_wav_value)
|
||||||
|
@ -105,9 +109,6 @@ class TextMelLoader(torch.utils.data.Dataset):
|
||||||
melspec = self.stft.mel_spectrogram(audio_norm)
|
melspec = self.stft.mel_spectrogram(audio_norm)
|
||||||
melspec = torch.squeeze(melspec, 0)
|
melspec = torch.squeeze(melspec, 0)
|
||||||
else:
|
else:
|
||||||
melspec = torch.from_numpy(np.load(filename))
|
|
||||||
assert melspec.size(0) == self.stft.n_mel_channels, (
|
|
||||||
'Mel dimension mismatch: given {}, expected {}'.format(melspec.size(0), self.stft.n_mel_channels))
|
|
||||||
|
|
||||||
|
|
||||||
return melspec
|
return melspec
|
||||||
|
@ -209,10 +210,6 @@ def save_mel_buffer_to_file(mel, path):
|
||||||
np.save(path, mel.numpy())
|
np.save(path, mel.numpy())
|
||||||
|
|
||||||
|
|
||||||
def load_mel_buffer_from_file(path):
|
|
||||||
return torch.tensor(np.load(path))
|
|
||||||
|
|
||||||
|
|
||||||
def dump_mels_to_disk():
|
def dump_mels_to_disk():
|
||||||
params = {
|
params = {
|
||||||
'mode': 'nv_tacotron',
|
'mode': 'nv_tacotron',
|
||||||
|
|
|
@ -9,11 +9,11 @@ import numpy as np
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
src_dir = 'O:\\podcast_dumps'
|
src_dir = 'P:\\Audiobooks-Podcasts'
|
||||||
#src_dir = 'E:\\audio\\books'
|
#src_dir = 'E:\\audio\\books'
|
||||||
output_dir = 'D:\\data\\audio\\podcasts-split'
|
output_dir = 'D:\\data\\audio\\misc-split'
|
||||||
output_dir_lq = 'D:\\data\\audio\\podcasts-split-with-bg'
|
output_dir_lq = 'D:\\data\\audio\\misc-split-with-bg'
|
||||||
output_dir_garbage = 'D:\\data\\audio\\podcasts-split-garbage'
|
output_dir_garbage = 'D:\\data\\audio\\misc-split-garbage'
|
||||||
#output_dir = 'E:\\audio\\books-clips'
|
#output_dir = 'E:\\audio\\books-clips'
|
||||||
clip_length = 5 # In seconds
|
clip_length = 5 # In seconds
|
||||||
sparsity = .1 # Only this proportion of the total clips are extracted as wavs.
|
sparsity = .1 # Only this proportion of the total clips are extracted as wavs.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user