This commit is contained in:
James Betker 2021-10-31 14:29:23 -06:00
parent b404a3b747
commit b8b268b5f6
2 changed files with 13 additions and 10 deletions

View File

@@ -98,6 +98,11 @@ class TextMelLoader(torch.utils.data.Dataset):
else: else:
if filename.endswith('.wav'): if filename.endswith('.wav'):
audio, sampling_rate = load_wav_to_torch(filename) audio, sampling_rate = load_wav_to_torch(filename)
elif filename.endswith('.mp3'):
# https://github.com/neonbjb/pyfastmp3decoder - Definitely worth it.
from pyfastmp3decoder.mp3decoder import load_mp3
audio, sampling_rate = load_mp3(filename, self.input_sample_rate)
audio = torch.FloatTensor(audio)
else: else:
audio, sampling_rate = audio2numpy.audio_from_file(filename) audio, sampling_rate = audio2numpy.audio_from_file(filename)
audio = torch.tensor(audio) audio = torch.tensor(audio)
@@ -225,28 +230,26 @@ def save_mel_buffer_to_file(mel, path):
def dump_mels_to_disk(): def dump_mels_to_disk():
params = { params = {
'mode': 'nv_tacotron', 'mode': 'nv_tacotron',
'path': ['Z:\\voxpopuli\\audio\\transcribed_data\\en\\asr_test.tsv'], 'path': ['Z:\\mozcv\\en\\train.tsv'],
'fetcher_mode': ['voxpopuli'], 'fetcher_mode': ['mozilla_cv'],
'phase': 'train', 'phase': 'train',
'n_workers': 0, 'n_workers': 8,
'batch_size': 1, 'batch_size': 1,
'needs_collate': True, 'needs_collate': True,
'max_mel_length': 4000, 'max_mel_length': 10000,
'max_text_length': 600, 'max_text_length': 1000,
#'return_wavs': True, #'return_wavs': True,
#'input_sample_rate': 22050, #'input_sample_rate': 22050,
#'sampling_rate': 8000 #'sampling_rate': 8000
} }
output_path = 'D:\\dlas\\results\\mozcv_mels'
os.makedirs(os.path.join(output_path, 'clips'), exist_ok=True)
from data import create_dataset, create_dataloader from data import create_dataset, create_dataloader
ds, c = create_dataset(params, return_collate=True) ds, c = create_dataset(params, return_collate=True)
dl = create_dataloader(ds, params, collate_fn=c) dl = create_dataloader(ds, params, collate_fn=c)
for i, b in tqdm(enumerate(dl)): for b in tqdm(dl):
mels = b['padded_mel'] mels = b['padded_mel']
fnames = b['filenames'] fnames = b['filenames']
for j, fname in enumerate(fnames): for j, fname in enumerate(fnames):
save_mel_buffer_to_file(mels[j], f'{os.path.join(output_path, fname)}_mel.npy') save_mel_buffer_to_file(mels[j], f'{fname}_mel.npy')
if __name__ == '__main__': if __name__ == '__main__':

View File

@@ -284,7 +284,7 @@ class Trainer:
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_gpt_asr_mass_distill.yml') parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_diffusion_vocoder_clips.yml')
parser.add_argument('--launcher', choices=['none', 'pytorch'], default='none', help='job launcher') parser.add_argument('--launcher', choices=['none', 'pytorch'], default='none', help='job launcher')
parser.add_argument('--local_rank', type=int, default=0) parser.add_argument('--local_rank', type=int, default=0)
args = parser.parse_args() args = parser.parse_args()