forked from mrq/DL-Art-School

Allow processing of multiple audio sources at once from nv_tacotron_dataset

parent 007976082b
commit d6a73acaed
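In short: the dataset's 'path' option and the matching 'fetcher_mode' may now each be a list of equal length; every (path, mode) pair is read with the appropriate fetcher and the results are concatenated into a single audiopaths_and_text list. A minimal configuration sketch, reusing only keys that appear in the dump_mels_to_disk hunk further down (the drive paths are the examples from that hunk):

params = {
    'mode': 'nv_tacotron',
    # one fetcher per path, in the same order
    'path': ['E:\\audio\\MozillaCommonVoice\\en\\test.tsv', 'E:\\audio\\LibriTTS\\train-other-500_list.txt'],
    'fetcher_mode': ['mozilla_cv', 'libritts'],
}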
@@ -18,7 +18,8 @@ from utils.util import opt_get
 def load_mozilla_cv(filename):
     with open(filename, encoding='utf-8') as f:
         components = [line.strip().split('\t') for line in f][1:]  # First line is the header
-        filepaths_and_text = [[f'clips/{component[1]}', component[2]] for component in components]
+        base = os.path.dirname(filename)
+        filepaths_and_text = [[os.path.join(base, f'clips/{component[1]}'), component[2]] for component in components]
     return filepaths_and_text
@@ -29,15 +30,24 @@ class TextMelLoader(torch.utils.data.Dataset):
     3) computes mel-spectrograms from audio files.
     """
     def __init__(self, hparams):
-        self.path = os.path.dirname(hparams['path'])
+        self.path = hparams['path']
+        if not isinstance(self.path, list):
+            self.path = [self.path]
+
         fetcher_mode = opt_get(hparams, ['fetcher_mode'], 'lj')
-        if fetcher_mode == 'lj':
-            fetcher_fn = load_filepaths_and_text
-        elif fetcher_mode == 'mozilla_cv':
-            fetcher_fn = load_mozilla_cv
-        else:
-            raise NotImplementedError()
-        self.audiopaths_and_text = fetcher_fn(hparams['path'])
+        if not isinstance(fetcher_mode, list):
+            fetcher_mode = [fetcher_mode]
+        assert len(self.path) == len(fetcher_mode)
+
+        self.audiopaths_and_text = []
+        for p, fm in zip(self.path, fetcher_mode):
+            if fm == 'lj' or fm == 'libritts':
+                fetcher_fn = load_filepaths_and_text
+            elif fm == 'mozilla_cv':
+                fetcher_fn = load_mozilla_cv
+            else:
+                raise NotImplementedError()
+            self.audiopaths_and_text.extend(fetcher_fn(p))
         self.text_cleaners = hparams.text_cleaners
         self.max_wav_value = hparams.max_wav_value
         self.sampling_rate = hparams.sampling_rate
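A standalone sketch of the new merging behaviour in __init__, with hypothetical stub fetchers and paths standing in for load_filepaths_and_text / load_mozilla_cv:

def load_list_file(path):      # stand-in for load_filepaths_and_text
    return [['a.wav', 'text a'], ['b.wav', 'text b']]

def load_cv_tsv(path):         # stand-in for load_mozilla_cv
    return [['clips/c.mp3', 'text c']]

paths = ['/data/libritts_list.txt', '/data/cv/test.tsv']   # hypothetical sources
modes = ['libritts', 'mozilla_cv']
assert len(paths) == len(modes)                            # one mode per path

audiopaths_and_text = []
for p, fm in zip(paths, modes):
    fetcher_fn = load_list_file if fm in ('lj', 'libritts') else load_cv_tsv
    audiopaths_and_text.extend(fetcher_fn(p))

print(len(audiopaths_and_text))  # 3: entries from both sources, concatenated in order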
@@ -61,7 +71,6 @@ class TextMelLoader(torch.utils.data.Dataset):
     def get_mel_text_pair(self, audiopath_and_text):
         # separate filename and text
         audiopath, text = audiopath_and_text[0], audiopath_and_text[1]
-        audiopath = os.path.join(self.path, audiopath)
         text_seq = self.get_text(text)
         mel = self.get_mel(audiopath)
         return (text_seq, mel, text, audiopath_and_text[0])
@@ -205,11 +214,11 @@ def load_mel_buffer_from_file(path):
 def dump_mels_to_disk():
     params = {
         'mode': 'nv_tacotron',
-        'path': 'E:\\audio\\MozillaCommonVoice\\en\\test.tsv',
+        'path': ['E:\\audio\\MozillaCommonVoice\\en\\test.tsv', 'E:\\audio\\LibriTTS\\train-other-500_list.txt'],
+        'fetcher_mode': ['mozilla_cv', 'libritts'],
         'phase': 'train',
         'n_workers': 0,
         'batch_size': 1,
-        'fetcher_mode': 'mozilla_cv',
         'needs_collate': True,
         'max_mel_length': 1000,
         'max_text_length': 200,
@@ -1,3 +1,5 @@
+import os.path
+
 import numpy as np
 from scipy.io.wavfile import read
 import torch
@@ -18,6 +20,9 @@ def load_wav_to_torch(full_path):
 def load_filepaths_and_text(filename, split="|"):
     with open(filename, encoding='utf-8') as f:
         filepaths_and_text = [line.strip().split(split) for line in f]
+        base = os.path.dirname(filename)
+        for j in range(len(filepaths_and_text)):
+            filepaths_and_text[j][0] = os.path.join(base, filepaths_and_text[j][0])
     return filepaths_and_text
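Because both fetchers now join each entry onto the directory of the list file, paths reach the dataset already absolute, which is presumably why get_mel_text_pair above no longer prefixes self.path. A quick self-contained check of that behaviour (the function body is copied from the hunk above; the list file is a throwaway temp file):

import os
import tempfile

def load_filepaths_and_text(filename, split="|"):
    with open(filename, encoding='utf-8') as f:
        filepaths_and_text = [line.strip().split(split) for line in f]
        base = os.path.dirname(filename)
        for j in range(len(filepaths_and_text)):
            filepaths_and_text[j][0] = os.path.join(base, filepaths_and_text[j][0])
    return filepaths_and_text

with tempfile.TemporaryDirectory() as d:
    list_file = os.path.join(d, 'train_list.txt')
    with open(list_file, 'w', encoding='utf-8') as f:
        f.write('clips/sample.wav|Hello there.\n')
    print(load_filepaths_and_text(list_file))
    # [['<tmpdir>/clips/sample.wav', 'Hello there.']] -- rooted at the list file's directory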
codes/scripts/audio/preprocess_libritts.py (new file, 31 lines)
@@ -0,0 +1,31 @@
+# Combines all libriTTS WAV->text mappings into a single file
+import os
+
+from tqdm import tqdm
+
+if __name__ == '__main__':
+    libri_root = 'E:\\audio\\LibriTTS'
+    basis = 'train-other-500'
+
+    readers = os.listdir(os.path.join(libri_root, basis))
+    ofile = open(os.path.join(libri_root, f'{basis}_list.txt'), 'w', encoding='utf-8')
+    for reader_dir in tqdm(readers):
+        reader = os.path.join(libri_root, basis, reader_dir)
+        if not os.path.isdir(reader):
+            continue
+        for chapter_dir in os.listdir(reader):
+            chapter = os.path.join(reader, chapter_dir)
+            if not os.path.isdir(chapter):
+                continue
+            id = f'{os.path.basename(reader)}_{os.path.basename(chapter)}'
+            trans_file = f'{id}.trans.tsv'
+            with open(os.path.join(chapter, trans_file), encoding='utf-8') as f:
+                trans_lines = [line.strip().split('\t') for line in f]
+                for line in trans_lines:
+                    wav_file, raw_text, normalized_text = line
+                    wav_file = '/'.join([basis, reader_dir, chapter_dir, f'{wav_file}.wav'])
+                    if not os.path.exists(os.path.join(libri_root, wav_file)):
+                        print(f'!WARNING could not open {wav_file}')
+                    ofile.write(f'{wav_file}|{normalized_text}\n')
+                    ofile.flush()
+    ofile.close()
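For reference, a sketch of the line format the script appends to train-other-500_list.txt (the reader, chapter, and utterance names below are made up): each WAV path is written relative to the LibriTTS root, so pointing the 'libritts' fetcher at this list file resolves the entries back to absolute paths.

basis, reader_dir, chapter_dir = 'train-other-500', '1234', '5678'   # hypothetical IDs
wav_file = '/'.join([basis, reader_dir, chapter_dir, '1234_5678_000001_000000.wav'])
normalized_text = 'And so the story began.'
print(f'{wav_file}|{normalized_text}')
# train-other-500/1234/5678/1234_5678_000001_000000.wav|And so the story began.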