Another fix

This commit is contained in:
James Betker 2021-12-22 18:30:50 -07:00
parent 6c6daa5795
commit 191e0130ee
2 changed files with 4 additions and 6 deletions

View File

@@ -221,8 +221,8 @@ if __name__ == '__main__':
batch_sz = 8 batch_sz = 8
params = { params = {
'mode': 'paired_voice_audio', 'mode': 'paired_voice_audio',
'path': ['Z:\\bigasr_dataset\\libritts\\test-clean_list.txt'], 'path': ['Z:\\clips\\podcasts-0-transcribed.tsv'],
'fetcher_mode': ['libritts'], 'fetcher_mode': ['tsv'],
'phase': 'train', 'phase': 'train',
'n_workers': 0, 'n_workers': 0,
'batch_size': batch_sz, 'batch_size': batch_sz,
@@ -230,9 +230,7 @@ if __name__ == '__main__':
'max_wav_length': 255995, 'max_wav_length': 255995,
'max_text_length': 200, 'max_text_length': 200,
'sample_rate': 22050, 'sample_rate': 22050,
'load_conditioning': True, 'load_conditioning': False,
'num_conditioning_candidates': 3,
'conditioning_length': 44100,
} }
from data import create_dataset, create_dataloader from data import create_dataset, create_dataloader

View File

@@ -35,7 +35,7 @@ def train():
bcd = datasets.load_dataset('bookcorpus', cache_dir='Z:\\huggingface_datasets\\cache')['train'] bcd = datasets.load_dataset('bookcorpus', cache_dir='Z:\\huggingface_datasets\\cache')['train']
wkd = datasets.load_dataset('wikipedia', '20200501.en', cache_dir='Z:\\huggingface_datasets\\cache')['train'] wkd = datasets.load_dataset('wikipedia', '20200501.en', cache_dir='Z:\\huggingface_datasets\\cache')['train']
allowed_characters_re = re.compile(r'^[a-z!@#%_=:;"/, \-\$\^&\*\(\)\+\{\[\]\}\\\.]+$') allowed_characters_re = re.compile(r'^[0-9a-z!@#%_=:;"/, \-\$\^&\*\(\)\+\{\[\]\}\\\.\']+$')
def preprocess_word(word): def preprocess_word(word):
word = word.lower() word = word.lower()
if not bool(allowed_characters_re.match(word)): if not bool(allowed_characters_re.match(word)):