Another fix

This commit is contained in:
James Betker 2021-12-22 18:30:50 -07:00
parent 6c6daa5795
commit 191e0130ee
2 changed files with 4 additions and 6 deletions

View File

@@ -221,8 +221,8 @@ if __name__ == '__main__':
batch_sz = 8
params = {
'mode': 'paired_voice_audio',
-'path': ['Z:\\bigasr_dataset\\libritts\\test-clean_list.txt'],
-'fetcher_mode': ['libritts'],
+'path': ['Z:\\clips\\podcasts-0-transcribed.tsv'],
+'fetcher_mode': ['tsv'],
'phase': 'train',
'n_workers': 0,
'batch_size': batch_sz,
@@ -230,9 +230,7 @@ if __name__ == '__main__':
'max_wav_length': 255995,
'max_text_length': 200,
'sample_rate': 22050,
-'load_conditioning': True,
-'num_conditioning_candidates': 3,
-'conditioning_length': 44100,
+'load_conditioning': False,
}
from data import create_dataset, create_dataloader

View File

@@ -35,7 +35,7 @@ def train():
bcd = datasets.load_dataset('bookcorpus', cache_dir='Z:\\huggingface_datasets\\cache')['train']
wkd = datasets.load_dataset('wikipedia', '20200501.en', cache_dir='Z:\\huggingface_datasets\\cache')['train']
-allowed_characters_re = re.compile(r'^[a-z!@#%_=:;"/, \-\$\^&\*\(\)\+\{\[\]\}\\\.]+$')
+allowed_characters_re = re.compile(r'^[0-9a-z!@#%_=:;"/, \-\$\^&\*\(\)\+\{\[\]\}\\\.\']+$')
def preprocess_word(word):
word = word.lower()
if not bool(allowed_characters_re.match(word)):