diff --git a/src/utils.py b/src/utils.py index e3277bf..7abc779 100755 --- a/src/utils.py +++ b/src/utils.py @@ -1487,6 +1487,11 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p lines = { 'training': [], 'validation': [] } segments = {} + # I'm not sure how the VALL-E implementation decides what's validation and what's not + if args.tts_backend == "vall-e": + text_length = 0 + audio_length = 0 + for filename in enumerate_progress(results, desc="Parsing results", progress=progress): use_segment = use_segments @@ -1647,7 +1652,7 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p quantized = valle_quantize( waveform, sample_rate ).cpu() torch.save(quantized, qnt_file) - print("Quantized:", file) + print("Quantized:", qnt_file) for i in enumerate_progress(range(len(jobs['phonemize'][0])), desc="Phonemizing", progress=progress): phn_file = jobs['phonemize'][0][i] @@ -1655,7 +1660,7 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p phonemized = valle_phonemize( normalized ) open(phn_file, 'w', encoding='utf-8').write(" ".join(phonemized)) - print("Phonemized:", file) + print("Phonemized:", phn_file) training_joined = "\n".join(lines['training']) validation_joined = "\n".join(lines['validation'])