# Post-patch form of load_tsv from commit 1a0571276439fa1ac3282e4aea79e2b17d77ac3f
# ("pvd"), codes/data/audio/paired_voice_audio_dataset.py, with bad lines skipped.
def load_tsv(filename: str) -> list:
    """Parse a tab-separated listing into ``[path, text]`` pairs.

    Every line is stripped of surrounding whitespace and split on tabs.
    The first field is used as the text and the second field as a path,
    which is joined onto the directory that contains ``filename``.

    A line with fewer than three tab-separated fields is treated as a bad
    entry: it is counted and skipped. Up to ten bad entries are tolerated;
    the eleventh aborts the load.

    Args:
        filename: Path of the TSV file to read (decoded as UTF-8).

    Returns:
        A list of ``[joined_path, text]`` two-element lists, in file order,
        with one entry per well-formed line.

    Raises:
        ValueError: If the file contains more than ten bad entries.
    """
    base = os.path.dirname(filename)
    filepaths_and_text = []
    bad_lines = 0
    with open(filename, encoding='utf-8') as f:
        for line in f:
            components = line.strip().split('\t')
            if len(components) < 3:
                bad_lines += 1
                if bad_lines > 10:
                    # Strip the newline so the sample reads cleanly in logs.
                    sample = line.rstrip('\r\n')
                    message = (f'{filename} contains 10+ bad entries. Failing. '
                               f'Sample last entry: {sample}')
                    print(message)
                    raise ValueError(message)
                # Skip the malformed line. Without this, execution would fall
                # through to components[1], which raises IndexError for blank
                # or single-field lines and appends entries already counted
                # as bad.
                continue
            filepaths_and_text.append([os.path.join(base, components[1]), components[0]])
    return filepaths_and_text