lots of bad entries

This commit is contained in:
James Betker 2022-03-05 23:15:59 -07:00
parent dcf98df0c2
commit 30ddac69aa

View File

@@ -24,8 +24,8 @@ def load_tsv(filename):
components = line.strip().split('\t') components = line.strip().split('\t')
if len(components) < 2: if len(components) < 2:
bad_lines += 1 bad_lines += 1
if bad_lines > 10: if bad_lines > 1000:
print(f'{filename} contains 10+ bad entries. Failing. Sample last entry: {line}') print(f'{filename} contains 1000+ bad entries. Failing. Sample last entry: {line}')
raise ValueError raise ValueError
continue continue
filepaths_and_text.append([os.path.join(base, f'{components[1]}'), components[0]]) filepaths_and_text.append([os.path.join(base, f'{components[1]}'), components[0]])
@@ -50,8 +50,8 @@ def load_tsv_aligned_codes(filename):
components = line.strip().split('\t') components = line.strip().split('\t')
if len(components) < 3: if len(components) < 3:
bad_lines += 1 bad_lines += 1
if bad_lines > 10: if bad_lines > 1000:
print(f'{filename} contains 10+ bad entries. Failing. Sample last entry: {line}') print(f'{filename} contains 1000+ bad entries. Failing. Sample last entry: {line}')
raise ValueError raise ValueError
continue continue
filepaths_and_text.append([os.path.join(base, f'{components[1]}'), components[0], convert_string_list_to_tensor(components[2])]) filepaths_and_text.append([os.path.join(base, f'{components[1]}'), components[0], convert_string_list_to_tensor(components[2])])