Tolerate lots of bad entries: raise the bad-line limit from 10 to 1000 in load_tsv and load_tsv_aligned_codes
This commit is contained in:
parent
dcf98df0c2
commit
30ddac69aa
|
@ -24,8 +24,8 @@ def load_tsv(filename):
|
||||||
components = line.strip().split('\t')
|
components = line.strip().split('\t')
|
||||||
if len(components) < 2:
|
if len(components) < 2:
|
||||||
bad_lines += 1
|
bad_lines += 1
|
||||||
if bad_lines > 10:
|
if bad_lines > 1000:
|
||||||
print(f'{filename} contains 10+ bad entries. Failing. Sample last entry: {line}')
|
print(f'{filename} contains 1000+ bad entries. Failing. Sample last entry: {line}')
|
||||||
raise ValueError
|
raise ValueError
|
||||||
continue
|
continue
|
||||||
filepaths_and_text.append([os.path.join(base, f'{components[1]}'), components[0]])
|
filepaths_and_text.append([os.path.join(base, f'{components[1]}'), components[0]])
|
||||||
|
@ -50,8 +50,8 @@ def load_tsv_aligned_codes(filename):
|
||||||
components = line.strip().split('\t')
|
components = line.strip().split('\t')
|
||||||
if len(components) < 3:
|
if len(components) < 3:
|
||||||
bad_lines += 1
|
bad_lines += 1
|
||||||
if bad_lines > 10:
|
if bad_lines > 1000:
|
||||||
print(f'{filename} contains 10+ bad entries. Failing. Sample last entry: {line}')
|
print(f'{filename} contains 1000+ bad entries. Failing. Sample last entry: {line}')
|
||||||
raise ValueError
|
raise ValueError
|
||||||
continue
|
continue
|
||||||
filepaths_and_text.append([os.path.join(base, f'{components[1]}'), components[0], convert_string_list_to_tensor(components[2])])
|
filepaths_and_text.append([os.path.join(base, f'{components[1]}'), components[0], convert_string_list_to_tensor(components[2])])
|
||||||
|
|
Loading…
Reference in New Issue
Block a user