This commit is contained in:
James Betker 2022-01-15 17:28:17 -07:00
parent 9100e7fa9b
commit b398ecca01

View File

@@ -31,13 +31,14 @@ if __name__ == '__main__':
# Pre-process truth values # Pre-process truth values
truths = load_truths(libri_base) truths = load_truths(libri_base)
niltok = VoiceBpeTokenizer(None)
ground_truths = [] ground_truths = []
hypotheses = [] hypotheses = []
with open(inference_tsv, 'r') as tsv_file: with open(inference_tsv, 'r') as tsv_file:
tsv = tsv_file.read().splitlines() tsv = tsv_file.read().splitlines()
for line in tqdm(tsv): for line in tqdm(tsv):
sentence_pred, wav = line.split('\t') sentence_pred, wav = line.split('\t')
hypotheses.append(sentence_pred) hypotheses.append(niltok.preprocess_text(sentence_pred))
ground_truths.append(truths[wav]) ground_truths.append(truths[wav])
wer = wer(ground_truths, hypotheses)*100 wer = wer(ground_truths, hypotheses)*100
print(f"WER: {wer}") print(f"WER: {wer}")