From b398ecca0151ce1e3928887d115e254e938cf520 Mon Sep 17 00:00:00 2001
From: James Betker
Date: Sat, 15 Jan 2022 17:28:17 -0700
Subject: [PATCH] wer fix

---
 codes/scripts/audio/word_error_rate.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/codes/scripts/audio/word_error_rate.py b/codes/scripts/audio/word_error_rate.py
index 5a0a0e07..8c6ca203 100644
--- a/codes/scripts/audio/word_error_rate.py
+++ b/codes/scripts/audio/word_error_rate.py
@@ -31,13 +31,14 @@ if __name__ == '__main__':
     # Pre-process truth values
     truths = load_truths(libri_base)
 
+    niltok = VoiceBpeTokenizer(None)
     ground_truths = []
     hypotheses = []
     with open(inference_tsv, 'r') as tsv_file:
         tsv = tsv_file.read().splitlines()
         for line in tqdm(tsv):
             sentence_pred, wav = line.split('\t')
-            hypotheses.append(sentence_pred)
+            hypotheses.append(niltok.preprocess_text(sentence_pred))
             ground_truths.append(truths[wav])
     wer = wer(ground_truths, hypotheses)*100
     print(f"WER: {wer}")