DL-Art-School/codes/scripts/audio/word_error_rate.py

import Levenshtein
from jiwer import wer, compute_measures
import torch
from tqdm import tqdm

from data.audio.voice_tokenizer import VoiceBpeTokenizer


def load_truths(file):
    """
    Builds a lookup of ground-truth transcripts from a pipe-delimited text file.

    Every line is expected to have the form `<path>|<transcript>`. A line that does not split into exactly two
    fields on '|' is printed and skipped. The path is used as the dict key exactly as it appears in the file;
    the transcript is passed through `VoiceBpeTokenizer.preprocess_text` before being stored.

    :param file: Path of the transcript file, read as UTF-8.
    :return: dict mapping path -> preprocessed transcript.
    """
    tokenizer = VoiceBpeTokenizer(None)
    transcripts = {}
    with open(file, 'r', encoding='utf-8') as handle:
        for entry in handle:
            fields = entry.split('|')
            if len(fields) == 2:
                key, text = fields
                # Per the original author's note, preprocess_text mirrors how training text is prepared (punctuation
                # and case normalization, whitespace cleanup, "english cleaners"), so truths and predictions compare
                # on equal footing.
                transcripts[key] = tokenizer.preprocess_text(text)
            else:
                # Surface malformed lines so they can be inspected, then move on.
                print(fields)
    return transcripts


if __name__ == '__main__':
    # Scores the predictions listed in `inference_tsv` against the transcripts in `libri_base` and prints the
    # word error rate as a percentage. Each TSV line is expected to be `<predicted sentence>\t<wav path>`, where
    # the wav path must match a key produced by load_truths().
    inference_tsv = 'results.tsv'
    libri_base = 'y:\\bigasr_dataset/librispeech/test_clean/test_clean.txt'

    # Pre-process truth values
    truths = load_truths(libri_base)

    niltok = VoiceBpeTokenizer(None)
    ground_truths = []
    hypotheses = []
    # NOTE(review): the truth file is read as UTF-8 but this file uses the platform default encoding - confirm
    # how results.tsv is written before pinning an encoding here.
    with open(inference_tsv, 'r') as tsv_file:
        tsv = tsv_file.read().splitlines()
    # The file has been read in full, so the handle is released before the (potentially slow) scoring loop.
    for line in tqdm(tsv):
        if not line.strip():
            # Blank lines carry no prediction; unpacking them below would raise.
            continue
        sentence_pred, wav = line.split('\t')
        # Predictions get the same text normalization as the truths so the comparison is like-for-like.
        hypotheses.append(niltok.preprocess_text(sentence_pred))
        ground_truths.append(truths[wav])
    # Store the score under its own name: assigning to `wer` would replace the imported jiwer function.
    wer_percent = wer(ground_truths, hypotheses) * 100
    print(f"WER: {wer_percent}")
update wer script 2022-01-14 00:08:49 +00:00			`import Levenshtein`
			`from jiwer import wer, compute_measures`
WER script 2021-10-26 19:30:29 +00:00			`import torch`
			`from tqdm import tqdm`

wer update 2021-12-31 23:21:39 +00:00			`from data.audio.voice_tokenizer import VoiceBpeTokenizer`
WER script 2021-10-26 19:30:29 +00:00

wer update 2021-12-31 23:21:39 +00:00			`def load_truths(file):`
			`niltok = VoiceBpeTokenizer(None)`
			`out = {}`
			`with open(file, 'r', encoding='utf-8') as f:`
update wer script 2022-01-14 00:08:49 +00:00			`for line in f.readlines():`
wer update 2021-12-31 23:21:39 +00:00			`spl = line.split('\|')`
			`if len(spl) != 2:`
update wer script 2022-01-14 00:08:49 +00:00			`print(spl)`
wer update 2021-12-31 23:21:39 +00:00			`continue`
			`path, truth = spl`
update wer script 2022-01-14 00:08:49 +00:00			`#path = path.replace('wav/', '')`
			`# This preprocesses the truth data in the same way that training data is processed: removing punctuation, all lowercase, removing unnecessary`
			`# whitespace, and applying "english cleaners", which convert words like "mrs" to "missus" and such.`
			`truth = niltok.preprocess_text(truth)`
wer update 2021-12-31 23:21:39 +00:00			`out[path] = truth`
			`return out`


WER script 2021-10-26 19:30:29 +00:00			`if __name__ == '__main__':`
wer update 2021-12-31 23:21:39 +00:00			`inference_tsv = 'results.tsv'`
update wer script 2022-01-14 00:08:49 +00:00			`libri_base = 'y:\\bigasr_dataset/librispeech/test_clean/test_clean.txt'`
wer update 2021-12-31 23:21:39 +00:00
			`# Pre-process truth values`
			`truths = load_truths(libri_base)`
WER script 2021-10-26 19:30:29 +00:00
wer fix 2022-01-16 00:28:17 +00:00			`niltok = VoiceBpeTokenizer(None)`
update wer script 2022-01-14 00:08:49 +00:00			`ground_truths = []`
			`hypotheses = []`
WER script 2021-10-26 19:30:29 +00:00			`with open(inference_tsv, 'r') as tsv_file:`
			`tsv = tsv_file.read().splitlines()`
			`for line in tqdm(tsv):`
			`sentence_pred, wav = line.split('\t')`
wer fix 2022-01-16 00:28:17 +00:00			`hypotheses.append(niltok.preprocess_text(sentence_pred))`
update wer script 2022-01-14 00:08:49 +00:00			`ground_truths.append(truths[wav])`
			`wer = wer(ground_truths, hypotheses)*100`
			`print(f"WER: {wer}")`