DL-Art-School/codes/scripts/audio/asr_eval.py
2021-12-28 11:45:15 -07:00

89 lines
2.9 KiB
Python

import os
import os.path as osp
import logging
import random
import argparse
import torchvision
import utils
import utils.options as option
import utils.util as util
from models.tacotron2.text import sequence_to_text
from trainer.ExtensibleTrainer import ExtensibleTrainer
from data import create_dataset, create_dataloader
from tqdm import tqdm
import torch
import numpy as np
from scipy.io import wavfile
def forward_pass(model, data, output_dir, opt, macro_b, dataset):
    """Run one evaluation batch through the model and decode the generated text.

    Args:
        model: Object exposing ``feed_data(data, step)``, ``test()`` and an
            ``eval_state`` mapping that is read after ``test()`` returns.
        data: Batch handed to ``model.feed_data``. When
            ``opt['eval']['real_text']`` is configured, it is also indexed
            with that key to fetch the reference text of each sample.
        output_dir: Unused; kept so existing callers keep working.
        opt: Options mapping. ``opt['eval']['gen_text']`` names the
            ``eval_state`` entry whose first element holds the generated
            codes (one row per sample).
        macro_b: Batch index, only used to label the printed reference text.
        dataset: Dataset the batch came from. If it has a ``tokenizer``
            attribute, ``tokenizer.decode`` turns codes into text (and the
            ``' $$$'`` marker is stripped); otherwise ``sequence_to_text``
            is used.

    Returns:
        list: One decoded string per sample in the batch.
    """
    with torch.no_grad():
        model.feed_data(data, 0)
        model.test()

    # Everything below is constant for the whole batch, so resolve it once
    # instead of once per sample.
    eval_opt = opt['eval']
    gen_key = eval_opt['gen_text']
    show_real = 'real_text' in eval_opt
    use_tokenizer = hasattr(dataset, 'tokenizer')

    # One device-to-host copy for the whole batch rather than one per row.
    batch_codes = model.eval_state[gen_key][0].cpu()

    txts = []
    for b, codes in enumerate(batch_codes):
        if show_real:
            real = data[eval_opt['real_text']][b]
            print(f'{macro_b} {b} Real text: "{real}"')

        if use_tokenizer:
            text = dataset.tokenizer.decode(codes.numpy())
            txts.append(text.replace(' $$$', ''))
        else:
            txts.append(sequence_to_text(codes))
    return txts
if __name__ == "__main__":
    # Script entry point: load the options file, build the validation dataset
    # and the trainer, decode every batch with forward_pass(), and write one
    # "<prediction>\t<file basename>" row per sample to results.tsv in the
    # current working directory.

    # Set seeds so repeated runs use the same random state.
    torch.manual_seed(5555)
    random.seed(5555)
    np.random.seed(5555)

    #### options
    torch.backends.cudnn.benchmark = True
    parser = argparse.ArgumentParser()
    parser.add_argument('-opt', type=str, help='Path to options YAML file.', default='../options/test_gpt_asr_hf2.yml')
    opt = option.parse(parser.parse_args().opt, is_train=False)
    opt = option.dict_to_nonedict(opt)
    # Publish the parsed options on utils.util for code that reads them there.
    utils.util.loaded_options = opt

    # Create every configured path except the experiment root and any
    # pretrain/resume entries.
    util.mkdirs(
        (path for key, path in opt['path'].items()
         if not key == 'experiments_root' and 'pretrain_model' not in key and 'resume' not in key))
    util.setup_logger('base', opt['path']['log'], 'test_' + opt['name'], level=logging.INFO,
                      screen=True, tofile=True)
    logger = logging.getLogger('base')
    logger.info(option.dict2str(opt))

    dataset_opt = opt['datasets']['val']
    test_set, collate_fn = create_dataset(dataset_opt, return_collate=True)
    test_loader = create_dataloader(test_set, dataset_opt, collate_fn=collate_fn)
    logger.info('Number of test texts in [{:s}]: {:d}'.format(dataset_opt['name'], len(test_set)))

    model = ExtensibleTrainer(opt)

    dataset_dir = opt['path']['results_root']
    util.mkdir(dataset_dir)

    # The context manager guarantees results.tsv is flushed and closed even
    # if a batch raises part-way through the evaluation.
    with open('results.tsv', 'w') as output:
        for batch, data in enumerate(tqdm(test_loader)):
            #if data['clip'].shape[-1] > opt['networks']['asr_gen']['kwargs']['max_mel_frames']*255:
            #    continue
            preds = forward_pass(model, data, dataset_dir, opt, batch, test_set)
            for b, pred in enumerate(preds):
                pred = pred.replace('_', '')
                output.write(f'{pred}\t{os.path.basename(data["filenames"][b])}\n')
                print(pred)
            # Flush once per batch so partial results survive an interrupted run.
            output.flush()