diff --git a/codes/scripts/audio/preparation/combine_phonetic_and_text.py b/codes/scripts/audio/preparation/combine_phonetic_and_text.py
new file mode 100644
index 00000000..cc0d344d
--- /dev/null
+++ b/codes/scripts/audio/preparation/combine_phonetic_and_text.py
@@ -0,0 +1,43 @@
+"""Merge English transcriptions into a phoneme TSV.
+
+Joins two three-column, tab-separated files under ``basepath`` on their second
+column and writes ``transcribed-phoneme-english-oco.tsv`` with rows of
+text (from the transcription file), key, codes (from the phoneme file).
+"""
+import os
+
+if __name__ == '__main__':
+    basepath = 'Y:/clips/books2'
+
+    # Prefer the "realtext" transcription; fall back to the plain one if it is absent.
+    english_file = os.path.join(basepath, 'transcribed-oco-realtext.tsv')
+    if not os.path.exists(english_file):
+        english_file = os.path.join(basepath, 'transcribed-oco.tsv')
+    phoneme_file = os.path.join(basepath, 'transcribed-phoneme-oco.tsv')
+    output_file = os.path.join(basepath, 'transcribed-phoneme-english-oco.tsv')
+
+    # Index the transcription text (column 1) by the key in column 2.
+    texts = {}
+    with open(english_file, 'r', encoding='utf-8') as f:
+        for line in f:
+            spl = line.split('\t')
+            if len(spl) == 3:
+                text, p, _ = spl
+                texts[p] = text
+            else:
+                print(f'Error processing line {line}')
+
+    # Open the output with `with` so it is flushed and closed even if a row raises.
+    with open(phoneme_file, 'r', encoding='utf-8') as f, \
+            open(output_file, 'w', encoding='utf-8') as wf:
+        for line in f:
+            spl = line.split('\t')
+            if len(spl) != 3:
+                # Report malformed rows instead of dropping them silently.
+                print(f'Error processing line {line}')
+                continue
+            _, p, codes = spl
+            if p not in texts:
+                print(f'Could not find the text for {p}')
+                continue
+            wf.write(f'{texts[p]}\t{p}\t{codes.strip()}\n')
diff --git a/codes/trainer/eval/audio_diffusion_fid.py b/codes/trainer/eval/audio_diffusion_fid.py
index e9c56b45..ad1937bf 100644
--- a/codes/trainer/eval/audio_diffusion_fid.py
+++ b/codes/trainer/eval/audio_diffusion_fid.py
@@ -1,5 +1,7 @@
 import os
 import os.path as osp
+import random
+
 import torch
 import torchaudio
 import torchvision.utils
@@ -33,6 +35,13 @@ class AudioDiffusionFid(evaluator.Evaluator):
         super().__init__(model, opt_eval, env, uses_all_ddp=True)
         self.real_path = opt_eval['eval_tsv']
         self.data = load_tsv_aligned_codes(self.real_path)
+
+        # Deterministically shuffle the data.
+        ostate = random.getstate()
+        random.seed(5)
+        random.shuffle(self.data)
+        random.setstate(ostate)
+
         if 'clip_dataset' in opt_eval.keys():
             self.data = self.data[:opt_eval['clip_dataset']]
         if distributed.is_initialized() and distributed.get_world_size() > 1:
diff --git a/codes/trainer/eval/eval_wer.py b/codes/trainer/eval/eval_wer.py
index 0c6f5f2f..a3f70445 100644
--- a/codes/trainer/eval/eval_wer.py
+++ b/codes/trainer/eval/eval_wer.py
@@ -1,6 +1,6 @@
 from copy import deepcopy
 
-from datasets import load_metric
+#from datasets import load_metric
 
 import torch
 from tqdm import tqdm