diff --git a/codes/scripts/audio/test_audio_gen.py b/codes/scripts/audio/test_audio_gen.py index e8046d93..39fb6e40 100644 --- a/codes/scripts/audio/test_audio_gen.py +++ b/codes/scripts/audio/test_audio_gen.py @@ -19,11 +19,15 @@ def forward_pass(model, denoiser, data, output_dir, opt, b): with torch.no_grad(): model.feed_data(data, 0) model.test() - waveforms = model.eval_state[opt['eval']['output_state']][0] - waveforms = denoiser(waveforms) - for i in range(waveforms.shape[0]): - audio = waveforms[i][0].cpu().numpy() + pred_waveforms = model.eval_state[opt['eval']['output_state']][0] + pred_waveforms = denoiser(pred_waveforms) + ground_truth_waveforms = model.eval_state[opt['eval']['ground_truth']][0] + ground_truth_waveforms = denoiser(ground_truth_waveforms) + for i in range(pred_waveforms.shape[0]): + audio = pred_waveforms[i][0].cpu().numpy() wavfile.write(osp.join(output_dir, f'{b}_{i}.wav'), 22050, audio) + audio = ground_truth_waveforms[i][0].cpu().numpy() + wavfile.write(osp.join(output_dir, f'{b}_{i}_ground_truth.wav'), 22050, audio) if __name__ == "__main__": @@ -36,7 +40,7 @@ if __name__ == "__main__": torch.backends.cudnn.benchmark = True want_metrics = False parser = argparse.ArgumentParser() - parser.add_argument('-opt', type=str, help='Path to options YAML file.', default='../options/test_tacotron2_lj.yml') + parser.add_argument('-opt', type=str, help='Path to options YAML file.', default='../options/test_vqvae_audio_lj.yml') opt = option.parse(parser.parse_args().opt, is_train=False) opt = option.dict_to_nonedict(opt) utils.util.loaded_options = opt diff --git a/codes/trainer/injectors/base_injectors.py b/codes/trainer/injectors/base_injectors.py index ce53f972..9500a278 100644 --- a/codes/trainer/injectors/base_injectors.py +++ b/codes/trainer/injectors/base_injectors.py @@ -434,7 +434,31 @@ class DecomposeDimensionInjector(Injector): shape = list(inp.shape) del dims[self.dim] del shape[self.dim] - return {self.output: inp.permute([self.dim] + dims).reshape((-1,) + tuple(shape[1:]))} + + # Compute the reverse permutation and shape arguments needed to undo this operation. + rev_shape = [inp.shape[self.dim]] + shape.copy() + rev_permute = list(range(len(inp.shape)))[1:] # Looks like [1,2,3] + rev_permute = rev_permute[:self.dim] + [0] + (rev_permute[self.dim:] if self.dim < len(rev_permute) else []) + + return {self.output: inp.permute([self.dim] + dims).reshape((-1,) + tuple(shape[1:])), + f'{self.output}_reverse_shape': rev_shape, + f'{self.output}_reverse_permute': rev_permute} + + +# Undoes a decompose. +class RecomposeDimensionInjector(Injector): + def __init__(self, opt, env): + super().__init__(opt, env) + self.rev_shape_key = opt['reverse_shape'] + self.rev_permute_key = opt['reverse_permute'] + + def forward(self, state): + inp = state[self.input] + rev_shape = state[self.rev_shape_key] + rev_permute = state[self.rev_permute_key] + out = inp.reshape(rev_shape) + out = out.permute(rev_permute).contiguous() + return {self.output: out} # Performs normalization across fixed constants. @@ -450,6 +474,19 @@ class NormalizeInjector(Injector): return {self.output: out} +# Performs normalization across fixed constants. +class DenormalizeInjector(Injector): + def __init__(self, opt, env): + super().__init__(opt, env) + self.shift = opt['shift'] + self.scale = opt['scale'] + + def forward(self, state): + inp = state[self.input] + out = inp * self.scale + self.shift + return {self.output: out} + + if __name__ == '__main__': inj = DecomposeDimensionInjector({'dim':2, 'in': 'x', 'out': 'y'}, None)