Allow inference for vqvae

2021-07-20 10:40:05 -06:00 · 2021-07-20 10:40:05 -06:00 · 2325e7a88c
commit 2325e7a88c
parent dfbb806a6e
2 changed files with 47 additions and 6 deletions
--- a/codes/scripts/audio/test_audio_gen.py
+++ b/codes/scripts/audio/test_audio_gen.py
@@ -19,11 +19,15 @@ def forward_pass(model, denoiser, data, output_dir, opt, b):
    with torch.no_grad():
        model.feed_data(data, 0)
        model.test()
-    waveforms = model.eval_state[opt['eval']['output_state']][0]
-    waveforms = denoiser(waveforms)
-    for i in range(waveforms.shape[0]):
-        audio = waveforms[i][0].cpu().numpy()
+    pred_waveforms = model.eval_state[opt['eval']['output_state']][0]
+    pred_waveforms = denoiser(pred_waveforms)
+    ground_truth_waveforms = model.eval_state[opt['eval']['ground_truth']][0]
+    ground_truth_waveforms = denoiser(ground_truth_waveforms)
+    for i in range(pred_waveforms.shape[0]):
+        audio = pred_waveforms[i][0].cpu().numpy()
        wavfile.write(osp.join(output_dir, f'{b}_{i}.wav'), 22050, audio)
+        audio = ground_truth_waveforms[i][0].cpu().numpy()
+        wavfile.write(osp.join(output_dir, f'{b}_{i}_ground_truth.wav'), 22050, audio)


 if __name__ == "__main__":
@@ -36,7 +40,7 @@ if __name__ == "__main__":
    torch.backends.cudnn.benchmark = True
    want_metrics = False
    parser = argparse.ArgumentParser()
-    parser.add_argument('-opt', type=str, help='Path to options YAML file.', default='../options/test_tacotron2_lj.yml')
+    parser.add_argument('-opt', type=str, help='Path to options YAML file.', default='../options/test_vqvae_audio_lj.yml')
    opt = option.parse(parser.parse_args().opt, is_train=False)
    opt = option.dict_to_nonedict(opt)
    utils.util.loaded_options = opt
--- a/codes/trainer/injectors/base_injectors.py
+++ b/codes/trainer/injectors/base_injectors.py
@@ -434,7 +434,31 @@ class DecomposeDimensionInjector(Injector):
        shape = list(inp.shape)
        del dims[self.dim]
        del shape[self.dim]
-        return {self.output: inp.permute([self.dim] + dims).reshape((-1,) + tuple(shape[1:]))}
+
+        # Compute the reverse permutation and shape arguments needed to undo this operation.
+        rev_shape = [inp.shape[self.dim]] + shape.copy()
+        rev_permute = list(range(len(inp.shape)))[1:]  # Looks like [1,2,3]
+        rev_permute = rev_permute[:self.dim] + [0] + (rev_permute[self.dim:] if self.dim < len(rev_permute) else [])
+
+        return {self.output: inp.permute([self.dim] + dims).reshape((-1,) + tuple(shape[1:])),
+                f'{self.output}_reverse_shape': rev_shape,
+                f'{self.output}_reverse_permute': rev_permute}
+
+
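+# Undoes a DecomposeDimensionInjector: reshapes the input to the stored reverse shape, then applies the stored reverse permutation.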
+class RecomposeDimensionInjector(Injector):
+    def __init__(self, opt, env):
+        super().__init__(opt, env)
+        self.rev_shape_key = opt['reverse_shape']
+        self.rev_permute_key = opt['reverse_permute']
+
+    def forward(self, state):
+        inp = state[self.input]
+        rev_shape = state[self.rev_shape_key]
+        rev_permute = state[self.rev_permute_key]
+        out = inp.reshape(rev_shape)
+        out = out.permute(rev_permute).contiguous()
+        return {self.output: out}


 # Performs normalization across fixed constants.
@@ -450,6 +474,19 @@ class NormalizeInjector(Injector):
        return {self.output: out}


+# Applies a fixed scale and shift (out = inp * scale + shift); presumably the inverse of NormalizeInjector.
+class DenormalizeInjector(Injector):
+    def __init__(self, opt, env):
+        super().__init__(opt, env)
+        self.shift = opt['shift']
+        self.scale = opt['scale']
+
+    def forward(self, state):
+        inp = state[self.input]
+        out = inp * self.scale + self.shift
+        return {self.output: out}
+
+

 if __name__ == '__main__':
    inj = DecomposeDimensionInjector({'dim':2, 'in': 'x', 'out': 'y'}, None)