Allow inference for vqvae

This commit is contained in:
James Betker 2021-07-20 10:40:05 -06:00
parent dfbb806a6e
commit 2325e7a88c
2 changed files with 47 additions and 6 deletions

View File

@ -19,11 +19,15 @@ def forward_pass(model, denoiser, data, output_dir, opt, b):
with torch.no_grad():
model.feed_data(data, 0)
model.test()
waveforms = model.eval_state[opt['eval']['output_state']][0]
waveforms = denoiser(waveforms)
for i in range(waveforms.shape[0]):
audio = waveforms[i][0].cpu().numpy()
pred_waveforms = model.eval_state[opt['eval']['output_state']][0]
pred_waveforms = denoiser(pred_waveforms)
ground_truth_waveforms = model.eval_state[opt['eval']['ground_truth']][0]
ground_truth_waveforms = denoiser(ground_truth_waveforms)
for i in range(pred_waveforms.shape[0]):
audio = pred_waveforms[i][0].cpu().numpy()
wavfile.write(osp.join(output_dir, f'{b}_{i}.wav'), 22050, audio)
audio = ground_truth_waveforms[i][0].cpu().numpy()
wavfile.write(osp.join(output_dir, f'{b}_{i}_ground_truth.wav'), 22050, audio)
if __name__ == "__main__":
@ -36,7 +40,7 @@ if __name__ == "__main__":
torch.backends.cudnn.benchmark = True
want_metrics = False
parser = argparse.ArgumentParser()
parser.add_argument('-opt', type=str, help='Path to options YAML file.', default='../options/test_tacotron2_lj.yml')
parser.add_argument('-opt', type=str, help='Path to options YAML file.', default='../options/test_vqvae_audio_lj.yml')
opt = option.parse(parser.parse_args().opt, is_train=False)
opt = option.dict_to_nonedict(opt)
utils.util.loaded_options = opt

View File

@ -434,7 +434,31 @@ class DecomposeDimensionInjector(Injector):
shape = list(inp.shape)
del dims[self.dim]
del shape[self.dim]
return {self.output: inp.permute([self.dim] + dims).reshape((-1,) + tuple(shape[1:]))}
# Compute the reverse permutation and shape arguments needed to undo this operation.
rev_shape = [inp.shape[self.dim]] + shape.copy()
rev_permute = list(range(len(inp.shape)))[1:] # Looks like [1,2,3]
rev_permute = rev_permute[:self.dim] + [0] + (rev_permute[self.dim:] if self.dim < len(rev_permute) else [])
return {self.output: inp.permute([self.dim] + dims).reshape((-1,) + tuple(shape[1:])),
f'{self.output}_reverse_shape': rev_shape,
f'{self.output}_reverse_permute': rev_permute}
# Undoes a decompose.
class RecomposeDimensionInjector(Injector):
    """Reshape and permute a tensor using arguments stored in the state.

    Options read from ``opt``:
        reverse_shape: state key holding the shape passed to ``reshape``.
        reverse_permute: state key holding the axis order passed to
            ``permute``.

    NOTE(review): ``self.input`` / ``self.output`` are presumably set by the
    ``Injector`` base class from ``opt`` -- confirm against the base class.
    """

    def __init__(self, opt, env):
        super().__init__(opt, env)
        # Only the *names* of the state entries are stored here; the actual
        # shape / permutation values are looked up on every forward call.
        self.rev_shape_key = opt['reverse_shape']
        self.rev_permute_key = opt['reverse_permute']

    def forward(self, state):
        """Return ``{self.output: tensor}`` after reshape, permute, contiguous."""
        tensor = state[self.input]
        target_shape = state[self.rev_shape_key]
        axis_order = state[self.rev_permute_key]
        # Reshape first, then reorder the axes; contiguous() materialises the
        # permuted layout.
        restored = tensor.reshape(target_shape).permute(axis_order)
        return {self.output: restored.contiguous()}
# Performs normalization across fixed constants.
@ -450,6 +474,19 @@ class NormalizeInjector(Injector):
return {self.output: out}
# Reverses normalization across fixed constants (multiplies by scale, then adds shift).
class DenormalizeInjector(Injector):
    """Apply a fixed affine transform: multiply by ``scale``, then add ``shift``.

    Options read from ``opt``:
        shift: constant added after scaling.
        scale: constant the input is multiplied by.

    NOTE(review): ``self.input`` / ``self.output`` are presumably set by the
    ``Injector`` base class from ``opt`` -- confirm against the base class.
    """

    def __init__(self, opt, env):
        super().__init__(opt, env)
        self.shift = opt['shift']
        self.scale = opt['scale']

    def forward(self, state):
        """Return ``{self.output: state[self.input] * scale + shift}``."""
        scaled = state[self.input] * self.scale
        denormalized = scaled + self.shift
        return {self.output: denormalized}
if __name__ == '__main__':
inj = DecomposeDimensionInjector({'dim':2, 'in': 'x', 'out': 'y'}, None)