diff --git a/codes/models/steps/tecogan_losses.py b/codes/models/steps/tecogan_losses.py index db301c7a..b460a15d 100644 --- a/codes/models/steps/tecogan_losses.py +++ b/codes/models/steps/tecogan_losses.py @@ -97,9 +97,15 @@ class RecurrentImageGeneratorSequenceInjector(Injector): else: input = extract_inputs_index(inputs, i) with torch.no_grad() and autocast(enabled=False): - reduced_recurrent = F.interpolate(recurrent_input, scale_factor=1/self.scale, mode='bicubic') - flow_input = torch.stack([input[self.input_lq_index], reduced_recurrent], dim=2).float() - flowfield = F.interpolate(flow(flow_input), scale_factor=self.scale, mode='bicubic') + # This is a hack to workaround the fact that flownet2 cannot operate at resolutions < 64px. An assumption is + # made here that if you are operating at 4x scale, your inputs are 32px x 32px + if self.scale >= 4: + flow_input = F.interpolate(input[self.input_lq_index], scale_factor=self.scale//2, mode='bicubic') + else: + flow_input = input[self.input_lq_index] + reduced_recurrent = F.interpolate(recurrent_input, scale_factor=.5, mode='bicubic') + flow_input = torch.stack([flow_input, reduced_recurrent], dim=2).float() + flowfield = F.interpolate(flow(flow_input), scale_factor=2, mode='bicubic') recurrent_input = self.resample(recurrent_input.float(), flowfield) input[self.recurrent_index] = recurrent_input if self.env['step'] % 50 == 0: @@ -122,9 +128,15 @@ class RecurrentImageGeneratorSequenceInjector(Injector): input = extract_inputs_index(inputs, i) with torch.no_grad(): with autocast(enabled=False): - reduced_recurrent = F.interpolate(recurrent_input, scale_factor=1 / self.scale, mode='bicubic') - flow_input = torch.stack([input[self.input_lq_index], reduced_recurrent], dim=2).float() - flowfield = F.interpolate(flow(flow_input), scale_factor=self.scale, mode='bicubic') + # This is a hack to workaround the fact that flownet2 cannot operate at resolutions < 64px. An assumption is + # made here that if you are operating at 4x scale, your inputs are 32px x 32px + if self.scale >= 4: + flow_input = F.interpolate(input[self.input_lq_index], scale_factor=self.scale//2, mode='bicubic') + else: + flow_input = input[self.input_lq_index] + reduced_recurrent = F.interpolate(recurrent_input, scale_factor=.5, mode='bicubic') + flow_input = torch.stack([flow_input, reduced_recurrent], dim=2).float() + flowfield = F.interpolate(flow(flow_input), scale_factor=2, mode='bicubic') recurrent_input = self.resample(recurrent_input.float(), flowfield) input[self.recurrent_index] = recurrent_input if self.env['step'] % 50 == 0: diff --git a/codes/multi_modal_train.py b/codes/multi_modal_train.py index 3b4cf1cb..23d1c379 100644 --- a/codes/multi_modal_train.py +++ b/codes/multi_modal_train.py @@ -42,14 +42,11 @@ def main(master_opt, launcher): if __name__ == '__main__': parser = argparse.ArgumentParser() - #parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_exd_imgset_chained_structured_trans_invariance.yml') + parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_exd_imgset_chained_structured_trans_invariance.yml') parser.add_argument('--launcher', choices=['none', 'pytorch'], default='none', help='job launcher') args = parser.parse_args() Loader, Dumper = OrderedYaml() with open(args.opt, mode='r') as f: opt = yaml.load(f, Loader=Loader) - opt = { - 'trainer_options': ['../options/teco.yml', '../options/exd.yml'] - } - main(opt, args.launcher) \ No newline at end of file + main(opt, args.launcher)