Enable testing in ExtensibleTrainer, fix it in SRGAN_model

Also compute the feature (fea) loss during testing.
2020-08-31 09:41:48 -06:00 · 2020-08-31 09:41:48 -06:00 · 4b4d08bdec
commit 4b4d08bdec
parent b2091cb698
7 changed files with 86 additions and 68 deletions
--- a/codes/models/ExtensibleTrainer.py
+++ b/codes/models/ExtensibleTrainer.py
@ -30,6 +30,11 @@ class ExtensibleTrainer(BaseModel):
               'opt': opt,
               'step': 0}

+        self.mega_batch_factor = 1
+        if self.is_train:
+            self.mega_batch_factor = train_opt['mega_batch_factor']
+            self.env['mega_batch_factor'] = self.mega_batch_factor
+
        self.netsG = {}
        self.netsD = {}
        self.netF = networks.define_F().to(self.device)  # Used to compute feature loss.
@ -49,12 +54,6 @@ class ExtensibleTrainer(BaseModel):
            step = ConfigurableStep(step, self.env)
            self.steps.append(step)

-        if self.is_train:
-            self.mega_batch_factor = train_opt['mega_batch_factor']
-            if self.mega_batch_factor is None:
-                self.mega_batch_factor = 1
-            self.env['mega_batch_factor'] = self.mega_batch_factor
-
        # The steps rely on the networks being placed in the env, so put them there. Even though they arent wrapped
        # yet.
        self.env['generators'] = self.netsG
@ -65,11 +64,14 @@ class ExtensibleTrainer(BaseModel):
            s.define_optimizers()
            self.optimizers.extend(s.get_optimizers())

+        if self.is_train:
            # Find the optimizers that are using the default scheduler, then build them.
            def_opt = []
            for s in self.steps:
                def_opt.extend(s.get_optimizers_with_default_scheduler())
            self.schedulers = lr_scheduler.get_scheduler_for_name(train_opt['default_lr_scheme'], def_opt, train_opt)
+        else:
+            self.schedulers = []

        # Initialize amp.
        total_nets = [g for g in self.netsG.values()] + [d for d in self.netsD.values()]
@ -121,7 +123,12 @@ class ExtensibleTrainer(BaseModel):
        # Setting this to false triggers SRGAN to call the models update_model() function on the first iteration.
        self.updated = True

-    def feed_data(self, data):
+    def feed_data(self, data, need_GT=False):
+        self.eval_state = {}
+        for o in self.optimizers:
+            o.zero_grad()
+        torch.cuda.empty_cache()
+
        self.lq = torch.chunk(data['LQ'].to(self.device), chunks=self.mega_batch_factor, dim=0)
        self.hq = [t.to(self.device) for t in torch.chunk(data['GT'], chunks=self.mega_batch_factor, dim=0)]
        input_ref = data['ref'] if 'ref' in data else data['GT']
@ -206,7 +213,9 @@ class ExtensibleTrainer(BaseModel):
                for k, v in ns.items():
                    state[k] = [v]

-            self.eval_state = state
+            self.eval_state = {}
+            for k, v in state.items():
+                self.eval_state[k] = [s.detach().cpu() if isinstance(s, torch.Tensor) else s for s in v]

        for net in self.netsG.values():
            net.train()
--- a/codes/models/SRGAN_model.py
+++ b/codes/models/SRGAN_model.py
@ -352,6 +352,9 @@ class SRGANModel(BaseModel):
            self.gan_lq_img_use_prob = train_opt['gan_lowres_use_probability'] if train_opt['gan_lowres_use_probability'] else 0

            self.img_debug_steps = opt['logger']['img_debug_steps'] if 'img_debug_steps' in opt['logger'].keys() else 50
+        else:
+            self.netF = networks.define_F(use_bn=False).to(self.device)
+            self.cri_fea = nn.L1Loss().to(self.device)

        #self.print_network()  # print network
        self.load()  # load G and D if needed
--- a/codes/models/networks.py
+++ b/codes/models/networks.py
@ -181,7 +181,7 @@ def define_D_net(opt_net, img_sz=None, wrap=False):
        netD = SRGAN_arch.Discriminator_switched(in_nc=opt_net['in_nc'], nf=opt_net['nf'], initial_temp=opt_net['initial_temp'],
                                                    final_temperature_step=opt_net['final_temperature_step'])
    elif which_model == "cross_compare_vgg128":
-        netD = SRGAN_arch.CrossCompareDiscriminator(in_nc=opt_net['in_nc'], ref_channels=opt_net['ref_channels'], nf=opt_net['nf'], scale=opt_net['scale'])
+        netD = SRGAN_arch.CrossCompareDiscriminator(in_nc=opt_net['in_nc'], ref_channels=opt_net['ref_channels'] if 'ref_channels' in opt_net.keys() else 3, nf=opt_net['nf'], scale=opt_net['scale'])
    else:
        raise NotImplementedError('Discriminator model [{:s}] not recognized'.format(which_model))
    return netD
--- a/codes/models/steps/steps.py
+++ b/codes/models/steps/steps.py
@ -30,6 +30,7 @@ class ConfigurableStep(Module):

        losses = []
        self.weights = {}
+        if 'losses' in self.step_opt.keys():
            for loss_name, loss in self.step_opt['losses'].items():
                losses.append((loss_name, create_generator_loss(loss, env)))
                self.weights[loss_name] = loss['weight']
--- a/codes/test.py
+++ b/codes/test.py
@ -61,10 +61,8 @@ def forward_pass(model, output_dir, alteration_suffix=''):
    model.feed_data(data, need_GT=need_GT)
    model.test()

-    if isinstance(model.fake_GenOut[0], tuple):
-        visuals = model.fake_GenOut[0][0].detach().float().cpu()
-    else:
-        visuals = model.fake_GenOut[0].detach().float().cpu()
+    visuals = model.get_current_visuals()['rlt'].cpu()
+    fea_loss = 0
    for i in range(visuals.shape[0]):
        img_path = data['GT_path'][i] if need_GT else data['LQ_path'][i]
        img_name = osp.splitext(osp.basename(img_path))[0]
@ -78,7 +76,10 @@ def forward_pass(model, output_dir, alteration_suffix=''):
        else:
            save_img_path = osp.join(output_dir, img_name + '.png')

+        fea_loss += model.compute_fea_loss(visuals[i], data['GT'][i])
+
        util.save_img(sr_img, save_img_path)
+    return fea_loss


 if __name__ == "__main__":
@ -87,7 +88,7 @@ if __name__ == "__main__":
    want_just_images = True
    srg_analyze = False
    parser = argparse.ArgumentParser()
-    parser.add_argument('-opt', type=str, help='Path to options YMAL file.', default='../options/analyze_srg.yml')
+    parser.add_argument('-opt', type=str, help='Path to options YAML file.', default='../options/srgan_compute_feature.yml')
    opt = option.parse(parser.parse_args().opt, is_train=False)
    opt = option.dict_to_nonedict(opt)

@ -108,6 +109,7 @@ if __name__ == "__main__":
        test_loaders.append(test_loader)

    model = create_model(opt)
+    fea_loss = 0
    for test_loader in test_loaders:
        test_set_name = test_loader.dataset.opt['name']
        logger.info('\nTesting [{:s}]...'.format(test_set_name))
@ -143,4 +145,7 @@ if __name__ == "__main__":
                model_copy.load_state_dict(orig_model.state_dict())
                model.netG = model_copy
            else:
-                forward_pass(model, dataset_dir, opt['name'])
+                fea_loss += forward_pass(model, dataset_dir, opt['name'])
+
+        # log
+        logger.info('# Validation # Fea: {:.4e}'.format(fea_loss / len(test_loader)))
--- a/codes/train.py
+++ b/codes/train.py
@ -32,7 +32,7 @@ def init_dist(backend='nccl', **kwargs):
 def main():
    #### options
    parser = argparse.ArgumentParser()
-    parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_imgset_spsr_switched2_xlbatch_ragan.yml')
+    parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/srgan_compute_feature.yml')
    parser.add_argument('--launcher', choices=['none', 'pytorch'], default='none',
                        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
--- a/codes/train2.py
+++ b/codes/train2.py
@ -32,7 +32,7 @@ def init_dist(backend='nccl', **kwargs):
 def main():
    #### options
    parser = argparse.ArgumentParser()
-    parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_imgset_spsr_switched2_fullimgref.yml')
+    parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_mi1_spsr_switched2_fullimgref.yml')
    parser.add_argument('--launcher', choices=['none', 'pytorch'], default='none',
                        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)