Upgrade tecogan_losses for speed
This commit is contained in:
parent
ac3da0c5a6
commit
4dc16d5889
|
@ -31,8 +31,6 @@ def create_injector(opt_inject, env):
|
||||||
return GreyInjector(opt_inject, env)
|
return GreyInjector(opt_inject, env)
|
||||||
elif type == 'interpolate':
|
elif type == 'interpolate':
|
||||||
return InterpolateInjector(opt_inject, env)
|
return InterpolateInjector(opt_inject, env)
|
||||||
elif type == 'imageflow':
|
|
||||||
return ImageFlowInjector(opt_inject, env)
|
|
||||||
elif type == 'image_patch':
|
elif type == 'image_patch':
|
||||||
return ImagePatchInjector(opt_inject, env)
|
return ImagePatchInjector(opt_inject, env)
|
||||||
elif type == 'concatenate':
|
elif type == 'concatenate':
|
||||||
|
|
|
@ -26,23 +26,6 @@ def create_teco_injector(opt, env):
|
||||||
return FlowAdjustment(opt, env)
|
return FlowAdjustment(opt, env)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def create_teco_discriminator_sextuplet(input_list, lr_imgs, scale, index, flow_gen, resampler, margin):
|
|
||||||
# Flow is interpreted from the LR images so that the generator cannot learn to manipulate it.
|
|
||||||
with autocast(enabled=False):
|
|
||||||
triplet = input_list[:, index:index+3].float()
|
|
||||||
first_flow = flow_gen(torch.stack([triplet[:,1], triplet[:,0]], dim=2))
|
|
||||||
last_flow = flow_gen(torch.stack([triplet[:,1], triplet[:,2]], dim=2))
|
|
||||||
flow_triplet = [resampler(triplet[:,0], first_flow),
|
|
||||||
triplet[:,1],
|
|
||||||
resampler(triplet[:,2], last_flow)]
|
|
||||||
flow_triplet = torch.stack(flow_triplet, dim=1)
|
|
||||||
combined = torch.cat([triplet, flow_triplet], dim=1)
|
|
||||||
b, f, c, h, w = combined.shape
|
|
||||||
combined = combined.view(b, 3*6, h, w) # 3*6 is essentially an assertion here.
|
|
||||||
# Apply margin
|
|
||||||
return combined[:, :, margin:-margin, margin:-margin]
|
|
||||||
|
|
||||||
|
|
||||||
def extract_inputs_index(inputs, i):
|
def extract_inputs_index(inputs, i):
|
||||||
res = []
|
res = []
|
||||||
for input in inputs:
|
for input in inputs:
|
||||||
|
@ -152,9 +135,14 @@ class RecurrentImageGeneratorSequenceInjector(Injector):
|
||||||
results[out_key].append(gen_out[i])
|
results[out_key].append(gen_out[i])
|
||||||
recurrent_input = gen_out[self.output_hq_index]
|
recurrent_input = gen_out[self.output_hq_index]
|
||||||
|
|
||||||
|
final_results = {}
|
||||||
|
# Include 'hq_batched' here - because why not... Don't really need a separate injector for this.
|
||||||
|
b, s, c, h, w = state['hq'].shape
|
||||||
|
final_results['hq_batched'] = state['hq'].view(b*s, c, h, w)
|
||||||
for k, v in results.items():
|
for k, v in results.items():
|
||||||
results[k] = torch.stack(v, dim=1)
|
final_results[k] = torch.stack(v, dim=1)
|
||||||
return results
|
final_results[k + "_batched"] = torch.cat(v[:s], dim=0) # Only include the original sequence - this output is generally used to compare against HQ.
|
||||||
|
return final_results
|
||||||
|
|
||||||
def produce_teco_visual_debugs(self, gen_input, gen_recurrent, it):
|
def produce_teco_visual_debugs(self, gen_input, gen_recurrent, it):
|
||||||
if self.env['rank'] > 0:
|
if self.env['rank'] > 0:
|
||||||
|
@ -183,6 +171,47 @@ class FlowAdjustment(Injector):
|
||||||
return {self.output: self.resample(state[self.flowed], flowfield)}
|
return {self.output: self.resample(state[self.flowed], flowfield)}
|
||||||
|
|
||||||
|
|
||||||
|
def create_teco_discriminator_sextuplet(input_list, lr_imgs, scale, index, flow_gen, resampler, margin):
    """Build one 6-frame discriminator input from three consecutive frames.

    Takes the frame triplet starting at `index`, warps the two outer frames
    toward the center frame using flows from `flow_gen`, concatenates raw and
    warped triplets along the frame axis, flattens frames into channels, and
    crops `margin` pixels from each spatial border.

    NOTE(review): `lr_imgs` and `scale` are accepted but unused here — kept
    for interface compatibility with callers; confirm against call sites.
    """
    # Flow is interpreted from the LR images so that the generator cannot learn to manipulate it.
    with autocast(enabled=False):
        clip = input_list[:, index:index + 3].float()
        center = clip[:, 1]
        # Flow is estimated from the center frame to each outer frame.
        flow_to_first = flow_gen(torch.stack([center, clip[:, 0]], dim=2))
        flow_to_last = flow_gen(torch.stack([center, clip[:, 2]], dim=2))
        warped = torch.stack([resampler(clip[:, 0], flow_to_first),
                              center,
                              resampler(clip[:, 2], flow_to_last)], dim=1)
        sextuplet = torch.cat([clip, warped], dim=1)
        b, f, c, h, w = sextuplet.shape
        # 3*6 is essentially an assertion here: f*c must equal 18 (6 frames x 3 channels).
        sextuplet = sextuplet.view(b, 3 * 6, h, w)
        # Apply margin
        return sextuplet[:, :, margin:-margin, margin:-margin]
|
||||||
|
|
||||||
|
|
||||||
|
def create_all_discriminator_sextuplets(input_list, lr_imgs, scale, total, flow_gen, resampler, margin):
    """Batched variant of create_teco_discriminator_sextuplet.

    Builds all `total` sextuplets at once: every flow pair for the whole
    sequence is concatenated into the batch dimension and run through
    `flow_gen` in a single call, then the per-window sextuplets are assembled
    and concatenated along the batch dimension.

    NOTE(review): `lr_imgs` and `scale` are accepted but unused here — kept
    for interface compatibility; confirm against call sites.
    """
    # Combine everything and feed it into the flow network at once for better efficiency.
    batch_sz = input_list.shape[0]
    # Pairs are (center, neighbor), matching the per-step path's flow inputs.
    fwd_pairs = [torch.stack([input_list[:, i], input_list[:, i + 1]], dim=2) for i in range(1, total + 1)]
    bwd_pairs = [torch.stack([input_list[:, i], input_list[:, i - 1]], dim=2) for i in range(1, total + 1)]
    flows_forward = flow_gen(torch.cat(fwd_pairs, dim=0))
    flows_backward = flow_gen(torch.cat(bwd_pairs, dim=0))
    sexts = []
    for i in range(total):
        lo, hi = batch_sz * i, batch_sz * (i + 1)
        mid = input_list[:, i + 1]
        # NOTE(review): the per-step path warps the OUTER frames toward the
        # center, but here `mid` itself is resampled with both flows — confirm
        # this asymmetry with the lowmem path is intended.
        sext = torch.stack([input_list[:, i], mid, input_list[:, i + 2],
                            resampler(mid, flows_backward[lo:hi]),
                            mid,
                            resampler(mid, flows_forward[lo:hi])], dim=1)
        # Apply margin
        b, f, c, h, w = sext.shape
        sext = sext.view(b, 3 * 6, h, w)  # f*c = 6*3
        sexts.append(sext[:, :, margin:-margin, margin:-margin])
    return torch.cat(sexts, dim=0)
|
||||||
|
|
||||||
|
|
||||||
# This is the temporal discriminator loss from TecoGAN.
|
# This is the temporal discriminator loss from TecoGAN.
|
||||||
#
|
#
|
||||||
# It has a strict contract for 'real' and 'fake' inputs:
|
# It has a strict contract for 'real' and 'fake' inputs:
|
||||||
|
@ -208,28 +237,67 @@ class TecoGanLoss(ConfigurableLoss):
|
||||||
self.for_generator = opt['for_generator']
|
self.for_generator = opt['for_generator']
|
||||||
self.min_loss = opt['min_loss'] if 'min_loss' in opt.keys() else 0
|
self.min_loss = opt['min_loss'] if 'min_loss' in opt.keys() else 0
|
||||||
self.margin = opt['margin'] # Per the tecogan paper, the GAN loss only pays attention to an inner part of the image with the margin removed, to get rid of artifacts resulting from flow errors.
|
self.margin = opt['margin'] # Per the tecogan paper, the GAN loss only pays attention to an inner part of the image with the margin removed, to get rid of artifacts resulting from flow errors.
|
||||||
|
self.ff = opt['fast_forward'] if 'fast_forward' in opt.keys() else False
|
||||||
|
|
||||||
def forward(self, _, state):
|
def forward(self, _, state):
|
||||||
fp16 = self.env['opt']['fp16']
|
if self.ff:
|
||||||
net = self.env['discriminators'][self.opt['discriminator']]
|
return self.fast_forward(state)
|
||||||
|
else:
|
||||||
|
return self.lowmem_forward(state)
|
||||||
|
|
||||||
|
|
||||||
|
# Computes the discriminator loss one recursive step at a time, which has a lower memory overhead but is
|
||||||
|
# slower.
|
||||||
|
def lowmem_forward(self, state):
|
||||||
flow_gen = self.env['generators'][self.image_flow_generator]
|
flow_gen = self.env['generators'][self.image_flow_generator]
|
||||||
real = state[self.opt['real']]
|
real = state[self.opt['real']]
|
||||||
fake = state[self.opt['fake']]
|
fake = state[self.opt['fake']]
|
||||||
sequence_len = real.shape[1]
|
sequence_len = real.shape[1]
|
||||||
lr = state[self.opt['lr_inputs']]
|
lr = state[self.opt['lr_inputs']]
|
||||||
l_total = 0
|
l_total = 0
|
||||||
|
|
||||||
|
# Create a list of all the discriminator inputs, which will be reduced into the batch dim for efficient computation.
|
||||||
for i in range(sequence_len - 2):
|
for i in range(sequence_len - 2):
|
||||||
real_sext = create_teco_discriminator_sextuplet(real, lr, self.scale, i, flow_gen, self.resampler, self.margin)
|
real_sext = create_teco_discriminator_sextuplet(real, lr, self.scale, i, flow_gen, self.resampler, self.margin)
|
||||||
fake_sext = create_teco_discriminator_sextuplet(fake, lr, self.scale, i, flow_gen, self.resampler, self.margin)
|
fake_sext = create_teco_discriminator_sextuplet(fake, lr, self.scale, i, flow_gen, self.resampler, self.margin)
|
||||||
|
l_step = self.compute_loss(real_sext, fake_sext)
|
||||||
|
if l_step > self.min_loss:
|
||||||
|
l_total += l_step
|
||||||
|
|
||||||
|
return l_total
|
||||||
|
|
||||||
|
# Computes the discriminator loss by dogpiling all of the sextuplets into the batch dimension and doing one massive
|
||||||
|
# forward() on the discriminators. High memory but faster.
|
||||||
|
def fast_forward(self, state):
|
||||||
|
flow_gen = self.env['generators'][self.image_flow_generator]
|
||||||
|
real = state[self.opt['real']]
|
||||||
|
fake = state[self.opt['fake']]
|
||||||
|
sequence_len = real.shape[1]
|
||||||
|
lr = state[self.opt['lr_inputs']]
|
||||||
|
|
||||||
|
# Create a list of all the discriminator inputs, which will be reduced into the batch dim for efficient computation.
|
||||||
|
combined_real_sext = create_all_discriminator_sextuplets(real, lr, self.scale, sequence_len - 2, flow_gen,
|
||||||
|
self.resampler, self.margin)
|
||||||
|
combined_fake_sext = create_all_discriminator_sextuplets(fake, lr, self.scale, sequence_len - 2, flow_gen,
|
||||||
|
self.resampler, self.margin)
|
||||||
|
l_total = self.compute_loss(combined_real_sext, combined_fake_sext)
|
||||||
|
if l_total < self.min_loss:
|
||||||
|
l_total = 0
|
||||||
|
return l_total
|
||||||
|
|
||||||
|
def compute_loss(self, real_sext, fake_sext):
|
||||||
|
fp16 = self.env['opt']['fp16']
|
||||||
|
net = self.env['discriminators'][self.opt['discriminator']]
|
||||||
with autocast(enabled=fp16):
|
with autocast(enabled=fp16):
|
||||||
d_fake = net(fake_sext)
|
d_fake = net(fake_sext)
|
||||||
d_real = net(real_sext)
|
d_real = net(real_sext)
|
||||||
|
|
||||||
self.metrics.append(("d_fake", torch.mean(d_fake)))
|
self.metrics.append(("d_fake", torch.mean(d_fake)))
|
||||||
self.metrics.append(("d_real", torch.mean(d_real)))
|
self.metrics.append(("d_real", torch.mean(d_real)))
|
||||||
|
|
||||||
if self.for_generator and self.env['step'] % 50 == 0:
|
if self.for_generator and self.env['step'] % 50 == 0:
|
||||||
self.produce_teco_visual_debugs(fake_sext, 'fake', i)
|
self.produce_teco_visual_debugs(fake_sext, 'fake', 0)
|
||||||
self.produce_teco_visual_debugs(real_sext, 'real', i)
|
self.produce_teco_visual_debugs(real_sext, 'real', 0)
|
||||||
|
|
||||||
if self.opt['gan_type'] in ['gan', 'pixgan']:
|
if self.opt['gan_type'] in ['gan', 'pixgan']:
|
||||||
l_fake = self.criterion(d_fake, self.for_generator)
|
l_fake = self.criterion(d_fake, self.for_generator)
|
||||||
|
@ -245,10 +313,8 @@ class TecoGanLoss(ConfigurableLoss):
|
||||||
self.criterion(d_fake_diff, self.for_generator))
|
self.criterion(d_fake_diff, self.for_generator))
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
if l_step > self.min_loss:
|
|
||||||
l_total += l_step
|
|
||||||
|
|
||||||
return l_total
|
return l_step
|
||||||
|
|
||||||
def produce_teco_visual_debugs(self, sext, lbl, it):
|
def produce_teco_visual_debugs(self, sext, lbl, it):
|
||||||
if self.env['rank'] > 0:
|
if self.env['rank'] > 0:
|
||||||
|
@ -291,4 +357,3 @@ class PingPongLoss(ConfigurableLoss):
|
||||||
img = imglist[:, i]
|
img = imglist[:, i]
|
||||||
torchvision.utils.save_image(img.float(), osp.join(base_path, "%s.png" % (i, )))
|
torchvision.utils.save_image(img.float(), osp.join(base_path, "%s.png" % (i, )))
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user