From 4c85ee51a48b510c26fa50b60737b66670fe388b Mon Sep 17 00:00:00 2001 From: James Betker Date: Thu, 8 Oct 2020 17:23:21 -0600 Subject: [PATCH] Converge SSG architectures into unified switching base class Also adds attention norm histogram to logging --- .../archs/StructuredSwitchedGenerator.py | 147 ++---------------- .../archs/SwitchedResidualGenerator_arch.py | 59 +++++++ codes/train2.py | 2 +- 3 files changed, 69 insertions(+), 139 deletions(-) diff --git a/codes/models/archs/StructuredSwitchedGenerator.py b/codes/models/archs/StructuredSwitchedGenerator.py index 8c74b313..ea6f12fc 100644 --- a/codes/models/archs/StructuredSwitchedGenerator.py +++ b/codes/models/archs/StructuredSwitchedGenerator.py @@ -1,7 +1,7 @@ import math import functools from models.archs.arch_util import MultiConvBlock, ConvGnLelu, ConvGnSilu, ReferenceJoinBlock -from models.archs.SwitchedResidualGenerator_arch import ConfigurableSwitchComputer, gather_2d +from models.archs.SwitchedResidualGenerator_arch import ConfigurableSwitchComputer, gather_2d, SwitchModelBase from models.archs.SPSR_arch import ImageGradientNoPadding from torch import nn import torch @@ -152,9 +152,9 @@ class SwitchWithReference(nn.Module): return self.switch(x, True, identity=x, att_in=(x, mplex_ref)) -class SSGr1(nn.Module): +class SSGr1(SwitchModelBase): def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10): - super(SSGr1, self).__init__() + super(SSGr1, self).__init__(init_temperature, 10000) n_upscale = int(math.log(upscale, 2)) self.nf = nf @@ -180,10 +180,6 @@ class SSGr1(nn.Module): self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True) self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False) self.switches = [self.sw1.switch, self.sw_grad.switch, self.conjoin_sw.switch] - self.attentions = None - self.lr = None - self.init_temperature = init_temperature - self.final_temperature_step 
= 10000 def forward(self, x, ref, ref_center, save_attentions=True): # The attention_maps debugger outputs . Save that here. @@ -218,39 +214,10 @@ class SSGr1(nn.Module): self.fea_grad_std = fea_grad_std.detach().cpu() return x_grad_out, x_out, x_grad - def set_temperature(self, temp): - [sw.set_temperature(temp) for sw in self.switches] - def update_for_step(self, step, experiments_path='.'): - if self.attentions: - temp = max(1, 1 + self.init_temperature * - (self.final_temperature_step - step) / self.final_temperature_step) - self.set_temperature(temp) - if step % 200 == 0: - output_path = os.path.join(experiments_path, "attention_maps") - prefix = "amap_%i_a%i_%%i.png" - [save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step, output_mag=False) for i in range(len(self.attentions))] - torchvision.utils.save_image(self.lr, os.path.join(experiments_path, "attention_maps", "amap_%i_base_image.png" % (step,))) - - - def get_debug_values(self, step, net_name): - if self.attentions: - temp = self.switches[0].switch.temperature - mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions] - means = [i[0] for i in mean_hists] - hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists] - val = {"switch_temperature": temp, - "grad_branch_feat_intg_std_dev": self.grad_fea_std, - "conjoin_branch_grad_intg_std_dev": self.fea_grad_std} - for i in range(len(means)): - val["switch_%i_specificity" % (i,)] = means[i] - val["switch_%i_histogram" % (i,)] = hists[i] - return val - - -class StackedSwitchGenerator(nn.Module): +class StackedSwitchGenerator(SwitchModelBase): def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10): - super(StackedSwitchGenerator, self).__init__() + super(StackedSwitchGenerator, self).__init__(init_temperature, 10000) n_upscale = int(math.log(upscale, 2)) self.nf = nf @@ -268,10 +235,6 @@ class StackedSwitchGenerator(nn.Module): self.upsample = UpconvBlock(nf, nf 
// 2, block=ConvGnLelu, norm=False, activation=True, bias=True) self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True) self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False) - self.attentions = None - self.lr = None - self.init_temperature = init_temperature - self.final_temperature_step = 10000 def forward(self, x, ref, ref_center, save_attentions=True): # The attention_maps debugger outputs . Save that here. @@ -292,36 +255,10 @@ class StackedSwitchGenerator(nn.Module): self.attentions = [a1, a3, a3] return x_out, - def set_temperature(self, temp): - [sw.set_temperature(temp) for sw in self.switches] - def update_for_step(self, step, experiments_path='.'): - if self.attentions: - temp = max(1, 1 + self.init_temperature * - (self.final_temperature_step - step) / self.final_temperature_step) - self.set_temperature(temp) - if step % 200 == 0: - output_path = os.path.join(experiments_path, "attention_maps") - prefix = "amap_%i_a%i_%%i.png" - [save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step, output_mag=False) for i in range(len(self.attentions))] - torchvision.utils.save_image(self.lr, os.path.join(experiments_path, "attention_maps", "amap_%i_base_image.png" % (step,))) - - - def get_debug_values(self, step, net_name): - temp = self.switches[0].switch.temperature - mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions] - means = [i[0] for i in mean_hists] - hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists] - val = {"switch_temperature": temp} - for i in range(len(means)): - val["switch_%i_specificity" % (i,)] = means[i] - val["switch_%i_histogram" % (i,)] = hists[i] - return val - - -class SSGDeep(nn.Module): +class SSGDeep(SwitchModelBase): def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10): - super(SSGDeep, self).__init__() + super(SSGDeep, 
self).__init__(init_temperature, 10000) n_upscale = int(math.log(upscale, 2)) self.nf = nf @@ -349,10 +286,6 @@ class SSGDeep(nn.Module): self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True) self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False) self.switches = [self.sw1.switch, self.sw_grad.switch, self.conjoin_sw.switch, self.sw3.switch, self.sw4.switch] - self.attentions = None - self.lr = None - self.init_temperature = init_temperature - self.final_temperature_step = 10000 def forward(self, x, ref, ref_center, save_attentions=True): # The attention_maps debugger outputs . Save that here. @@ -389,38 +322,10 @@ class SSGDeep(nn.Module): self.fea_grad_std = fea_grad_std.detach().cpu() return x_grad_out, x_out, x_grad - def set_temperature(self, temp): - [sw.set_temperature(temp) for sw in self.switches] - def update_for_step(self, step, experiments_path='.'): - if self.attentions: - temp = max(1, 1 + self.init_temperature * - (self.final_temperature_step - step) / self.final_temperature_step) - self.set_temperature(temp) - if step % 200 == 0: - output_path = os.path.join(experiments_path, "attention_maps") - prefix = "amap_%i_a%i_%%i.png" - [save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step, output_mag=False) for i in range(len(self.attentions))] - torchvision.utils.save_image(self.lr, os.path.join(experiments_path, "attention_maps", "amap_%i_base_image.png" % (step,))) - - - def get_debug_values(self, step, net_name): - temp = self.switches[0].switch.temperature - mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions] - means = [i[0] for i in mean_hists] - hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists] - val = {"switch_temperature": temp, - "grad_branch_feat_intg_std_dev": self.grad_fea_std, - "conjoin_branch_grad_intg_std_dev": self.fea_grad_std} - for i in 
range(len(means)): - val["switch_%i_specificity" % (i,)] = means[i] - val["switch_%i_histogram" % (i,)] = hists[i] - return val - - -class StackedSwitchGenerator5Layer(nn.Module): +class StackedSwitchGenerator5Layer(SwitchModelBase): def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10): - super(StackedSwitchGenerator5Layer, self).__init__() + super(StackedSwitchGenerator5Layer, self).__init__(init_temperature, 10000) n_upscale = int(math.log(upscale, 2)) self.nf = nf @@ -440,10 +345,6 @@ class StackedSwitchGenerator5Layer(nn.Module): self.upsample = UpconvBlock(nf, nf // 2, block=ConvGnLelu, norm=False, activation=True, bias=True) self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True) self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False) - self.attentions = None - self.lr = None - self.init_temperature = init_temperature - self.final_temperature_step = 10000 def forward(self, x, ref, ref_center, save_attentions=True): # The attention_maps debugger outputs . Save that here. @@ -471,33 +372,3 @@ class StackedSwitchGenerator5Layer(nn.Module): self.attentions = [a1, a3, a3, a4, a5] return x_out, - def set_temperature(self, temp): - [sw.set_temperature(temp) for sw in self.switches] - - def update_for_step(self, step, experiments_path='.'): - if self.attentions: - # All-reduce the attention norm. 
- for sw in self.switches: - sw.switch.reduce_norm_params() - - temp = max(1, 1 + self.init_temperature * - (self.final_temperature_step - step) / self.final_temperature_step) - self.set_temperature(temp) - if step % 200 == 0: - output_path = os.path.join(experiments_path, "attention_maps") - prefix = "amap_%i_a%i_%%i.png" - [save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step, - output_mag=False) for i in range(len(self.attentions))] - torchvision.utils.save_image(self.lr[:,:3], os.path.join(experiments_path, "attention_maps", - "amap_%i_base_image.png" % (step,))) - - def get_debug_values(self, step, net_name): - temp = self.switches[0].switch.temperature - mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions] - means = [i[0] for i in mean_hists] - hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists] - val = {"switch_temperature": temp} - for i in range(len(means)): - val["switch_%i_specificity" % (i,)] = means[i] - val["switch_%i_histogram" % (i,)] = hists[i] - return val diff --git a/codes/models/archs/SwitchedResidualGenerator_arch.py b/codes/models/archs/SwitchedResidualGenerator_arch.py index e2345912..9c0e43fa 100644 --- a/codes/models/archs/SwitchedResidualGenerator_arch.py +++ b/codes/models/archs/SwitchedResidualGenerator_arch.py @@ -8,6 +8,7 @@ from models.archs.arch_util import ConvBnLelu, ConvGnSilu, ExpansionBlock, Expan from switched_conv.switched_conv_util import save_attention_to_image_rgb import os from models.archs.spinenet_arch import SpineNet +import torchvision # VGG-style layer with Conv(stride2)->BN->Activation->Conv->BN->Activation # Doubles the input filter count. @@ -533,6 +534,64 @@ class QueryKeyPyramidMultiplexer(nn.Module): return v.view(b, t, h, w) +# Base class for models that utilize ConfigurableSwitchComputer. Provides basis functionality like logging +# switch temperature, distribution and images, as well as managing attention norms. 
class SwitchModelBase(nn.Module):
    """Shared plumbing for generators built from ConfigurableSwitchComputer blocks.

    Handles the switch temperature schedule, periodic dumping of attention maps
    and the values reported to the training logger.

    Subclass contract:
      * ``self.switches``   - list of every switch computer in the model.
      * ``self.attentions`` - set in ``forward()`` to the attention outputs.
      * ``self.lr``         - optionally set in ``forward()`` to the input image.
      * ``self.nf``         - must be set by the subclass; it is forwarded to
                              ``save_attention_to_image_rgb`` when maps are saved.

    Args:
        init_temperature: Temperature offset applied at step 0.
        final_temperature_step: Step at which the temperature reaches 1.
    """

    def __init__(self, init_temperature=10, final_temperature_step=10000):
        super(SwitchModelBase, self).__init__()
        self.switches = []  # The implementing class is expected to set this to a list of all ConfigurableSwitchComputers.
        self.attentions = []  # The implementing class is expected to set this in forward() to the output of the attention blocks.
        self.lr = None  # The implementing class is expected to set this to the input image fed into the generator. If not
                        # set, the attention logger will not output an image reference.
        self.init_temperature = init_temperature
        self.final_temperature_step = final_temperature_step

    def set_temperature(self, temp):
        """Apply ``temp`` to every registered switch."""
        for sw in self.switches:
            sw.set_temperature(temp)

    def update_for_step(self, step, experiments_path='.'):
        """Advance the temperature schedule; every 200 steps also dump attention maps.

        Args:
            step: Current training step.
            experiments_path: Directory under which ``attention_maps/`` is written.
        """
        # All-reduce the attention norm.
        for sw in self.switches:
            sw.switch.reduce_norm_params()

        # Linear decay from (1 + init_temperature) at step 0 down to 1 at
        # final_temperature_step; clamped so it never drops below 1 afterwards.
        temp = max(1, 1 + self.init_temperature *
                   (self.final_temperature_step - step) / self.final_temperature_step)
        self.set_temperature(temp)

        if step % 200 != 0:
            return

        output_path = os.path.join(experiments_path, "attention_maps")
        prefix = "amap_%i_a%i_%%i.png"
        # `or []` tolerates subclasses that reset attentions to None.
        for i, attention in enumerate(self.attentions or []):
            save_attention_to_image_rgb(output_path, attention, self.nf, prefix % (step, i), step,
                                        output_mag=False)
        # Compare against None explicitly: truth-testing a multi-element tensor raises RuntimeError.
        if self.lr is not None:
            # The attention saver may not have run (no attentions yet), so make sure the folder exists.
            os.makedirs(output_path, exist_ok=True)
            torchvision.utils.save_image(self.lr[:, :3],
                                         os.path.join(output_path, "amap_%i_base_image.png" % (step,)))

    # This is a bit awkward. We want this plot to show up in TB as a histogram, but we are getting an intensity
    # plot out of the attention norm tensor. So we need to convert it back into a list of indexes, then feed into TB.
    def compute_anorm_histogram(self):
        """Return one 1-D index tensor per switch, suitable for histogram logging.

        Each bin index ``k`` is repeated ``int(norm[k] * 10)`` times, where ``norm``
        is the switch's ``attention_norm.compute_buffer_norm()`` output.
        """
        result = []
        for sw in self.switches:
            intensity = sw.switch.attention_norm.compute_buffer_norm().clone().detach().cpu()
            repeats = (intensity * 10).long()
            bins = torch.arange(len(intensity))
            result.append(bins.repeat_interleave(repeats, 0))
        return result

    def get_debug_values(self, step, net_name):
        """Collect logging values: temperature plus per-attention statistics.

        Returns:
            dict with ``switch_temperature`` and, for each attention map ``i``,
            ``switch_i_specificity`` and ``switch_i_histogram``. The
            ``switch_i_attention_norm_histogram`` entry is added when a switch
            with the same index exists. Empty dict when no switches are registered.
        """
        if not self.switches:
            # Nothing to report (and no switch to read a temperature from).
            return {}

        val = {"switch_temperature": self.switches[0].switch.temperature}
        anorms = self.compute_anorm_histogram()
        for i, attention in enumerate(self.attentions or []):
            specificity = compute_attention_specificity(attention, 2)
            val["switch_%i_specificity" % (i,)] = specificity[0]
            val["switch_%i_histogram" % (i,)] = specificity[1].clone().detach().cpu().flatten()
            # There is one norm histogram per switch, which is not necessarily one per attention map.
            if i < len(anorms):
                val["switch_%i_attention_norm_histogram" % (i,)] = anorms[i]
        return val