Converge SSG architectures into unified switching base class

Also adds attention norm histogram to logging
2020-10-08 17:23:21 -06:00 · 2020-10-08 17:23:21 -06:00 · 4c85ee51a4
commit 4c85ee51a4
parent 3cc56cd00b
3 changed files with 69 additions and 139 deletions
--- a/codes/models/archs/StructuredSwitchedGenerator.py
+++ b/codes/models/archs/StructuredSwitchedGenerator.py
@ -1,7 +1,7 @@
 import math
 import functools
 from models.archs.arch_util import MultiConvBlock, ConvGnLelu, ConvGnSilu, ReferenceJoinBlock
-from models.archs.SwitchedResidualGenerator_arch import ConfigurableSwitchComputer, gather_2d
+from models.archs.SwitchedResidualGenerator_arch import ConfigurableSwitchComputer, gather_2d, SwitchModelBase
 from models.archs.SPSR_arch import ImageGradientNoPadding
 from torch import nn
 import torch
@ -152,9 +152,9 @@ class SwitchWithReference(nn.Module):
            return self.switch(x, True, identity=x, att_in=(x, mplex_ref))
-class SSGr1(nn.Module):
+class SSGr1(SwitchModelBase):
    def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10):
-        super(SSGr1, self).__init__()
+        super(SSGr1, self).__init__(init_temperature, 10000)
        n_upscale = int(math.log(upscale, 2))
        self.nf = nf
@ -180,10 +180,6 @@ class SSGr1(nn.Module):
        self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True)
        self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False)
        self.switches = [self.sw1.switch, self.sw_grad.switch, self.conjoin_sw.switch]
        self.attentions = None
        self.lr = None
        self.init_temperature = init_temperature
        self.final_temperature_step = 10000
    def forward(self, x, ref, ref_center, save_attentions=True):
        # The attention_maps debugger outputs <x>. Save that here.
@ -218,39 +214,10 @@ class SSGr1(nn.Module):
        self.fea_grad_std = fea_grad_std.detach().cpu()
        return x_grad_out, x_out, x_grad
    def set_temperature(self, temp):
        [sw.set_temperature(temp) for sw in self.switches]
-    def update_for_step(self, step, experiments_path='.'):
+class StackedSwitchGenerator(SwitchModelBase):
        if self.attentions:
            temp = max(1, 1 + self.init_temperature *
                       (self.final_temperature_step - step) / self.final_temperature_step)
            self.set_temperature(temp)
            if step % 200 == 0:
                output_path = os.path.join(experiments_path, "attention_maps")
                prefix = "amap_%i_a%i_%%i.png"
                [save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step, output_mag=False) for i in range(len(self.attentions))]
                torchvision.utils.save_image(self.lr, os.path.join(experiments_path, "attention_maps", "amap_%i_base_image.png" % (step,)))
    def get_debug_values(self, step, net_name):
        if self.attentions:
            temp = self.switches[0].switch.temperature
            mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions]
            means = [i[0] for i in mean_hists]
            hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists]
            val = {"switch_temperature": temp,
                   "grad_branch_feat_intg_std_dev": self.grad_fea_std,
                   "conjoin_branch_grad_intg_std_dev": self.fea_grad_std}
            for i in range(len(means)):
                val["switch_%i_specificity" % (i,)] = means[i]
                val["switch_%i_histogram" % (i,)] = hists[i]
        return val
 class StackedSwitchGenerator(nn.Module):
    def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10):
-        super(StackedSwitchGenerator, self).__init__()
+        super(StackedSwitchGenerator, self).__init__(init_temperature, 10000)
        n_upscale = int(math.log(upscale, 2))
        self.nf = nf
@ -268,10 +235,6 @@ class StackedSwitchGenerator(nn.Module):
        self.upsample = UpconvBlock(nf, nf // 2, block=ConvGnLelu, norm=False, activation=True, bias=True)
        self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True)
        self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False)
        self.attentions = None
        self.lr = None
        self.init_temperature = init_temperature
        self.final_temperature_step = 10000
    def forward(self, x, ref, ref_center, save_attentions=True):
        # The attention_maps debugger outputs <x>. Save that here.
@ -292,36 +255,10 @@ class StackedSwitchGenerator(nn.Module):
            self.attentions = [a1, a3, a3]
        return x_out,
    def set_temperature(self, temp):
        [sw.set_temperature(temp) for sw in self.switches]
-    def update_for_step(self, step, experiments_path='.'):
+class SSGDeep(SwitchModelBase):
        if self.attentions:
            temp = max(1, 1 + self.init_temperature *
                       (self.final_temperature_step - step) / self.final_temperature_step)
            self.set_temperature(temp)
            if step % 200 == 0:
                output_path = os.path.join(experiments_path, "attention_maps")
                prefix = "amap_%i_a%i_%%i.png"
                [save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step, output_mag=False) for i in range(len(self.attentions))]
                torchvision.utils.save_image(self.lr, os.path.join(experiments_path, "attention_maps", "amap_%i_base_image.png" % (step,)))
    def get_debug_values(self, step, net_name):
        temp = self.switches[0].switch.temperature
        mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions]
        means = [i[0] for i in mean_hists]
        hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists]
        val = {"switch_temperature": temp}
        for i in range(len(means)):
            val["switch_%i_specificity" % (i,)] = means[i]
            val["switch_%i_histogram" % (i,)] = hists[i]
        return val
 class SSGDeep(nn.Module):
    def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10):
-        super(SSGDeep, self).__init__()
+        super(SSGDeep, self).__init__(init_temperature, 10000)
        n_upscale = int(math.log(upscale, 2))
        self.nf = nf
@ -349,10 +286,6 @@ class SSGDeep(nn.Module):
        self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True)
        self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False)
        self.switches = [self.sw1.switch, self.sw_grad.switch, self.conjoin_sw.switch, self.sw3.switch, self.sw4.switch]
        self.attentions = None
        self.lr = None
        self.init_temperature = init_temperature
        self.final_temperature_step = 10000
    def forward(self, x, ref, ref_center, save_attentions=True):
        # The attention_maps debugger outputs <x>. Save that here.
@ -389,38 +322,10 @@ class SSGDeep(nn.Module):
        self.fea_grad_std = fea_grad_std.detach().cpu()
        return x_grad_out, x_out, x_grad
    def set_temperature(self, temp):
        [sw.set_temperature(temp) for sw in self.switches]
-    def update_for_step(self, step, experiments_path='.'):
+class StackedSwitchGenerator5Layer(SwitchModelBase):
        if self.attentions:
            temp = max(1, 1 + self.init_temperature *
                       (self.final_temperature_step - step) / self.final_temperature_step)
            self.set_temperature(temp)
            if step % 200 == 0:
                output_path = os.path.join(experiments_path, "attention_maps")
                prefix = "amap_%i_a%i_%%i.png"
                [save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step, output_mag=False) for i in range(len(self.attentions))]
                torchvision.utils.save_image(self.lr, os.path.join(experiments_path, "attention_maps", "amap_%i_base_image.png" % (step,)))
    def get_debug_values(self, step, net_name):
        temp = self.switches[0].switch.temperature
        mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions]
        means = [i[0] for i in mean_hists]
        hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists]
        val = {"switch_temperature": temp,
               "grad_branch_feat_intg_std_dev": self.grad_fea_std,
               "conjoin_branch_grad_intg_std_dev": self.fea_grad_std}
        for i in range(len(means)):
            val["switch_%i_specificity" % (i,)] = means[i]
            val["switch_%i_histogram" % (i,)] = hists[i]
        return val
 class StackedSwitchGenerator5Layer(nn.Module):
    def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10):
-        super(StackedSwitchGenerator5Layer, self).__init__()
+        super(StackedSwitchGenerator5Layer, self).__init__(init_temperature, 10000)
        n_upscale = int(math.log(upscale, 2))
        self.nf = nf
@ -440,10 +345,6 @@ class StackedSwitchGenerator5Layer(nn.Module):
        self.upsample = UpconvBlock(nf, nf // 2, block=ConvGnLelu, norm=False, activation=True, bias=True)
        self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True)
        self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False)
        self.attentions = None
        self.lr = None
        self.init_temperature = init_temperature
        self.final_temperature_step = 10000
    def forward(self, x, ref, ref_center, save_attentions=True):
        # The attention_maps debugger outputs <x>. Save that here.
@ -471,33 +372,3 @@ class StackedSwitchGenerator5Layer(nn.Module):
            self.attentions = [a1, a3, a3, a4, a5]
        return x_out,
    def set_temperature(self, temp):
        [sw.set_temperature(temp) for sw in self.switches]
    def update_for_step(self, step, experiments_path='.'):
        if self.attentions:
            # All-reduce the attention norm.
            for sw in self.switches:
                sw.switch.reduce_norm_params()
            temp = max(1, 1 + self.init_temperature *
                       (self.final_temperature_step - step) / self.final_temperature_step)
            self.set_temperature(temp)
            if step % 200 == 0:
                output_path = os.path.join(experiments_path, "attention_maps")
                prefix = "amap_%i_a%i_%%i.png"
                [save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step,
                                             output_mag=False) for i in range(len(self.attentions))]
                torchvision.utils.save_image(self.lr[:,:3], os.path.join(experiments_path, "attention_maps",
                                                                   "amap_%i_base_image.png" % (step,)))
    def get_debug_values(self, step, net_name):
        temp = self.switches[0].switch.temperature
        mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions]
        means = [i[0] for i in mean_hists]
        hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists]
        val = {"switch_temperature": temp}
        for i in range(len(means)):
            val["switch_%i_specificity" % (i,)] = means[i]
            val["switch_%i_histogram" % (i,)] = hists[i]
        return val
--- a/codes/models/archs/SwitchedResidualGenerator_arch.py
+++ b/codes/models/archs/SwitchedResidualGenerator_arch.py
@ -8,6 +8,7 @@ from models.archs.arch_util import ConvBnLelu, ConvGnSilu, ExpansionBlock, Expan
 from switched_conv.switched_conv_util import save_attention_to_image_rgb
 import os
 from models.archs.spinenet_arch import SpineNet
 import torchvision
 # VGG-style layer with Conv(stride2)->BN->Activation->Conv->BN->Activation
 # Doubles the input filter count.
@ -533,6 +534,64 @@ class QueryKeyPyramidMultiplexer(nn.Module):
        return v.view(b, t, h, w)
 # Base class for models that utilize ConfigurableSwitchComputer. Provides basis functionality like logging
 # switch temperature, distribution and images, as well as managing attention norms.
 class SwitchModelBase(nn.Module):
    """Common bookkeeping for models built out of switch computers.
    Handles temperature annealing of the switches, periodic dumping of attention maps to disk and
    collection of per-switch debug statistics for the logger.
    Implementing classes are expected to:
      - set `self.switches` to the list of switch computers they own,
      - set `self.attentions` in forward() to the outputs of the attention blocks,
      - set `self.nf`, which is forwarded to save_attention_to_image_rgb(),
      - optionally set `self.lr` to the input image so that it is saved next to the attention maps.
    """
    def __init__(self, init_temperature=10, final_temperature_step=10000):
        """Store the annealing schedule: temperature decays linearly from `1 + init_temperature` at step 0
        down to 1 at `final_temperature_step` and stays at 1 afterwards."""
        super(SwitchModelBase, self).__init__()
        self.switches = []  # The implementing class is expected to set this to a list of all ConfigurableSwitchComputers.
        self.attentions = []  # The implementing class is expected to set this in forward() to the output of the attention blocks.
        self.lr = None  # The implementing class is expected to set this to the input image fed into the generator. If not
                        # set, the attention logger will not output an image reference.
        self.init_temperature = init_temperature
        self.final_temperature_step = final_temperature_step
    def set_temperature(self, temp):
        """Push `temp` to every registered switch."""
        for sw in self.switches:
            sw.set_temperature(temp)
    def update_for_step(self, step, experiments_path='.'):
        """Per-training-step hook: reduce attention norms, anneal the temperature and, every 200 steps, write the
        attention maps (and the reference image, if `self.lr` is set) under `<experiments_path>/attention_maps`."""
        # All-reduce the attention norm.
        for sw in self.switches:
            sw.switch.reduce_norm_params()
        temp = max(1, 1 + self.init_temperature *
                   (self.final_temperature_step - step) / self.final_temperature_step)
        self.set_temperature(temp)
        if step % 200 != 0:
            return
        output_path = os.path.join(experiments_path, "attention_maps")
        prefix = "amap_%i_a%i_%%i.png"
        # Tolerate subclasses that leave `attentions` as None until the first forward() with attention saving.
        attentions = self.attentions if self.attentions is not None else []
        for i, attention in enumerate(attentions):
            save_attention_to_image_rgb(output_path, attention, self.nf, prefix % (step, i), step,
                                        output_mag=False)
        # `self.lr` is an image tensor; its truth value is ambiguous (raises for more than one element), so the
        # presence check has to be an identity comparison against None.
        if self.lr is not None:
            # Do not rely on the attention writer having created the directory (there may be no attentions).
            os.makedirs(output_path, exist_ok=True)
            torchvision.utils.save_image(self.lr[:, :3], os.path.join(output_path,
                                                                      "amap_%i_base_image.png" % (step,)))
    # This is a bit awkward. We want this plot to show up in TB as a histogram, but we are getting an intensity
    # plot out of the attention norm tensor. So we need to convert it back into a list of indexes, then feed into TB.
    def compute_anorm_histogram(self):
        """Return one 1-D index tensor per switch in which index `k` is repeated `int(10 * norm[k])` times."""
        result = []
        for sw in self.switches:
            intensity = sw.switch.attention_norm.compute_buffer_norm().clone().detach().cpu()
            # Scale before truncating to integers so that fractional intensities still produce counts.
            counts = (intensity * 10).long()
            bins = torch.arange(len(counts))
            result.append(bins.repeat_interleave(counts, 0))
        return result
    def get_debug_values(self, step, net_name):
        """Build the dict of values handed to the logger: the switch temperature plus, per attention map, its
        specificity, its histogram and (when a matching switch exists) the attention norm histogram.
        Returns an empty dict when no switches have been registered."""
        if not self.switches:
            return {}
        val = {"switch_temperature": self.switches[0].switch.temperature}
        attentions = self.attentions if self.attentions is not None else []
        anorms = self.compute_anorm_histogram()
        for i, att in enumerate(attentions):
            mean_hist = compute_attention_specificity(att, 2)
            val["switch_%i_specificity" % (i,)] = mean_hist[0]
            val["switch_%i_histogram" % (i,)] = mean_hist[1].clone().detach().cpu().flatten()
            # The number of logged attentions is chosen by the subclass and need not equal the number of switches.
            if i < len(anorms):
                val["switch_%i_attention_norm_histogram" % (i,)] = anorms[i]
        return val
 if __name__ == '__main__':
    bb = BackboneEncoder(64)
    emb = QueryKeyMultiplexer(64, 10)
--- a/codes/train2.py
+++ b/codes/train2.py
@ -32,7 +32,7 @@ def init_dist(backend='nccl', **kwargs):
 def main():
    #### options
    parser = argparse.ArgumentParser()
-    parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_teco_vix_stacked_rrdb.yml')
+    parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_exd_imgset_ssgr.yml')
    parser.add_argument('--launcher', choices=['none', 'pytorch'], default='none', help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    args = parser.parse_args()