Converge SSG architectures into unified switching base class
Also adds attention norm histogram to logging
This commit is contained in:
parent
3cc56cd00b
commit
4c85ee51a4
|
@ -1,7 +1,7 @@
|
|||
import math
|
||||
import functools
|
||||
from models.archs.arch_util import MultiConvBlock, ConvGnLelu, ConvGnSilu, ReferenceJoinBlock
|
||||
from models.archs.SwitchedResidualGenerator_arch import ConfigurableSwitchComputer, gather_2d
|
||||
from models.archs.SwitchedResidualGenerator_arch import ConfigurableSwitchComputer, gather_2d, SwitchModelBase
|
||||
from models.archs.SPSR_arch import ImageGradientNoPadding
|
||||
from torch import nn
|
||||
import torch
|
||||
|
@ -152,9 +152,9 @@ class SwitchWithReference(nn.Module):
|
|||
return self.switch(x, True, identity=x, att_in=(x, mplex_ref))
|
||||
|
||||
|
||||
class SSGr1(nn.Module):
|
||||
class SSGr1(SwitchModelBase):
|
||||
def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10):
|
||||
super(SSGr1, self).__init__()
|
||||
super(SSGr1, self).__init__(init_temperature, 10000)
|
||||
n_upscale = int(math.log(upscale, 2))
|
||||
self.nf = nf
|
||||
|
||||
|
@ -180,10 +180,6 @@ class SSGr1(nn.Module):
|
|||
self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True)
|
||||
self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False)
|
||||
self.switches = [self.sw1.switch, self.sw_grad.switch, self.conjoin_sw.switch]
|
||||
self.attentions = None
|
||||
self.lr = None
|
||||
self.init_temperature = init_temperature
|
||||
self.final_temperature_step = 10000
|
||||
|
||||
def forward(self, x, ref, ref_center, save_attentions=True):
|
||||
# The attention_maps debugger outputs <x>. Save that here.
|
||||
|
@ -218,39 +214,10 @@ class SSGr1(nn.Module):
|
|||
self.fea_grad_std = fea_grad_std.detach().cpu()
|
||||
return x_grad_out, x_out, x_grad
|
||||
|
||||
def set_temperature(self, temp):
|
||||
[sw.set_temperature(temp) for sw in self.switches]
|
||||
|
||||
def update_for_step(self, step, experiments_path='.'):
|
||||
if self.attentions:
|
||||
temp = max(1, 1 + self.init_temperature *
|
||||
(self.final_temperature_step - step) / self.final_temperature_step)
|
||||
self.set_temperature(temp)
|
||||
if step % 200 == 0:
|
||||
output_path = os.path.join(experiments_path, "attention_maps")
|
||||
prefix = "amap_%i_a%i_%%i.png"
|
||||
[save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step, output_mag=False) for i in range(len(self.attentions))]
|
||||
torchvision.utils.save_image(self.lr, os.path.join(experiments_path, "attention_maps", "amap_%i_base_image.png" % (step,)))
|
||||
|
||||
|
||||
def get_debug_values(self, step, net_name):
|
||||
if self.attentions:
|
||||
temp = self.switches[0].switch.temperature
|
||||
mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions]
|
||||
means = [i[0] for i in mean_hists]
|
||||
hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists]
|
||||
val = {"switch_temperature": temp,
|
||||
"grad_branch_feat_intg_std_dev": self.grad_fea_std,
|
||||
"conjoin_branch_grad_intg_std_dev": self.fea_grad_std}
|
||||
for i in range(len(means)):
|
||||
val["switch_%i_specificity" % (i,)] = means[i]
|
||||
val["switch_%i_histogram" % (i,)] = hists[i]
|
||||
return val
|
||||
|
||||
|
||||
class StackedSwitchGenerator(nn.Module):
|
||||
class StackedSwitchGenerator(SwitchModelBase):
|
||||
def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10):
|
||||
super(StackedSwitchGenerator, self).__init__()
|
||||
super(StackedSwitchGenerator, self).__init__(init_temperature, 10000)
|
||||
n_upscale = int(math.log(upscale, 2))
|
||||
self.nf = nf
|
||||
|
||||
|
@ -268,10 +235,6 @@ class StackedSwitchGenerator(nn.Module):
|
|||
self.upsample = UpconvBlock(nf, nf // 2, block=ConvGnLelu, norm=False, activation=True, bias=True)
|
||||
self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True)
|
||||
self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False)
|
||||
self.attentions = None
|
||||
self.lr = None
|
||||
self.init_temperature = init_temperature
|
||||
self.final_temperature_step = 10000
|
||||
|
||||
def forward(self, x, ref, ref_center, save_attentions=True):
|
||||
# The attention_maps debugger outputs <x>. Save that here.
|
||||
|
@ -292,36 +255,10 @@ class StackedSwitchGenerator(nn.Module):
|
|||
self.attentions = [a1, a3, a3]
|
||||
return x_out,
|
||||
|
||||
def set_temperature(self, temp):
|
||||
[sw.set_temperature(temp) for sw in self.switches]
|
||||
|
||||
def update_for_step(self, step, experiments_path='.'):
|
||||
if self.attentions:
|
||||
temp = max(1, 1 + self.init_temperature *
|
||||
(self.final_temperature_step - step) / self.final_temperature_step)
|
||||
self.set_temperature(temp)
|
||||
if step % 200 == 0:
|
||||
output_path = os.path.join(experiments_path, "attention_maps")
|
||||
prefix = "amap_%i_a%i_%%i.png"
|
||||
[save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step, output_mag=False) for i in range(len(self.attentions))]
|
||||
torchvision.utils.save_image(self.lr, os.path.join(experiments_path, "attention_maps", "amap_%i_base_image.png" % (step,)))
|
||||
|
||||
|
||||
def get_debug_values(self, step, net_name):
|
||||
temp = self.switches[0].switch.temperature
|
||||
mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions]
|
||||
means = [i[0] for i in mean_hists]
|
||||
hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists]
|
||||
val = {"switch_temperature": temp}
|
||||
for i in range(len(means)):
|
||||
val["switch_%i_specificity" % (i,)] = means[i]
|
||||
val["switch_%i_histogram" % (i,)] = hists[i]
|
||||
return val
|
||||
|
||||
|
||||
class SSGDeep(nn.Module):
|
||||
class SSGDeep(SwitchModelBase):
|
||||
def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10):
|
||||
super(SSGDeep, self).__init__()
|
||||
super(SSGDeep, self).__init__(init_temperature, 10000)
|
||||
n_upscale = int(math.log(upscale, 2))
|
||||
self.nf = nf
|
||||
|
||||
|
@ -349,10 +286,6 @@ class SSGDeep(nn.Module):
|
|||
self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True)
|
||||
self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False)
|
||||
self.switches = [self.sw1.switch, self.sw_grad.switch, self.conjoin_sw.switch, self.sw3.switch, self.sw4.switch]
|
||||
self.attentions = None
|
||||
self.lr = None
|
||||
self.init_temperature = init_temperature
|
||||
self.final_temperature_step = 10000
|
||||
|
||||
def forward(self, x, ref, ref_center, save_attentions=True):
|
||||
# The attention_maps debugger outputs <x>. Save that here.
|
||||
|
@ -389,38 +322,10 @@ class SSGDeep(nn.Module):
|
|||
self.fea_grad_std = fea_grad_std.detach().cpu()
|
||||
return x_grad_out, x_out, x_grad
|
||||
|
||||
def set_temperature(self, temp):
|
||||
[sw.set_temperature(temp) for sw in self.switches]
|
||||
|
||||
def update_for_step(self, step, experiments_path='.'):
|
||||
if self.attentions:
|
||||
temp = max(1, 1 + self.init_temperature *
|
||||
(self.final_temperature_step - step) / self.final_temperature_step)
|
||||
self.set_temperature(temp)
|
||||
if step % 200 == 0:
|
||||
output_path = os.path.join(experiments_path, "attention_maps")
|
||||
prefix = "amap_%i_a%i_%%i.png"
|
||||
[save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step, output_mag=False) for i in range(len(self.attentions))]
|
||||
torchvision.utils.save_image(self.lr, os.path.join(experiments_path, "attention_maps", "amap_%i_base_image.png" % (step,)))
|
||||
|
||||
|
||||
def get_debug_values(self, step, net_name):
|
||||
temp = self.switches[0].switch.temperature
|
||||
mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions]
|
||||
means = [i[0] for i in mean_hists]
|
||||
hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists]
|
||||
val = {"switch_temperature": temp,
|
||||
"grad_branch_feat_intg_std_dev": self.grad_fea_std,
|
||||
"conjoin_branch_grad_intg_std_dev": self.fea_grad_std}
|
||||
for i in range(len(means)):
|
||||
val["switch_%i_specificity" % (i,)] = means[i]
|
||||
val["switch_%i_histogram" % (i,)] = hists[i]
|
||||
return val
|
||||
|
||||
|
||||
class StackedSwitchGenerator5Layer(nn.Module):
|
||||
class StackedSwitchGenerator5Layer(SwitchModelBase):
|
||||
def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10):
|
||||
super(StackedSwitchGenerator5Layer, self).__init__()
|
||||
super(StackedSwitchGenerator5Layer, self).__init__(init_temperature, 10000)
|
||||
n_upscale = int(math.log(upscale, 2))
|
||||
self.nf = nf
|
||||
|
||||
|
@ -440,10 +345,6 @@ class StackedSwitchGenerator5Layer(nn.Module):
|
|||
self.upsample = UpconvBlock(nf, nf // 2, block=ConvGnLelu, norm=False, activation=True, bias=True)
|
||||
self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True)
|
||||
self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False)
|
||||
self.attentions = None
|
||||
self.lr = None
|
||||
self.init_temperature = init_temperature
|
||||
self.final_temperature_step = 10000
|
||||
|
||||
def forward(self, x, ref, ref_center, save_attentions=True):
|
||||
# The attention_maps debugger outputs <x>. Save that here.
|
||||
|
@ -471,33 +372,3 @@ class StackedSwitchGenerator5Layer(nn.Module):
|
|||
self.attentions = [a1, a3, a3, a4, a5]
|
||||
return x_out,
|
||||
|
||||
def set_temperature(self, temp):
|
||||
[sw.set_temperature(temp) for sw in self.switches]
|
||||
|
||||
def update_for_step(self, step, experiments_path='.'):
|
||||
if self.attentions:
|
||||
# All-reduce the attention norm.
|
||||
for sw in self.switches:
|
||||
sw.switch.reduce_norm_params()
|
||||
|
||||
temp = max(1, 1 + self.init_temperature *
|
||||
(self.final_temperature_step - step) / self.final_temperature_step)
|
||||
self.set_temperature(temp)
|
||||
if step % 200 == 0:
|
||||
output_path = os.path.join(experiments_path, "attention_maps")
|
||||
prefix = "amap_%i_a%i_%%i.png"
|
||||
[save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step,
|
||||
output_mag=False) for i in range(len(self.attentions))]
|
||||
torchvision.utils.save_image(self.lr[:,:3], os.path.join(experiments_path, "attention_maps",
|
||||
"amap_%i_base_image.png" % (step,)))
|
||||
|
||||
def get_debug_values(self, step, net_name):
|
||||
temp = self.switches[0].switch.temperature
|
||||
mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions]
|
||||
means = [i[0] for i in mean_hists]
|
||||
hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists]
|
||||
val = {"switch_temperature": temp}
|
||||
for i in range(len(means)):
|
||||
val["switch_%i_specificity" % (i,)] = means[i]
|
||||
val["switch_%i_histogram" % (i,)] = hists[i]
|
||||
return val
|
||||
|
|
|
@ -8,6 +8,7 @@ from models.archs.arch_util import ConvBnLelu, ConvGnSilu, ExpansionBlock, Expan
|
|||
from switched_conv.switched_conv_util import save_attention_to_image_rgb
|
||||
import os
|
||||
from models.archs.spinenet_arch import SpineNet
|
||||
import torchvision
|
||||
|
||||
# VGG-style layer with Conv(stride2)->BN->Activation->Conv->BN->Activation
|
||||
# Doubles the input filter count.
|
||||
|
@ -533,6 +534,64 @@ class QueryKeyPyramidMultiplexer(nn.Module):
|
|||
return v.view(b, t, h, w)
|
||||
|
||||
|
||||
# Base class for models that utilize ConfigurableSwitchComputer. Provides basis functionality like logging
|
||||
# switch temperature, distribution and images, as well as managing attention norms.
|
||||
class SwitchModelBase(nn.Module):
|
||||
def __init__(self, init_temperature=10, final_temperature_step=10000):
|
||||
super(SwitchModelBase, self).__init__()
|
||||
self.switches = [] # The implementing class is expected to set this to a list of all ConfigurableSwitchComputers.
|
||||
self.attentions = [] # The implementing class is expected to set this in forward() to the output of the attention blocks.
|
||||
self.lr = None # The implementing class is expected to set this to the input image fed into the generator. If not
|
||||
# set, the attention logger will not output an image reference.
|
||||
self.init_temperature = init_temperature
|
||||
self.final_temperature_step = final_temperature_step
|
||||
|
||||
def set_temperature(self, temp):
|
||||
[sw.set_temperature(temp) for sw in self.switches]
|
||||
|
||||
def update_for_step(self, step, experiments_path='.'):
|
||||
# All-reduce the attention norm.
|
||||
for sw in self.switches:
|
||||
sw.switch.reduce_norm_params()
|
||||
|
||||
temp = max(1, 1 + self.init_temperature *
|
||||
(self.final_temperature_step - step) / self.final_temperature_step)
|
||||
self.set_temperature(temp)
|
||||
if step % 200 == 0:
|
||||
output_path = os.path.join(experiments_path, "attention_maps")
|
||||
prefix = "amap_%i_a%i_%%i.png"
|
||||
[save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step,
|
||||
output_mag=False) for i in range(len(self.attentions))]
|
||||
if self.lr:
|
||||
torchvision.utils.save_image(self.lr[:, :3], os.path.join(experiments_path, "attention_maps",
|
||||
"amap_%i_base_image.png" % (step,)))
|
||||
|
||||
# This is a bit awkward. We want this plot to show up in TB as a histogram, but we are getting an intensity
|
||||
# plot out of the attention norm tensor. So we need to convert it back into a list of indexes, then feed into TB.
|
||||
def compute_anorm_histogram(self):
|
||||
intensities = [sw.switch.attention_norm.compute_buffer_norm().clone().detach().cpu() for sw in self.switches]
|
||||
result = []
|
||||
for intensity in intensities:
|
||||
intensity = intensity * 10
|
||||
bins = torch.tensor(list(range(len(intensity))))
|
||||
intensity = intensity.long()
|
||||
result.append(bins.repeat_interleave(intensity, 0))
|
||||
return result
|
||||
|
||||
def get_debug_values(self, step, net_name):
|
||||
temp = self.switches[0].switch.temperature
|
||||
mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions]
|
||||
means = [i[0] for i in mean_hists]
|
||||
hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists]
|
||||
anorms = self.compute_anorm_histogram()
|
||||
val = {"switch_temperature": temp}
|
||||
for i in range(len(means)):
|
||||
val["switch_%i_specificity" % (i,)] = means[i]
|
||||
val["switch_%i_histogram" % (i,)] = hists[i]
|
||||
val["switch_%i_attention_norm_histogram" % (i,)] = anorms[i]
|
||||
return val
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
bb = BackboneEncoder(64)
|
||||
emb = QueryKeyMultiplexer(64, 10)
|
||||
|
|
|
@ -32,7 +32,7 @@ def init_dist(backend='nccl', **kwargs):
|
|||
def main():
|
||||
#### options
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_teco_vix_stacked_rrdb.yml')
|
||||
parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_exd_imgset_ssgr.yml')
|
||||
parser.add_argument('--launcher', choices=['none', 'pytorch'], default='none', help='job launcher')
|
||||
parser.add_argument('--local_rank', type=int, default=0)
|
||||
args = parser.parse_args()
|
||||
|
|
Loading…
Reference in New Issue
Block a user