Converge SSG architectures into unified switching base class
Also adds attention norm histogram to logging
commit 4c85ee51a4
parent 3cc56cd00b
@@ -1,7 +1,7 @@
 import math
 import functools
 from models.archs.arch_util import MultiConvBlock, ConvGnLelu, ConvGnSilu, ReferenceJoinBlock
-from models.archs.SwitchedResidualGenerator_arch import ConfigurableSwitchComputer, gather_2d
+from models.archs.SwitchedResidualGenerator_arch import ConfigurableSwitchComputer, gather_2d, SwitchModelBase
 from models.archs.SPSR_arch import ImageGradientNoPadding
 from torch import nn
 import torch

@@ -152,9 +152,9 @@ class SwitchWithReference(nn.Module):
         return self.switch(x, True, identity=x, att_in=(x, mplex_ref))


-class SSGr1(nn.Module):
+class SSGr1(SwitchModelBase):
     def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10):
-        super(SSGr1, self).__init__()
+        super(SSGr1, self).__init__(init_temperature, 10000)
         n_upscale = int(math.log(upscale, 2))
         self.nf = nf

@@ -180,10 +180,6 @@ class SSGr1(nn.Module):
         self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True)
         self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False)
         self.switches = [self.sw1.switch, self.sw_grad.switch, self.conjoin_sw.switch]
-        self.attentions = None
-        self.lr = None
-        self.init_temperature = init_temperature
-        self.final_temperature_step = 10000

     def forward(self, x, ref, ref_center, save_attentions=True):
         # The attention_maps debugger outputs <x>. Save that here.

@@ -218,39 +214,10 @@ class SSGr1(nn.Module):
         self.fea_grad_std = fea_grad_std.detach().cpu()
         return x_grad_out, x_out, x_grad

-    def set_temperature(self, temp):
-        [sw.set_temperature(temp) for sw in self.switches]
-
-    def update_for_step(self, step, experiments_path='.'):
-        if self.attentions:
-            temp = max(1, 1 + self.init_temperature *
-                       (self.final_temperature_step - step) / self.final_temperature_step)
-            self.set_temperature(temp)
-            if step % 200 == 0:
-                output_path = os.path.join(experiments_path, "attention_maps")
-                prefix = "amap_%i_a%i_%%i.png"
-                [save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step, output_mag=False) for i in range(len(self.attentions))]
-                torchvision.utils.save_image(self.lr, os.path.join(experiments_path, "attention_maps", "amap_%i_base_image.png" % (step,)))
-
-    def get_debug_values(self, step, net_name):
-        if self.attentions:
-            temp = self.switches[0].switch.temperature
-            mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions]
-            means = [i[0] for i in mean_hists]
-            hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists]
-            val = {"switch_temperature": temp,
-                   "grad_branch_feat_intg_std_dev": self.grad_fea_std,
-                   "conjoin_branch_grad_intg_std_dev": self.fea_grad_std}
-            for i in range(len(means)):
-                val["switch_%i_specificity" % (i,)] = means[i]
-                val["switch_%i_histogram" % (i,)] = hists[i]
-            return val
-
-
-class StackedSwitchGenerator(nn.Module):
+class StackedSwitchGenerator(SwitchModelBase):
     def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10):
-        super(StackedSwitchGenerator, self).__init__()
+        super(StackedSwitchGenerator, self).__init__(init_temperature, 10000)
         n_upscale = int(math.log(upscale, 2))
         self.nf = nf

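The annealing logic deleted from each generator here (and re-added once in the base class further down) drops the switch temperature linearly from 1 + init_temperature at step 0 to 1 at final_temperature_step, then clamps it at 1. A standalone sketch of that schedule, using the defaults that appear in this diff (init_temperature=10, final_temperature_step=10000); the helper name is illustrative only, not part of the commit:

# Illustrative helper mirroring the annealing done in update_for_step().
def switch_temperature(step, init_temperature=10, final_temperature_step=10000):
    return max(1, 1 + init_temperature *
               (final_temperature_step - step) / final_temperature_step)

for step in (0, 2500, 5000, 10000, 20000):
    print(step, switch_temperature(step))   # -> 11.0, 8.5, 6.0, then clamped at 1
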
@@ -268,10 +235,6 @@ class StackedSwitchGenerator(nn.Module):
         self.upsample = UpconvBlock(nf, nf // 2, block=ConvGnLelu, norm=False, activation=True, bias=True)
         self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True)
         self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False)
-        self.attentions = None
-        self.lr = None
-        self.init_temperature = init_temperature
-        self.final_temperature_step = 10000

     def forward(self, x, ref, ref_center, save_attentions=True):
         # The attention_maps debugger outputs <x>. Save that here.

@@ -292,36 +255,10 @@ class StackedSwitchGenerator(nn.Module):
         self.attentions = [a1, a3, a3]
         return x_out,

-    def set_temperature(self, temp):
-        [sw.set_temperature(temp) for sw in self.switches]
-
-    def update_for_step(self, step, experiments_path='.'):
-        if self.attentions:
-            temp = max(1, 1 + self.init_temperature *
-                       (self.final_temperature_step - step) / self.final_temperature_step)
-            self.set_temperature(temp)
-            if step % 200 == 0:
-                output_path = os.path.join(experiments_path, "attention_maps")
-                prefix = "amap_%i_a%i_%%i.png"
-                [save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step, output_mag=False) for i in range(len(self.attentions))]
-                torchvision.utils.save_image(self.lr, os.path.join(experiments_path, "attention_maps", "amap_%i_base_image.png" % (step,)))
-
-    def get_debug_values(self, step, net_name):
-        temp = self.switches[0].switch.temperature
-        mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions]
-        means = [i[0] for i in mean_hists]
-        hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists]
-        val = {"switch_temperature": temp}
-        for i in range(len(means)):
-            val["switch_%i_specificity" % (i,)] = means[i]
-            val["switch_%i_histogram" % (i,)] = hists[i]
-        return val
-
-
-class SSGDeep(nn.Module):
+class SSGDeep(SwitchModelBase):
     def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10):
-        super(SSGDeep, self).__init__()
+        super(SSGDeep, self).__init__(init_temperature, 10000)
         n_upscale = int(math.log(upscale, 2))
         self.nf = nf

@@ -349,10 +286,6 @@ class SSGDeep(nn.Module):
         self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True)
         self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False)
         self.switches = [self.sw1.switch, self.sw_grad.switch, self.conjoin_sw.switch, self.sw3.switch, self.sw4.switch]
-        self.attentions = None
-        self.lr = None
-        self.init_temperature = init_temperature
-        self.final_temperature_step = 10000

     def forward(self, x, ref, ref_center, save_attentions=True):
         # The attention_maps debugger outputs <x>. Save that here.

@@ -389,38 +322,10 @@ class SSGDeep(nn.Module):
         self.fea_grad_std = fea_grad_std.detach().cpu()
         return x_grad_out, x_out, x_grad

-    def set_temperature(self, temp):
-        [sw.set_temperature(temp) for sw in self.switches]
-
-    def update_for_step(self, step, experiments_path='.'):
-        if self.attentions:
-            temp = max(1, 1 + self.init_temperature *
-                       (self.final_temperature_step - step) / self.final_temperature_step)
-            self.set_temperature(temp)
-            if step % 200 == 0:
-                output_path = os.path.join(experiments_path, "attention_maps")
-                prefix = "amap_%i_a%i_%%i.png"
-                [save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step, output_mag=False) for i in range(len(self.attentions))]
-                torchvision.utils.save_image(self.lr, os.path.join(experiments_path, "attention_maps", "amap_%i_base_image.png" % (step,)))
-
-    def get_debug_values(self, step, net_name):
-        temp = self.switches[0].switch.temperature
-        mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions]
-        means = [i[0] for i in mean_hists]
-        hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists]
-        val = {"switch_temperature": temp,
-               "grad_branch_feat_intg_std_dev": self.grad_fea_std,
-               "conjoin_branch_grad_intg_std_dev": self.fea_grad_std}
-        for i in range(len(means)):
-            val["switch_%i_specificity" % (i,)] = means[i]
-            val["switch_%i_histogram" % (i,)] = hists[i]
-        return val
-
-
-class StackedSwitchGenerator5Layer(nn.Module):
+class StackedSwitchGenerator5Layer(SwitchModelBase):
     def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10):
-        super(StackedSwitchGenerator5Layer, self).__init__()
+        super(StackedSwitchGenerator5Layer, self).__init__(init_temperature, 10000)
         n_upscale = int(math.log(upscale, 2))
         self.nf = nf

@@ -440,10 +345,6 @@ class StackedSwitchGenerator5Layer(nn.Module):
         self.upsample = UpconvBlock(nf, nf // 2, block=ConvGnLelu, norm=False, activation=True, bias=True)
         self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True)
         self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False)
-        self.attentions = None
-        self.lr = None
-        self.init_temperature = init_temperature
-        self.final_temperature_step = 10000

     def forward(self, x, ref, ref_center, save_attentions=True):
         # The attention_maps debugger outputs <x>. Save that here.

@@ -471,33 +372,3 @@ class StackedSwitchGenerator5Layer(nn.Module):
         self.attentions = [a1, a3, a3, a4, a5]
         return x_out,

-    def set_temperature(self, temp):
-        [sw.set_temperature(temp) for sw in self.switches]
-
-    def update_for_step(self, step, experiments_path='.'):
-        if self.attentions:
-            # All-reduce the attention norm.
-            for sw in self.switches:
-                sw.switch.reduce_norm_params()
-
-            temp = max(1, 1 + self.init_temperature *
-                       (self.final_temperature_step - step) / self.final_temperature_step)
-            self.set_temperature(temp)
-            if step % 200 == 0:
-                output_path = os.path.join(experiments_path, "attention_maps")
-                prefix = "amap_%i_a%i_%%i.png"
-                [save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step,
-                                             output_mag=False) for i in range(len(self.attentions))]
-                torchvision.utils.save_image(self.lr[:,:3], os.path.join(experiments_path, "attention_maps",
-                                                                         "amap_%i_base_image.png" % (step,)))
-
-    def get_debug_values(self, step, net_name):
-        temp = self.switches[0].switch.temperature
-        mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions]
-        means = [i[0] for i in mean_hists]
-        hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists]
-        val = {"switch_temperature": temp}
-        for i in range(len(means)):
-            val["switch_%i_specificity" % (i,)] = means[i]
-            val["switch_%i_histogram" % (i,)] = hists[i]
-        return val

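After these hunks, all four generators (SSGr1, StackedSwitchGenerator, SSGDeep, StackedSwitchGenerator5Layer) share one contract with SwitchModelBase, which this commit adds to models.archs.SwitchedResidualGenerator_arch (see the new import in the first hunk): the subclass builds its switch stack and registers it, while the base class owns temperature annealing and attention logging. A minimal, hypothetical sketch of that contract; the class name, constructor arguments and call signatures below are illustrative only, not code from this commit:

class ExampleSwitchedGenerator(SwitchModelBase):
    def __init__(self, nf, init_temperature=10):
        # The base class records init_temperature and the step at which annealing bottoms out.
        super(ExampleSwitchedGenerator, self).__init__(init_temperature, 10000)
        self.nf = nf  # update_for_step() reads self.nf when dumping attention map images.
        self.sw1 = SwitchWithReference(nf, 8)   # illustrative arguments
        self.sw2 = SwitchWithReference(nf, 8)
        # The base class iterates this list for set_temperature() and reduce_norm_params().
        self.switches = [self.sw1.switch, self.sw2.switch]

    def forward(self, x, ref, ref_center, save_attentions=True):
        x1, a1 = self.sw1(x, ref)               # illustrative call signature
        x2, a2 = self.sw2(x1, ref)
        if save_attentions:
            self.attentions = [a1, a2]          # consumed by update_for_step()/get_debug_values()
        self.lr = x.detach().cpu()              # optional base image for the attention map dumps
        return x2,
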
@@ -8,6 +8,7 @@ from models.archs.arch_util import ConvBnLelu, ConvGnSilu, ExpansionBlock, Expan
 from switched_conv.switched_conv_util import save_attention_to_image_rgb
 import os
 from models.archs.spinenet_arch import SpineNet
+import torchvision

 # VGG-style layer with Conv(stride2)->BN->Activation->Conv->BN->Activation
 # Doubles the input filter count.

@@ -533,6 +534,64 @@ class QueryKeyPyramidMultiplexer(nn.Module):
         return v.view(b, t, h, w)


+# Base class for models that utilize ConfigurableSwitchComputer. Provides basis functionality like logging
+# switch temperature, distribution and images, as well as managing attention norms.
+class SwitchModelBase(nn.Module):
+    def __init__(self, init_temperature=10, final_temperature_step=10000):
+        super(SwitchModelBase, self).__init__()
+        self.switches = []  # The implementing class is expected to set this to a list of all ConfigurableSwitchComputers.
+        self.attentions = []  # The implementing class is expected to set this in forward() to the output of the attention blocks.
+        self.lr = None  # The implementing class is expected to set this to the input image fed into the generator. If not
+                        # set, the attention logger will not output an image reference.
+        self.init_temperature = init_temperature
+        self.final_temperature_step = final_temperature_step
+
+    def set_temperature(self, temp):
+        [sw.set_temperature(temp) for sw in self.switches]
+
+    def update_for_step(self, step, experiments_path='.'):
+        # All-reduce the attention norm.
+        for sw in self.switches:
+            sw.switch.reduce_norm_params()
+
+        temp = max(1, 1 + self.init_temperature *
+                   (self.final_temperature_step - step) / self.final_temperature_step)
+        self.set_temperature(temp)
+        if step % 200 == 0:
+            output_path = os.path.join(experiments_path, "attention_maps")
+            prefix = "amap_%i_a%i_%%i.png"
+            [save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step,
+                                         output_mag=False) for i in range(len(self.attentions))]
+            if self.lr:
+                torchvision.utils.save_image(self.lr[:, :3], os.path.join(experiments_path, "attention_maps",
+                                                                          "amap_%i_base_image.png" % (step,)))
+
+    # This is a bit awkward. We want this plot to show up in TB as a histogram, but we are getting an intensity
+    # plot out of the attention norm tensor. So we need to convert it back into a list of indexes, then feed into TB.
+    def compute_anorm_histogram(self):
+        intensities = [sw.switch.attention_norm.compute_buffer_norm().clone().detach().cpu() for sw in self.switches]
+        result = []
+        for intensity in intensities:
+            intensity = intensity * 10
+            bins = torch.tensor(list(range(len(intensity))))
+            intensity = intensity.long()
+            result.append(bins.repeat_interleave(intensity, 0))
+        return result
+
+    def get_debug_values(self, step, net_name):
+        temp = self.switches[0].switch.temperature
+        mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions]
+        means = [i[0] for i in mean_hists]
+        hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists]
+        anorms = self.compute_anorm_histogram()
+        val = {"switch_temperature": temp}
+        for i in range(len(means)):
+            val["switch_%i_specificity" % (i,)] = means[i]
+            val["switch_%i_histogram" % (i,)] = hists[i]
+            val["switch_%i_attention_norm_histogram" % (i,)] = anorms[i]
+        return val
+
+
 if __name__ == '__main__':
     bb = BackboneEncoder(64)
     emb = QueryKeyMultiplexer(64, 10)

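The compute_anorm_histogram() helper above is what feeds the new attention norm histogram into logging: it scales the per-transform intensity vector, truncates it to integer counts, and repeats each bin index by its count so TensorBoard's histogram view reconstructs the distribution. A small standalone illustration of that conversion, with made-up values:

import torch

intensity = torch.tensor([0.3, 0.1, 0.6])        # made-up per-transform attention norm
counts = (intensity * 10).long()                  # tensor([3, 1, 6]) -- samples per bin
bins = torch.tensor(list(range(len(counts))))     # tensor([0, 1, 2])
samples = bins.repeat_interleave(counts, 0)
print(samples)  # tensor([0, 0, 0, 1, 2, 2, 2, 2, 2, 2]); logged as a histogram, it peaks at bin 2
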
@@ -32,7 +32,7 @@ def init_dist(backend='nccl', **kwargs):
 def main():
     #### options
     parser = argparse.ArgumentParser()
-    parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_teco_vix_stacked_rrdb.yml')
+    parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_exd_imgset_ssgr.yml')
     parser.add_argument('--launcher', choices=['none', 'pytorch'], default='none', help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
     args = parser.parse_args()