Converge SSG architectures into unified switching base class

Also adds attention norm histogram to logging
This commit is contained in:
James Betker 2020-10-08 17:23:21 -06:00
parent 3cc56cd00b
commit 4c85ee51a4
3 changed files with 69 additions and 139 deletions

View File

@ -1,7 +1,7 @@
import math import math
import functools import functools
from models.archs.arch_util import MultiConvBlock, ConvGnLelu, ConvGnSilu, ReferenceJoinBlock from models.archs.arch_util import MultiConvBlock, ConvGnLelu, ConvGnSilu, ReferenceJoinBlock
from models.archs.SwitchedResidualGenerator_arch import ConfigurableSwitchComputer, gather_2d from models.archs.SwitchedResidualGenerator_arch import ConfigurableSwitchComputer, gather_2d, SwitchModelBase
from models.archs.SPSR_arch import ImageGradientNoPadding from models.archs.SPSR_arch import ImageGradientNoPadding
from torch import nn from torch import nn
import torch import torch
@ -152,9 +152,9 @@ class SwitchWithReference(nn.Module):
return self.switch(x, True, identity=x, att_in=(x, mplex_ref)) return self.switch(x, True, identity=x, att_in=(x, mplex_ref))
class SSGr1(nn.Module): class SSGr1(SwitchModelBase):
def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10): def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10):
super(SSGr1, self).__init__() super(SSGr1, self).__init__(init_temperature, 10000)
n_upscale = int(math.log(upscale, 2)) n_upscale = int(math.log(upscale, 2))
self.nf = nf self.nf = nf
@ -180,10 +180,6 @@ class SSGr1(nn.Module):
self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True) self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True)
self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False) self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False)
self.switches = [self.sw1.switch, self.sw_grad.switch, self.conjoin_sw.switch] self.switches = [self.sw1.switch, self.sw_grad.switch, self.conjoin_sw.switch]
self.attentions = None
self.lr = None
self.init_temperature = init_temperature
self.final_temperature_step = 10000
def forward(self, x, ref, ref_center, save_attentions=True): def forward(self, x, ref, ref_center, save_attentions=True):
# The attention_maps debugger outputs <x>. Save that here. # The attention_maps debugger outputs <x>. Save that here.
@ -218,39 +214,10 @@ class SSGr1(nn.Module):
self.fea_grad_std = fea_grad_std.detach().cpu() self.fea_grad_std = fea_grad_std.detach().cpu()
return x_grad_out, x_out, x_grad return x_grad_out, x_out, x_grad
def set_temperature(self, temp):
[sw.set_temperature(temp) for sw in self.switches]
def update_for_step(self, step, experiments_path='.'): class StackedSwitchGenerator(SwitchModelBase):
if self.attentions:
temp = max(1, 1 + self.init_temperature *
(self.final_temperature_step - step) / self.final_temperature_step)
self.set_temperature(temp)
if step % 200 == 0:
output_path = os.path.join(experiments_path, "attention_maps")
prefix = "amap_%i_a%i_%%i.png"
[save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step, output_mag=False) for i in range(len(self.attentions))]
torchvision.utils.save_image(self.lr, os.path.join(experiments_path, "attention_maps", "amap_%i_base_image.png" % (step,)))
def get_debug_values(self, step, net_name):
if self.attentions:
temp = self.switches[0].switch.temperature
mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions]
means = [i[0] for i in mean_hists]
hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists]
val = {"switch_temperature": temp,
"grad_branch_feat_intg_std_dev": self.grad_fea_std,
"conjoin_branch_grad_intg_std_dev": self.fea_grad_std}
for i in range(len(means)):
val["switch_%i_specificity" % (i,)] = means[i]
val["switch_%i_histogram" % (i,)] = hists[i]
return val
class StackedSwitchGenerator(nn.Module):
def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10): def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10):
super(StackedSwitchGenerator, self).__init__() super(StackedSwitchGenerator, self).__init__(init_temperature, 10000)
n_upscale = int(math.log(upscale, 2)) n_upscale = int(math.log(upscale, 2))
self.nf = nf self.nf = nf
@ -268,10 +235,6 @@ class StackedSwitchGenerator(nn.Module):
self.upsample = UpconvBlock(nf, nf // 2, block=ConvGnLelu, norm=False, activation=True, bias=True) self.upsample = UpconvBlock(nf, nf // 2, block=ConvGnLelu, norm=False, activation=True, bias=True)
self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True) self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True)
self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False) self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False)
self.attentions = None
self.lr = None
self.init_temperature = init_temperature
self.final_temperature_step = 10000
def forward(self, x, ref, ref_center, save_attentions=True): def forward(self, x, ref, ref_center, save_attentions=True):
# The attention_maps debugger outputs <x>. Save that here. # The attention_maps debugger outputs <x>. Save that here.
@ -292,36 +255,10 @@ class StackedSwitchGenerator(nn.Module):
self.attentions = [a1, a3, a3] self.attentions = [a1, a3, a3]
return x_out, return x_out,
def set_temperature(self, temp):
[sw.set_temperature(temp) for sw in self.switches]
def update_for_step(self, step, experiments_path='.'): class SSGDeep(SwitchModelBase):
if self.attentions:
temp = max(1, 1 + self.init_temperature *
(self.final_temperature_step - step) / self.final_temperature_step)
self.set_temperature(temp)
if step % 200 == 0:
output_path = os.path.join(experiments_path, "attention_maps")
prefix = "amap_%i_a%i_%%i.png"
[save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step, output_mag=False) for i in range(len(self.attentions))]
torchvision.utils.save_image(self.lr, os.path.join(experiments_path, "attention_maps", "amap_%i_base_image.png" % (step,)))
def get_debug_values(self, step, net_name):
temp = self.switches[0].switch.temperature
mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions]
means = [i[0] for i in mean_hists]
hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists]
val = {"switch_temperature": temp}
for i in range(len(means)):
val["switch_%i_specificity" % (i,)] = means[i]
val["switch_%i_histogram" % (i,)] = hists[i]
return val
class SSGDeep(nn.Module):
def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10): def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10):
super(SSGDeep, self).__init__() super(SSGDeep, self).__init__(init_temperature, 10000)
n_upscale = int(math.log(upscale, 2)) n_upscale = int(math.log(upscale, 2))
self.nf = nf self.nf = nf
@ -349,10 +286,6 @@ class SSGDeep(nn.Module):
self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True) self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True)
self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False) self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False)
self.switches = [self.sw1.switch, self.sw_grad.switch, self.conjoin_sw.switch, self.sw3.switch, self.sw4.switch] self.switches = [self.sw1.switch, self.sw_grad.switch, self.conjoin_sw.switch, self.sw3.switch, self.sw4.switch]
self.attentions = None
self.lr = None
self.init_temperature = init_temperature
self.final_temperature_step = 10000
def forward(self, x, ref, ref_center, save_attentions=True): def forward(self, x, ref, ref_center, save_attentions=True):
# The attention_maps debugger outputs <x>. Save that here. # The attention_maps debugger outputs <x>. Save that here.
@ -389,38 +322,10 @@ class SSGDeep(nn.Module):
self.fea_grad_std = fea_grad_std.detach().cpu() self.fea_grad_std = fea_grad_std.detach().cpu()
return x_grad_out, x_out, x_grad return x_grad_out, x_out, x_grad
def set_temperature(self, temp):
[sw.set_temperature(temp) for sw in self.switches]
def update_for_step(self, step, experiments_path='.'): class StackedSwitchGenerator5Layer(SwitchModelBase):
if self.attentions:
temp = max(1, 1 + self.init_temperature *
(self.final_temperature_step - step) / self.final_temperature_step)
self.set_temperature(temp)
if step % 200 == 0:
output_path = os.path.join(experiments_path, "attention_maps")
prefix = "amap_%i_a%i_%%i.png"
[save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step, output_mag=False) for i in range(len(self.attentions))]
torchvision.utils.save_image(self.lr, os.path.join(experiments_path, "attention_maps", "amap_%i_base_image.png" % (step,)))
def get_debug_values(self, step, net_name):
temp = self.switches[0].switch.temperature
mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions]
means = [i[0] for i in mean_hists]
hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists]
val = {"switch_temperature": temp,
"grad_branch_feat_intg_std_dev": self.grad_fea_std,
"conjoin_branch_grad_intg_std_dev": self.fea_grad_std}
for i in range(len(means)):
val["switch_%i_specificity" % (i,)] = means[i]
val["switch_%i_histogram" % (i,)] = hists[i]
return val
class StackedSwitchGenerator5Layer(nn.Module):
def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10): def __init__(self, in_nc, out_nc, nf, xforms=8, upscale=4, init_temperature=10):
super(StackedSwitchGenerator5Layer, self).__init__() super(StackedSwitchGenerator5Layer, self).__init__(init_temperature, 10000)
n_upscale = int(math.log(upscale, 2)) n_upscale = int(math.log(upscale, 2))
self.nf = nf self.nf = nf
@ -440,10 +345,6 @@ class StackedSwitchGenerator5Layer(nn.Module):
self.upsample = UpconvBlock(nf, nf // 2, block=ConvGnLelu, norm=False, activation=True, bias=True) self.upsample = UpconvBlock(nf, nf // 2, block=ConvGnLelu, norm=False, activation=True, bias=True)
self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True) self.final_hr_conv1 = ConvGnLelu(nf // 2, nf // 2, kernel_size=3, norm=False, activation=False, bias=True)
self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False) self.final_hr_conv2 = ConvGnLelu(nf // 2, out_nc, kernel_size=3, norm=False, activation=False, bias=False)
self.attentions = None
self.lr = None
self.init_temperature = init_temperature
self.final_temperature_step = 10000
def forward(self, x, ref, ref_center, save_attentions=True): def forward(self, x, ref, ref_center, save_attentions=True):
# The attention_maps debugger outputs <x>. Save that here. # The attention_maps debugger outputs <x>. Save that here.
@ -471,33 +372,3 @@ class StackedSwitchGenerator5Layer(nn.Module):
self.attentions = [a1, a3, a3, a4, a5] self.attentions = [a1, a3, a3, a4, a5]
return x_out, return x_out,
def set_temperature(self, temp):
[sw.set_temperature(temp) for sw in self.switches]
def update_for_step(self, step, experiments_path='.'):
if self.attentions:
# All-reduce the attention norm.
for sw in self.switches:
sw.switch.reduce_norm_params()
temp = max(1, 1 + self.init_temperature *
(self.final_temperature_step - step) / self.final_temperature_step)
self.set_temperature(temp)
if step % 200 == 0:
output_path = os.path.join(experiments_path, "attention_maps")
prefix = "amap_%i_a%i_%%i.png"
[save_attention_to_image_rgb(output_path, self.attentions[i], self.nf, prefix % (step, i), step,
output_mag=False) for i in range(len(self.attentions))]
torchvision.utils.save_image(self.lr[:,:3], os.path.join(experiments_path, "attention_maps",
"amap_%i_base_image.png" % (step,)))
def get_debug_values(self, step, net_name):
temp = self.switches[0].switch.temperature
mean_hists = [compute_attention_specificity(att, 2) for att in self.attentions]
means = [i[0] for i in mean_hists]
hists = [i[1].clone().detach().cpu().flatten() for i in mean_hists]
val = {"switch_temperature": temp}
for i in range(len(means)):
val["switch_%i_specificity" % (i,)] = means[i]
val["switch_%i_histogram" % (i,)] = hists[i]
return val

View File

@ -8,6 +8,7 @@ from models.archs.arch_util import ConvBnLelu, ConvGnSilu, ExpansionBlock, Expan
from switched_conv.switched_conv_util import save_attention_to_image_rgb from switched_conv.switched_conv_util import save_attention_to_image_rgb
import os import os
from models.archs.spinenet_arch import SpineNet from models.archs.spinenet_arch import SpineNet
import torchvision
# VGG-style layer with Conv(stride2)->BN->Activation->Conv->BN->Activation # VGG-style layer with Conv(stride2)->BN->Activation->Conv->BN->Activation
# Doubles the input filter count. # Doubles the input filter count.
@ -533,6 +534,64 @@ class QueryKeyPyramidMultiplexer(nn.Module):
return v.view(b, t, h, w) return v.view(b, t, h, w)
# Base class for models that utilize ConfigurableSwitchComputer. Provides basis functionality like logging
# switch temperature, distribution and images, as well as managing attention norms.
class SwitchModelBase(nn.Module):
    """Shared temperature scheduling and debug logging for switch-based generators.

    Subclasses are expected to:
      * assign ``self.switches`` (a list of ConfigurableSwitchComputers) in ``__init__``;
      * assign ``self.attentions`` in ``forward()`` to the outputs of the attention blocks;
      * optionally assign ``self.lr`` to the input image fed into the generator;
      * define ``self.nf``, which ``update_for_step`` passes to the attention image writer.
    """

    def __init__(self, init_temperature=10, final_temperature_step=10000):
        """Store the temperature schedule parameters and reset the logging state.

        Args:
            init_temperature: scale of the temperature offset applied at step 0.
            final_temperature_step: step at which the scheduled temperature reaches 1.
        """
        super(SwitchModelBase, self).__init__()
        self.switches = []  # The implementing class is expected to set this to a list of all ConfigurableSwitchComputers.
        self.attentions = []  # The implementing class is expected to set this in forward() to the output of the attention blocks.
        self.lr = None  # The implementing class is expected to set this to the input image fed into the generator. If not
        # set, the attention logger will not output an image reference.
        self.init_temperature = init_temperature
        self.final_temperature_step = final_temperature_step

    def set_temperature(self, temp):
        """Forward ``temp`` to every registered switch."""
        for sw in self.switches:
            sw.set_temperature(temp)

    def update_for_step(self, step, experiments_path='.'):
        """Advance the temperature schedule and periodically dump attention maps.

        Args:
            step: current training step.
            experiments_path: directory under which ``attention_maps`` output is written.
        """
        # All-reduce the attention norm.
        for sw in self.switches:
            sw.switch.reduce_norm_params()

        # Linear decay from (1 + init_temperature) at step 0 down to a floor of 1.
        temp = max(1, 1 + self.init_temperature *
                   (self.final_temperature_step - step) / self.final_temperature_step)
        self.set_temperature(temp)

        if step % 200 == 0:
            output_path = os.path.join(experiments_path, "attention_maps")
            prefix = "amap_%i_a%i_%%i.png"
            # Tolerate subclasses that leave attentions unset (None) until forward() has run.
            for i, attention in enumerate(self.attentions or []):
                save_attention_to_image_rgb(output_path, attention, self.nf, prefix % (step, i), step,
                                            output_mag=False)
            # `if self.lr:` would raise for any tensor holding more than one element, so test for
            # presence explicitly.
            if self.lr is not None:
                torchvision.utils.save_image(self.lr[:, :3],
                                             os.path.join(output_path, "amap_%i_base_image.png" % (step,)))

    # This is a bit awkward. We want this plot to show up in TB as a histogram, but we are getting an intensity
    # plot out of the attention norm tensor. So we need to convert it back into a list of indexes, then feed into TB.
    def compute_anorm_histogram(self):
        """Return one tensor of bin indexes per switch.

        Each bin index ``b`` is repeated ``int(intensity[b] * 10)`` times, where ``intensity`` is the
        value returned by that switch's ``attention_norm.compute_buffer_norm()``.
        """
        result = []
        for sw in self.switches:
            intensity = sw.switch.attention_norm.compute_buffer_norm().clone().detach().cpu()
            counts = (intensity * 10).long()
            bins = torch.arange(len(counts))
            result.append(bins.repeat_interleave(counts, 0))
        return result

    def get_debug_values(self, step, net_name):
        """Collect per-switch scalars and histograms for logging.

        Args:
            step: current training step (unused here; kept for the logging interface).
            net_name: name of the network being logged (unused here).

        Returns:
            A dict holding ``switch_temperature`` plus, for each attention map ``i``, its
            specificity, its histogram and (when a matching switch exists) its attention norm
            histogram. Empty when no switches have been registered.
        """
        if not self.switches:
            return {}
        val = {"switch_temperature": self.switches[0].switch.temperature}
        anorms = self.compute_anorm_histogram()
        for i, att in enumerate(self.attentions or []):
            mean, hist = compute_attention_specificity(att, 2)
            val["switch_%i_specificity" % (i,)] = mean
            val["switch_%i_histogram" % (i,)] = hist.clone().detach().cpu().flatten()
            # There is one attention norm per switch, which is not guaranteed to equal the number
            # of attention maps the subclass chose to log.
            if i < len(anorms):
                val["switch_%i_attention_norm_histogram" % (i,)] = anorms[i]
        return val
if __name__ == '__main__': if __name__ == '__main__':
bb = BackboneEncoder(64) bb = BackboneEncoder(64)
emb = QueryKeyMultiplexer(64, 10) emb = QueryKeyMultiplexer(64, 10)

View File

@ -32,7 +32,7 @@ def init_dist(backend='nccl', **kwargs):
def main(): def main():
#### options #### options
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_teco_vix_stacked_rrdb.yml') parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_exd_imgset_ssgr.yml')
parser.add_argument('--launcher', choices=['none', 'pytorch'], default='none', help='job launcher') parser.add_argument('--launcher', choices=['none', 'pytorch'], default='none', help='job launcher')
parser.add_argument('--local_rank', type=int, default=0) parser.add_argument('--local_rank', type=int, default=0)
args = parser.parse_args() args = parser.parse_args()