diff --git a/codes/models/archs/RRDBNet_arch.py b/codes/models/archs/RRDBNet_arch.py index 0133f68b..26ddee36 100644 --- a/codes/models/archs/RRDBNet_arch.py +++ b/codes/models/archs/RRDBNet_arch.py @@ -1,293 +1,145 @@ -import functools import torch import torch.nn as nn import torch.nn.functional as F -import models.archs.arch_util as arch_util -from models.archs.arch_util import PixelUnshuffle -import torchvision -from utils.util import checkpoint +from torch.utils.checkpoint import checkpoint_sequential + +from models.archs.arch_util import make_layer, default_init_weights -class ResidualDenseBlock_5C(nn.Module): - def __init__(self, nf=64, gc=32, bias=True, late_stage_kernel_size=3, late_stage_padding=1): - super(ResidualDenseBlock_5C, self).__init__() - # gc: growth channel, i.e. intermediate channels - self.conv1 = nn.Conv2d(nf, gc, 3, 1, 1, bias=bias) - self.conv2 = nn.Conv2d(nf + gc, gc, 3, 1, 1, bias=bias) - self.conv3 = nn.Conv2d(nf + 2 * gc, gc, late_stage_kernel_size, 1, late_stage_padding, bias=bias) - self.conv4 = nn.Conv2d(nf + 3 * gc, gc, late_stage_kernel_size, 1, late_stage_padding, bias=bias) - self.conv5 = nn.Conv2d(nf + 4 * gc, nf, late_stage_kernel_size, 1, late_stage_padding, bias=bias) +class ResidualDenseBlock(nn.Module): + """Residual Dense Block. + + Used in RRDB block in ESRGAN. + + Args: + mid_channels (int): Channel number of intermediate features. + growth_channels (int): Channels for each growth. 
+ """ + + def __init__(self, mid_channels=64, growth_channels=32): + super(ResidualDenseBlock, self).__init__() + for i in range(5): + out_channels = mid_channels if i == 4 else growth_channels + self.add_module( + f'conv{i+1}', + nn.Conv2d(mid_channels + i * growth_channels, out_channels, 3, + 1, 1)) self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) + for i in range(5): + default_init_weights(getattr(self, f'conv{i+1}'), 0.1) - # initialization - arch_util.initialize_weights([self.conv1, self.conv2, self.conv3, self.conv4, self.conv5], - 0.1) def forward(self, x): + """Forward function. + + Args: + x (Tensor): Input tensor with shape (n, c, h, w). + + Returns: + Tensor: Forward results. + """ x1 = self.lrelu(self.conv1(x)) x2 = self.lrelu(self.conv2(torch.cat((x, x1), 1))) x3 = self.lrelu(self.conv3(torch.cat((x, x1, x2), 1))) x4 = self.lrelu(self.conv4(torch.cat((x, x1, x2, x3), 1))) x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1)) + # Empirically, we use 0.2 to scale the residual for better performance return x5 * 0.2 + x class RRDB(nn.Module): - '''Residual in Residual Dense Block''' + """Residual in Residual Dense Block. - def __init__(self, nf, gc=32): + Used in RRDB-Net in ESRGAN. + + Args: + mid_channels (int): Channel number of intermediate features. + growth_channels (int): Channels for each growth. + """ + + def __init__(self, mid_channels, growth_channels=32): super(RRDB, self).__init__() - self.RDB1 = ResidualDenseBlock_5C(nf, gc) - self.RDB2 = ResidualDenseBlock_5C(nf, gc) - self.RDB3 = ResidualDenseBlock_5C(nf, gc) + self.rdb1 = ResidualDenseBlock(mid_channels, growth_channels) + self.rdb2 = ResidualDenseBlock(mid_channels, growth_channels) + self.rdb3 = ResidualDenseBlock(mid_channels, growth_channels) def forward(self, x): - out = checkpoint(self.RDB1, x) - out = checkpoint(self.RDB2, out) - out = checkpoint(self.RDB3, out) + """Forward function. + + Args: + x (Tensor): Input tensor with shape (n, c, h, w). 
+ + Returns: + Tensor: Forward results. + """ + out = self.rdb1(x) + out = self.rdb2(out) + out = self.rdb3(out) + # Empirically, we use 0.2 to scale the residual for better performance return out * 0.2 + x -class LowDimRRDB(RRDB): - def __init__(self, nf, gc=32, dimensional_adjustment=4): - super(LowDimRRDB, self).__init__(nf * (dimensional_adjustment ** 2), gc * (dimensional_adjustment ** 2)) - self.unshuffle = PixelUnshuffle(dimensional_adjustment) - self.shuffle = nn.PixelShuffle(dimensional_adjustment) +class RRDBNet(nn.Module): + """Networks consisting of Residual in Residual Dense Block, which is used + in ESRGAN. - def forward(self, x): - x = self.unshuffle(x) - x = super(LowDimRRDB, self).forward(x) - return self.shuffle(x) + ESRGAN: Enhanced Super-Resolution Generative Adversarial Networks. + Currently, it supports x4 upsampling scale factor. + Args: + in_channels (int): Channel number of inputs. + out_channels (int): Channel number of outputs. + mid_channels (int): Channel number of intermediate features. + Default: 64. + num_blocks (int): Block number in the trunk network. Default: 23. + growth_channels (int): Channels for each growth. Default: 32. + """ -# Identical to LowDimRRDB but wraps an RRDB rather than inheriting from it. TODO: remove LowDimRRDB when backwards -# compatibility is no longer desired. -class LowDimRRDBWrapper(nn.Module): - # Do not specify nf or gc on the partial_rrdb passed in. That will be done by the wrapper. - def __init__(self, nf, partial_rrdb, gc=32, dimensional_adjustment=4): - super(LowDimRRDBWrapper, self).__init__() - self.rrdb = partial_rrdb(nf=nf * (dimensional_adjustment ** 2), gc=gc * (dimensional_adjustment ** 2)) - self.unshuffle = PixelUnshuffle(dimensional_adjustment) - self.shuffle = nn.PixelShuffle(dimensional_adjustment) - - def forward(self, x): - x = self.unshuffle(x) - x = self.rrdb(x) - return self.shuffle(x) - - -# This module performs the majority of the processing done by RRDBNet. 
It just doesn't have the upsampling at the end. -class RRDBTrunk(nn.Module): - def __init__(self, nf_in, nf_out, nb, gc=32, initial_stride=1, rrdb_block_f=None, conv_first_block=None): - super(RRDBTrunk, self).__init__() - if rrdb_block_f is None: - rrdb_block_f = functools.partial(RRDB, nf=nf_out, gc=gc) - - if conv_first_block is None: - self.conv_first = nn.Conv2d(nf_in, nf_out, 7, initial_stride, padding=3, bias=True) - else: - self.conv_first = conv_first_block - - self.RRDB_trunk, self.rrdb_layers = arch_util.make_layer(rrdb_block_f, nb, True) - self.trunk_conv = nn.Conv2d(nf_out, nf_out, 3, 1, 1, bias=True) - - # Sets the softmax temperature of each RRDB layer. Only works if you are using attentive - # convolutions. - def set_temperature(self, temp): - for layer in self.rrdb_layers: - layer.set_temperature(temp) - - def forward(self, x): - fea = self.conv_first(x) - trunk = self.trunk_conv(self.RRDB_trunk(fea)) - fea = fea + trunk - return fea - - -# Adds some base methods that all RRDB* classes will use. -class RRDBBase(nn.Module): - def __init__(self): - super(RRDBBase, self).__init__() - - # Sets the softmax temperature of each RRDB layer. Only works if you are using attentive - # convolutions. - def set_temperature(self, temp): - for trunk in self.trunks: - for layer in trunk.rrdb_layers: - layer.set_temperature(temp) - - -# This class uses a RRDBTrunk to perform processing on an image, then upsamples it. -class RRDBNet(RRDBBase): - def __init__(self, in_nc, out_nc, nf, nb, gc=32, scale=2, initial_stride=1, - rrdb_block_f=None): + def __init__(self, + in_channels, + out_channels, + mid_channels=64, + num_blocks=23, + growth_channels=32): super(RRDBNet, self).__init__() - - # Trunk - does actual processing. 
- self.trunk = RRDBTrunk(in_nc, nf, nb, gc, initial_stride, rrdb_block_f) - self.trunks = [self.trunk] - - # Upsampling - self.scale = scale - self.upconv1 = nn.Conv2d(nf, nf, 5, 1, padding=2, bias=True) - self.upconv2 = nn.Conv2d(nf, nf, 5, 1, padding=2, bias=True) - self.HRconv = nn.Conv2d(nf, nf, 5, 1, padding=2, bias=True) - self.conv_last = nn.Conv2d(nf, out_nc, 3, 1, 1, bias=True) + self.conv_first = nn.Conv2d(in_channels, mid_channels, 3, 1, 1) + self.body = make_layer( + RRDB, + num_blocks, + mid_channels=mid_channels, + growth_channels=growth_channels) + self.conv_body = nn.Conv2d(mid_channels, mid_channels, 3, 1, 1) + # upsample + self.conv_up1 = nn.Conv2d(mid_channels, mid_channels, 3, 1, 1) + self.conv_up2 = nn.Conv2d(mid_channels, mid_channels, 3, 1, 1) + self.conv_hr = nn.Conv2d(mid_channels, mid_channels, 3, 1, 1) + self.conv_last = nn.Conv2d(mid_channels, out_channels, 3, 1, 1) self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) - def forward(self, x): - fea = self.trunk(x) - - if self.scale >= 2: - fea = F.interpolate(fea, scale_factor=2, mode='nearest') - fea = self.lrelu(self.upconv1(fea)) - if self.scale >= 4: - fea = F.interpolate(fea, scale_factor=2, mode='nearest') - fea = self.lrelu(self.upconv2(fea)) - out = self.conv_last(self.lrelu(self.HRconv(fea))) - - return out - - def load_state_dict(self, state_dict, strict=True): - # The parameters in self.trunk used to be in this class. To support loading legacy saves, restore them. - t_state = self.trunk.state_dict() - for k in t_state.keys(): - if k in state_dict.keys(): - state_dict["trunk.%s" % (k,)] = state_dict.pop(k) - super(RRDBNet, self).load_state_dict(state_dict, strict) - - -# Variant of RRDBNet that is "assisted" by an external pretrained image classifier whose -# intermediate layers have been splayed out, pixel-shuffled, and fed back in. -# TODO: Convert to use new RRDBBase hierarchy. -class AssistedRRDBNet(nn.Module): - # in_nc=number of input channels. 
- # out_nc=number of output channels. - # nf=internal filter count - # nb=number of additional blocks after the assistance layers. - # gc=growth channel inside of residual blocks - # scale=the number of times the output is doubled in size. - # initial_stride=the stride on the first conv. can be used to downsample the image for processing. - def __init__(self, in_nc, out_nc, nf, nb, gc=32, scale=2, initial_stride=1): - super(AssistedRRDBNet, self).__init__() - self.scale = scale - self.conv_first = nn.Conv2d(in_nc, nf, 7, initial_stride, padding=3, bias=True) - - # Set-up the assist-net, which should do feature extraction for us. - self.assistnet = torchvision.models.wide_resnet50_2(pretrained=True) - self.set_enable_assistnet_training(False) - assist_nf = [4, 8, 16] # Fixed for resnet. Re-evaluate if using other networks. - self.assist2 = RRDB(nf + assist_nf[0], gc) - self.assist3 = RRDB(nf + sum(assist_nf[:2]), gc) - self.assist4 = RRDB(nf + sum(assist_nf), gc) - nf = nf + sum(assist_nf) - - # After this, it's just a "standard" RRDB net. - RRDB_block_f = functools.partial(RRDB, nf=nf, gc=gc) - self.RRDB_trunk = arch_util.make_layer(RRDB_block_f, nb) - self.trunk_conv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True) - #### upsampling - self.upconv1 = nn.Conv2d(nf, nf, 5, 1, padding=2, bias=True) - self.upconv2 = nn.Conv2d(nf, nf, 5, 1, padding=2, bias=True) - self.HRconv = nn.Conv2d(nf, nf, 5, 1, padding=2, bias=True) - self.conv_last = nn.Conv2d(nf, out_nc, 3, 1, 1, bias=True) - - self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) - - def set_enable_assistnet_training(self, en): - for p in self.assistnet.parameters(): - p.requires_grad = en - - def res_extract(self, x): - # Width and height must be factors of 16 to use this architecture. Check that here. 
- (b, f, w, h) = x.shape - assert w % 16 == 0 - assert h % 16 == 0 - - x = self.assistnet.conv1(x) - x = self.assistnet.bn1(x) - x = self.assistnet.relu(x) - x = self.assistnet.maxpool(x) - - x = self.assistnet.layer1(x) - l1 = F.pixel_shuffle(x, 4) - x = self.assistnet.layer2(x) - l2 = F.pixel_shuffle(x, 8) - x = self.assistnet.layer3(x) - l3 = F.pixel_shuffle(x, 16) - return l1, l2, l3 + for m in [ + self.conv_first, self.conv_body, self.conv_up1, + self.conv_up2, self.conv_hr, self.conv_last + ]: + default_init_weights(m, 0.1) def forward(self, x): - # Invoke the assistant net first. - l1, l2, l3 = self.res_extract(x) + """Forward function. - fea = self.conv_first(x) - fea = self.assist2(torch.cat([fea, l3], dim=1)) - fea = self.assist3(torch.cat([fea, l2], dim=1)) - fea = self.assist4(torch.cat([fea, l1], dim=1)) + Args: + x (Tensor): Input tensor with shape (n, c, h, w). - trunk = self.trunk_conv(self.RRDB_trunk(fea)) - fea = fea + trunk + Returns: + Tensor: Forward results. + """ - if self.scale >= 2: - fea = F.interpolate(fea, scale_factor=2, mode='nearest') - fea = self.lrelu(self.upconv1(fea)) - if self.scale >= 4: - fea = F.interpolate(fea, scale_factor=2, mode='nearest') - fea = self.lrelu(self.upconv2(fea)) - out = self.conv_last(self.lrelu(self.HRconv(fea))) - - return (out,) - - -class PixShuffleInitialConv(nn.Module): - def __init__(self, reduction_factor, nf_out): - super(PixShuffleInitialConv, self).__init__() - self.conv = nn.Conv2d(3 * (reduction_factor ** 2), nf_out, 1) - self.unshuffle = PixelUnshuffle(reduction_factor) - - def forward(self, x): - (b, f, w, h) = x.shape - # This module can only be applied to input images (with 3 channels) - assert f == 3 - - x = self.unshuffle(x) - return self.conv(x) - - -# This class uses a RRDBTrunk to perform processing on an image, then upsamples it. 
-class PixShuffleRRDB(RRDBBase): - def __init__(self, nf, nb, gc=32, scale=2, rrdb_block_f=None): - super(PixShuffleRRDB, self).__init__() - - # This class does a 4x pixel shuffle on the filter count inside the trunk, so nf must be divisible by 16. - assert nf % 16 == 0 - - # Trunk - does actual processing. - self.trunk = RRDBTrunk(3, nf, nb, gc, 1, rrdb_block_f, PixShuffleInitialConv(4, nf)) - self.trunks = [self.trunk] - - # Upsampling - pix_nf = int(nf/16) - self.scale = scale - self.upconv1 = nn.Conv2d(pix_nf, pix_nf, 5, 1, padding=2, bias=True) - self.upconv2 = nn.Conv2d(pix_nf, pix_nf, 5, 1, padding=2, bias=True) - self.HRconv = nn.Conv2d(pix_nf, pix_nf, 5, 1, padding=2, bias=True) - self.conv_last = nn.Conv2d(pix_nf, 3, 3, 1, 1, bias=True) - self.pixel_shuffle = nn.PixelShuffle(4) - self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) - - def forward(self, x): - fea = self.trunk(x) - fea = self.pixel_shuffle(fea) - - if self.scale >= 2: - fea = F.interpolate(fea, scale_factor=2, mode='nearest') - fea = self.lrelu(self.upconv1(fea)) - if self.scale >= 4: - fea = F.interpolate(fea, scale_factor=2, mode='nearest') - fea = self.lrelu(self.upconv2(fea)) - out = self.conv_last(self.lrelu(self.HRconv(fea))) - - return (out,) \ No newline at end of file + feat = self.conv_first(x) + body_feat = self.conv_body(checkpoint_sequential(self.body, 5, feat)) + feat = feat + body_feat + # upsample + feat = self.lrelu( + self.conv_up1(F.interpolate(feat, scale_factor=2, mode='nearest'))) + feat = self.lrelu( + self.conv_up2(F.interpolate(feat, scale_factor=2, mode='nearest'))) + out = self.conv_last(self.lrelu(self.conv_hr(feat))) + return out \ No newline at end of file diff --git a/codes/models/archs/arch_util.py b/codes/models/archs/arch_util.py index 04d49ee0..ba2e2abd 100644 --- a/codes/models/archs/arch_util.py +++ b/codes/models/archs/arch_util.py @@ -5,6 +5,22 @@ import torch.nn.functional as F import torch.nn.utils.spectral_norm as SpectralNorm from math 
import sqrt +def kaiming_init(module, + a=0, + mode='fan_out', + nonlinearity='relu', + bias=0, + distribution='normal'): + assert distribution in ['uniform', 'normal'] + if distribution == 'uniform': + nn.init.kaiming_uniform_( + module.weight, a=a, mode=mode, nonlinearity=nonlinearity) + else: + nn.init.kaiming_normal_( + module.weight, a=a, mode=mode, nonlinearity=nonlinearity) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + def pixel_norm(x, epsilon=1e-8): return x * torch.rsqrt(torch.mean(torch.pow(x, 2), dim=1, keepdims=True) + epsilon) @@ -28,14 +44,34 @@ def initialize_weights(net_l, scale=1): init.constant_(m.bias.data, 0.0) -def make_layer(block, n_layers, return_layers=False): +def make_layer(block, num_blocks, **kwarg): + """Make layers by stacking the same blocks. + Args: + block (nn.Module): nn.Module class for basic block. + num_blocks (int): number of blocks. + Returns: + nn.Sequential: Stacked blocks in nn.Sequential. + """ layers = [] - for _ in range(n_layers): - layers.append(block()) - if return_layers: - return nn.Sequential(*layers), layers - else: - return nn.Sequential(*layers) + for _ in range(num_blocks): + layers.append(block(**kwarg)) + return nn.Sequential(*layers) + + +def default_init_weights(module, scale=1): + """Initialize network weights. + Args: + module (nn.Module): Module to be initialized. + scale (float): Scale initialized weights, especially for residual + blocks. 
+ """ + for m in module.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m, a=0, mode='fan_in', bias=0) + m.weight.data *= scale + elif isinstance(m, nn.Linear): + kaiming_init(m, a=0, mode='fan_in', bias=0) + m.weight.data *= scale class ResidualBlock(nn.Module): diff --git a/codes/models/base_model.py b/codes/models/base_model.py index ea08aecc..be942956 100644 --- a/codes/models/base_model.py +++ b/codes/models/base_model.py @@ -110,6 +110,8 @@ class BaseModel(): for k, v in load_net.items(): if k.startswith('module.'): load_net_clean[k[7:]] = v + elif k.startswith('generator.'): # Hack to fix ESRGAN pretrained model: strip the 10-char 'generator.' prefix. + load_net_clean[k[10:]] = v else: load_net_clean[k] = v network.load_state_dict(load_net_clean, strict=strict) diff --git a/codes/models/networks.py b/codes/models/networks.py index 8423fb7f..fbb67440 100644 --- a/codes/models/networks.py +++ b/codes/models/networks.py @@ -36,14 +36,8 @@ def define_G(opt, net_key='network_G', scale=None): netG = SRResNet_arch.MSRResNet(in_nc=opt_net['in_nc'], out_nc=opt_net['out_nc'], nf=opt_net['nf'], nb=opt_net['nb'], upscale=opt_net['scale']) elif which_model == 'RRDBNet': - # RRDB does scaling in two steps, so take the sqrt of the scale we actually want to achieve and feed it to RRDB. - initial_stride = 1 if 'initial_stride' not in opt_net else opt_net['initial_stride'] - assert initial_stride == 1 or initial_stride == 2 - # Need to adjust the scale the generator sees by the stride since the stride causes a down-sample. 
- gen_scale = scale * initial_stride - netG = RRDBNet_arch.RRDBNet(in_nc=opt_net['in_nc'], out_nc=opt_net['out_nc'], - nf=opt_net['nf'], nb=opt_net['nb'], scale=opt_net['scale'] if 'scale' in opt_net.keys() else gen_scale, - initial_stride=initial_stride) + netG = RRDBNet_arch.RRDBNet(in_channels=opt_net['in_nc'], out_channels=opt_net['out_nc'], + mid_channels=opt_net['nf'], num_blocks=opt_net['nb']) elif which_model == 'rcan': #args: n_resgroups, n_resblocks, res_scale, reduction, scale, n_feats opt_net['rgb_range'] = 255