From 9fed90393f1de07891d2178e54ba08be6706641c Mon Sep 17 00:00:00 2001 From: James Betker Date: Tue, 5 Jan 2021 20:14:22 -0700 Subject: [PATCH] Add lucidrains pixpro trainer --- .../__init__.py | 0 .../pixpro_lucidrains.py | 487 ++++++++++++++++++ .../resnet_unet.py | 153 ++++++ codes/scripts/byol_extract_wrapped_model.py | 4 +- codes/scripts/extract_subimages_with_ref.py | 14 +- codes/train.py | 2 +- 6 files changed, 650 insertions(+), 10 deletions(-) create mode 100644 codes/models/pixel_level_contrastive_learning/__init__.py create mode 100644 codes/models/pixel_level_contrastive_learning/pixpro_lucidrains.py create mode 100644 codes/models/pixel_level_contrastive_learning/resnet_unet.py diff --git a/codes/models/pixel_level_contrastive_learning/__init__.py b/codes/models/pixel_level_contrastive_learning/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/codes/models/pixel_level_contrastive_learning/pixpro_lucidrains.py b/codes/models/pixel_level_contrastive_learning/pixpro_lucidrains.py new file mode 100644 index 00000000..9d13da31 --- /dev/null +++ b/codes/models/pixel_level_contrastive_learning/pixpro_lucidrains.py @@ -0,0 +1,487 @@ +import math +import copy +import os +import random +from functools import wraps, partial +from math import floor + +import torch +import torchvision +from torch import nn, einsum +import torch.nn.functional as F + +from kornia import augmentation as augs +from kornia import filters, color + +from einops import rearrange + +# helper functions +from trainer.networks import register_model, create_model + + +def identity(t): + return t + +def default(val, def_val): + return def_val if val is None else val + +def rand_true(prob): + return random.random() < prob + +def singleton(cache_key): + def inner_fn(fn): + @wraps(fn) + def wrapper(self, *args, **kwargs): + instance = getattr(self, cache_key) + if instance is not None: + return instance + + instance = fn(self, *args, **kwargs) + setattr(self, cache_key, instance) + return instance + return wrapper + return inner_fn + +def get_module_device(module): + return next(module.parameters()).device + +def set_requires_grad(model, val): + for p in model.parameters(): + p.requires_grad = val + +def cutout_coordinates(image, ratio_range = (0.6, 0.8)): + _, _, orig_h, orig_w = image.shape + + ratio_lo, ratio_hi = ratio_range + random_ratio = ratio_lo + random.random() * (ratio_hi - ratio_lo) + w, h = floor(random_ratio * orig_w), floor(random_ratio * orig_h) + coor_x = floor((orig_w - w) * random.random()) + coor_y = floor((orig_h - h) * random.random()) + return ((coor_y, coor_y + h), (coor_x, coor_x + w)), random_ratio + +def cutout_and_resize(image, coordinates, output_size = None, mode = 'nearest'): + shape = image.shape + output_size = default(output_size, shape[2:]) + (y0, y1), (x0, x1) = coordinates + cutout_image = image[:, :, y0:y1, x0:x1] + return F.interpolate(cutout_image, size = output_size, mode = mode) + +# augmentation utils + +class RandomApply(nn.Module): + def __init__(self, fn, p): + super().__init__() + self.fn = fn + self.p = p + def forward(self, x): + if random.random() > self.p: + return x + return self.fn(x) + +# exponential moving average + +class EMA(): + def __init__(self, beta): + super().__init__() + self.beta = beta + + def update_average(self, old, new): + if old is None: + return new + return old * self.beta + (1 - self.beta) * new + +def update_moving_average(ema_updater, ma_model, current_model): + for current_params, ma_params in zip(current_model.parameters(), 
ma_model.parameters()): + old_weight, up_weight = ma_params.data, current_params.data + ma_params.data = ema_updater.update_average(old_weight, up_weight) + +# loss fn + +def loss_fn(x, y): + x = F.normalize(x, dim=-1, p=2) + y = F.normalize(y, dim=-1, p=2) + return 2 - 2 * (x * y).sum(dim=-1) + +# classes + +class MLP(nn.Module): + def __init__(self, chan, chan_out = 256, inner_dim = 2048): + super().__init__() + self.net = nn.Sequential( + nn.Linear(chan, inner_dim), + nn.BatchNorm1d(inner_dim), + nn.ReLU(), + nn.Linear(inner_dim, chan_out) + ) + + def forward(self, x): + return self.net(x) + +class ConvMLP(nn.Module): + def __init__(self, chan, chan_out = 256, inner_dim = 2048): + super().__init__() + self.net = nn.Sequential( + nn.Conv2d(chan, inner_dim, 1), + nn.BatchNorm2d(inner_dim), + nn.ReLU(), + nn.Conv2d(inner_dim, chan_out, 1) + ) + + def forward(self, x): + return self.net(x) + +class PPM(nn.Module): + def __init__( + self, + *, + chan, + num_layers = 1, + gamma = 2): + super().__init__() + self.gamma = gamma + + if num_layers == 0: + self.transform_net = nn.Identity() + elif num_layers == 1: + self.transform_net = nn.Conv2d(chan, chan, 1) + elif num_layers == 2: + self.transform_net = nn.Sequential( + nn.Conv2d(chan, chan, 1), + nn.BatchNorm2d(chan), + nn.ReLU(), + nn.Conv2d(chan, chan, 1) + ) + else: + raise ValueError('num_layers must be one of 0, 1, or 2') + + def forward(self, x): + xi = x[:, :, :, :, None, None] + xj = x[:, :, None, None, :, :] + similarity = F.relu(F.cosine_similarity(xi, xj, dim = 1)) ** self.gamma + + transform_out = self.transform_net(x) + out = einsum('b x y h w, b c h w -> b c x y', similarity, transform_out) + return out + +# a wrapper class for the base neural network +# will manage the interception of the hidden layer output +# and pipe it into the projecter and predictor nets + +class NetWrapper(nn.Module): + def __init__( + self, + *, + net, + projection_size, + projection_hidden_size, + layer_pixel = -2, + layer_instance = -2 + ): + super().__init__() + self.net = net + self.layer_pixel = layer_pixel + self.layer_instance = layer_instance + + self.pixel_projector = None + self.instance_projector = None + + self.projection_size = projection_size + self.projection_hidden_size = projection_hidden_size + + self.hidden_pixel = None + self.hidden_instance = None + self.hook_registered = False + + def _find_layer(self, layer_id): + if type(layer_id) == str: + modules = dict([*self.net.named_modules()]) + return modules.get(layer_id, None) + elif type(layer_id) == int: + children = [*self.net.children()] + return children[layer_id] + return None + + def _hook(self, attr_name, _, __, output): + setattr(self, attr_name, output) + + def _register_hook(self): + pixel_layer = self._find_layer(self.layer_pixel) + instance_layer = self._find_layer(self.layer_instance) + + assert pixel_layer is not None, f'hidden layer ({self.layer_pixel}) not found' + assert instance_layer is not None, f'hidden layer ({self.layer_instance}) not found' + + pixel_layer.register_forward_hook(partial(self._hook, 'hidden_pixel')) + instance_layer.register_forward_hook(partial(self._hook, 'hidden_instance')) + self.hook_registered = True + + @singleton('pixel_projector') + def _get_pixel_projector(self, hidden): + _, dim, *_ = hidden.shape + projector = ConvMLP(dim, self.projection_size, self.projection_hidden_size) + return projector.to(hidden) + + @singleton('instance_projector') + def _get_instance_projector(self, hidden): + _, dim = hidden.shape + projector = MLP(dim, 
self.projection_size, self.projection_hidden_size) + return projector.to(hidden) + + def get_representation(self, x): + if not self.hook_registered: + self._register_hook() + + _ = self.net(x) + hidden_pixel = self.hidden_pixel + hidden_instance = self.hidden_instance + self.hidden_pixel = None + self.hidden_instance = None + assert hidden_pixel is not None, f'hidden pixel layer {self.layer_pixel} never emitted an output' + assert hidden_instance is not None, f'hidden instance layer {self.layer_instance} never emitted an output' + return hidden_pixel, hidden_instance + + def forward(self, x): + pixel_representation, instance_representation = self.get_representation(x) + instance_representation = instance_representation.flatten(1) + + pixel_projector = self._get_pixel_projector(pixel_representation) + instance_projector = self._get_instance_projector(instance_representation) + + pixel_projection = pixel_projector(pixel_representation) + instance_projection = instance_projector(instance_representation) + return pixel_projection, instance_projection + +# main class + +class PixelCL(nn.Module): + def __init__( + self, + net, + image_size, + hidden_layer_pixel = -2, + hidden_layer_instance = -2, + projection_size = 256, + projection_hidden_size = 2048, + augment_fn = None, + augment_fn2 = None, + prob_rand_hflip = 0.25, + moving_average_decay = 0.99, + ppm_num_layers = 1, + ppm_gamma = 2, + distance_thres = 0.7, + similarity_temperature = 0.3, + alpha = 1., + use_pixpro = True, + cutout_ratio_range = (0.6, 0.8), + cutout_interpolate_mode = 'nearest', + coord_cutout_interpolate_mode = 'bilinear' + ): + super().__init__() + + DEFAULT_AUG = nn.Sequential( + RandomApply(augs.ColorJitter(0.3, 0.3, 0.3, 0.2), p=0.8), + augs.RandomGrayscale(p=0.2), + RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1) + ) + + self.augment1 = default(augment_fn, DEFAULT_AUG) + self.augment2 = default(augment_fn2, self.augment1) + self.prob_rand_hflip = prob_rand_hflip + + self.online_encoder = NetWrapper( + net = net, + projection_size = projection_size, + projection_hidden_size = projection_hidden_size, + layer_pixel = hidden_layer_pixel, + layer_instance = hidden_layer_instance + ) + + self.target_encoder = None + self.target_ema_updater = EMA(moving_average_decay) + + self.distance_thres = distance_thres + self.similarity_temperature = similarity_temperature + self.alpha = alpha + + self.use_pixpro = use_pixpro + + if use_pixpro: + self.propagate_pixels = PPM( + chan = projection_size, + num_layers = ppm_num_layers, + gamma = ppm_gamma + ) + + self.cutout_ratio_range = cutout_ratio_range + self.cutout_interpolate_mode = cutout_interpolate_mode + self.coord_cutout_interpolate_mode = coord_cutout_interpolate_mode + + # instance level predictor + self.online_predictor = MLP(projection_size, projection_size, projection_hidden_size) + + # get device of network and make wrapper same device + device = get_module_device(net) + self.to(device) + + # send a mock image tensor to instantiate singleton parameters + self.forward(torch.randn(2, 3, image_size, image_size, device=device)) + + @singleton('target_encoder') + def _get_target_encoder(self): + target_encoder = copy.deepcopy(self.online_encoder) + set_requires_grad(target_encoder, False) + return target_encoder + + def reset_moving_average(self): + del self.target_encoder + self.target_encoder = None + + def update_moving_average(self): + assert self.target_encoder is not None, 'target encoder has not been created yet' + 
update_moving_average(self.target_ema_updater, self.target_encoder, self.online_encoder) + + def forward(self, x): + shape, device, prob_flip = x.shape, x.device, self.prob_rand_hflip + + rand_flip_fn = lambda t: torch.flip(t, dims = (-1,)) + + flip_image_one, flip_image_two = rand_true(prob_flip), rand_true(prob_flip) + flip_image_one_fn = rand_flip_fn if flip_image_one else identity + flip_image_two_fn = rand_flip_fn if flip_image_two else identity + + cutout_coordinates_one, _ = cutout_coordinates(x, self.cutout_ratio_range) + cutout_coordinates_two, _ = cutout_coordinates(x, self.cutout_ratio_range) + + image_one_cutout = cutout_and_resize(x, cutout_coordinates_one, mode = self.cutout_interpolate_mode) + image_two_cutout = cutout_and_resize(x, cutout_coordinates_two, mode = self.cutout_interpolate_mode) + + image_one_cutout = flip_image_one_fn(image_one_cutout) + image_two_cutout = flip_image_two_fn(image_two_cutout) + + image_one_cutout, image_two_cutout = self.augment1(image_one_cutout), self.augment2(image_two_cutout) + + self.aug1 = image_one_cutout.detach().clone() + self.aug2 = image_two_cutout.detach().clone() + + proj_pixel_one, proj_instance_one = self.online_encoder(image_one_cutout) + proj_pixel_two, proj_instance_two = self.online_encoder(image_two_cutout) + + image_h, image_w = shape[2:] + + proj_image_shape = proj_pixel_one.shape[2:] + proj_image_h, proj_image_w = proj_image_shape + + coordinates = torch.meshgrid( + torch.arange(image_h, device = device), + torch.arange(image_w, device = device) + ) + + coordinates = torch.stack(coordinates).unsqueeze(0).float() + coordinates /= math.sqrt(image_h ** 2 + image_w ** 2) + coordinates[:, 0] *= proj_image_h + coordinates[:, 1] *= proj_image_w + + proj_coors_one = cutout_and_resize(coordinates, cutout_coordinates_one, output_size = proj_image_shape, mode = self.coord_cutout_interpolate_mode) + proj_coors_two = cutout_and_resize(coordinates, cutout_coordinates_two, output_size = proj_image_shape, mode = self.coord_cutout_interpolate_mode) + + proj_coors_one = flip_image_one_fn(proj_coors_one) + proj_coors_two = flip_image_two_fn(proj_coors_two) + + proj_coors_one, proj_coors_two = map(lambda t: rearrange(t, 'b c h w -> (b h w) c'), (proj_coors_one, proj_coors_two)) + pdist = nn.PairwiseDistance(p = 2) + + num_pixels = proj_coors_one.shape[0] + + proj_coors_one_expanded = proj_coors_one[:, None].expand(num_pixels, num_pixels, -1).reshape(num_pixels * num_pixels, 2) + proj_coors_two_expanded = proj_coors_two[None, :].expand(num_pixels, num_pixels, -1).reshape(num_pixels * num_pixels, 2) + + distance_matrix = pdist(proj_coors_one_expanded, proj_coors_two_expanded) + distance_matrix = distance_matrix.reshape(num_pixels, num_pixels) + + positive_mask_one_two = distance_matrix < self.distance_thres + positive_mask_two_one = positive_mask_one_two.t() + + with torch.no_grad(): + target_encoder = self._get_target_encoder() + target_proj_pixel_one, target_proj_instance_one = target_encoder(image_one_cutout) + target_proj_pixel_two, target_proj_instance_two = target_encoder(image_two_cutout) + + # flatten all the pixel projections + + flatten = lambda t: rearrange(t, 'b c h w -> b c (h w)') + + target_proj_pixel_one, target_proj_pixel_two = list(map(flatten, (target_proj_pixel_one, target_proj_pixel_two))) + + # get total number of positive pixel pairs + + positive_pixel_pairs = positive_mask_one_two.sum() + + # get instance level loss + + pred_instance_one = self.online_predictor(proj_instance_one) + pred_instance_two = 
self.online_predictor(proj_instance_two) + + loss_instance_one = loss_fn(pred_instance_one, target_proj_instance_two.detach()) + loss_instance_two = loss_fn(pred_instance_two, target_proj_instance_one.detach()) + + instance_loss = (loss_instance_one + loss_instance_two).mean() + + if positive_pixel_pairs == 0: + return instance_loss, 0 + + if not self.use_pixpro: + # calculate pix contrast loss + + proj_pixel_one, proj_pixel_two = list(map(flatten, (proj_pixel_one, proj_pixel_two))) + + similarity_one_two = F.cosine_similarity(proj_pixel_one[..., :, None], target_proj_pixel_two[..., None, :], dim = 1) / self.similarity_temperature + similarity_two_one = F.cosine_similarity(proj_pixel_two[..., :, None], target_proj_pixel_one[..., None, :], dim = 1) / self.similarity_temperature + + loss_pix_one_two = -torch.log( + similarity_one_two.masked_select(positive_mask_one_two[None, ...]).exp().sum() / + similarity_one_two.exp().sum() + ) + + loss_pix_two_one = -torch.log( + similarity_two_one.masked_select(positive_mask_two_one[None, ...]).exp().sum() / + similarity_two_one.exp().sum() + ) + + pix_loss = (loss_pix_one_two + loss_pix_two_one) / 2 + else: + # calculate pix pro loss + + propagated_pixels_one = self.propagate_pixels(proj_pixel_one) + propagated_pixels_two = self.propagate_pixels(proj_pixel_two) + + propagated_pixels_one, propagated_pixels_two = list(map(flatten, (propagated_pixels_one, propagated_pixels_two))) + + propagated_similarity_one_two = F.cosine_similarity(propagated_pixels_one[..., :, None], target_proj_pixel_two[..., None, :], dim = 1) + propagated_similarity_two_one = F.cosine_similarity(propagated_pixels_two[..., :, None], target_proj_pixel_one[..., None, :], dim = 1) + + loss_pixpro_one_two = - propagated_similarity_one_two.masked_select(positive_mask_one_two[None, ...]).mean() + loss_pixpro_two_one = - propagated_similarity_two_one.masked_select(positive_mask_two_one[None, ...]).mean() + + pix_loss = (loss_pixpro_one_two + loss_pixpro_two_one) / 2 + + # total loss + + loss = pix_loss * self.alpha + instance_loss + return loss, positive_pixel_pairs + + # Allows visualizing what the augmentor is up to. + def visual_dbg(self, step, path): + if not hasattr(self, 'aug1'): + return + torchvision.utils.save_image(self.aug1, os.path.join(path, "%i_aug1.png" % (step,))) + torchvision.utils.save_image(self.aug2, os.path.join(path, "%i_aug2.png" % (step,))) + + +@register_model +def register_pixel_contrastive_learner(opt_net, opt): + subnet = create_model(opt, opt_net['subnet']) + kwargs = opt_net['kwargs'] + if 'subnet_pretrain_path' in opt_net.keys(): + sd = torch.load(opt_net['subnet_pretrain_path']) + subnet.load_state_dict(sd, strict=False) + return PixelCL(subnet, **kwargs) diff --git a/codes/models/pixel_level_contrastive_learning/resnet_unet.py b/codes/models/pixel_level_contrastive_learning/resnet_unet.py new file mode 100644 index 00000000..e9fdbfaa --- /dev/null +++ b/codes/models/pixel_level_contrastive_learning/resnet_unet.py @@ -0,0 +1,153 @@ +# Resnet implementation that adds a u-net style up-conversion component to output values at a +# specified pixel density. +# +# The downsampling part of the network is compatible with the built-in torch resnet for use in +# transfer learning. +# +# Only resnet50 currently supported. 
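+#
+# For transfer learning, the downsampling trunk can in principle be initialised from the
+# pretrained torchvision weights referenced by `model_urls` below (illustrative sketch only;
+# this patch does not wire up pretrained loading itself):
+#   sd = load_state_dict_from_url(model_urls['resnet50'])
+#   model.load_state_dict(sd, strict=False)  # strict=False skips the u-net decoder keys
+#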
+ +import torch +import torch.nn as nn +from torchvision.models.resnet import BasicBlock, Bottleneck, conv1x1, conv3x3 +from torchvision.models.utils import load_state_dict_from_url +import torchvision + + +from trainer.networks import register_model +from utils.util import checkpoint + +model_urls = { + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', +} + + +class ReverseBottleneck(nn.Module): + + def __init__(self, inplanes, planes, groups=1, passthrough=False, + base_width=64, dilation=1, norm_layer=None): + super().__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width / 64.)) * groups + self.passthrough = passthrough + if passthrough: + self.integrate = conv1x1(inplanes*2, inplanes) + self.bn_integrate = norm_layer(inplanes) + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3(width, width, groups, dilation) + self.bn2 = norm_layer(width) + self.residual_upsample = nn.Sequential( + nn.Upsample(scale_factor=2, mode='nearest'), + conv1x1(width, width), + norm_layer(width), + ) + self.conv3 = conv1x1(width, planes) + self.bn3 = norm_layer(planes) + self.relu = nn.ReLU(inplace=True) + self.upsample = nn.Sequential( + nn.Upsample(scale_factor=2, mode='nearest'), + conv1x1(inplanes, planes), + norm_layer(planes), + ) + + def forward(self, x, passthrough=None): + if self.passthrough: + x = self.bn_integrate(self.integrate(torch.cat([x, passthrough], dim=1))) + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.residual_upsample(out) + + out = self.conv3(out) + out = self.bn3(out) + + identity = self.upsample(x) + + out = out + identity + out = self.relu(out) + + return out + + +class UResNet50(torchvision.models.resnet.ResNet): + + def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, + groups=1, width_per_group=64, replace_stride_with_dilation=None, + norm_layer=None): + super().__init__(block, layers, num_classes, zero_init_residual, groups, width_per_group, + replace_stride_with_dilation, norm_layer) + if norm_layer is None: + norm_layer = nn.BatchNorm2d + ''' + # For reference: + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2, + dilate=replace_stride_with_dilation[0]) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2, + dilate=replace_stride_with_dilation[1]) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2, + dilate=replace_stride_with_dilation[2]) + ''' + uplayers = [] + inplanes = 2048 + first = True + for i in range(2): + uplayers.append(ReverseBottleneck(inplanes, inplanes // 2, norm_layer=norm_layer, passthrough=not first)) + inplanes = inplanes // 2 + first = False + self.uplayers = nn.ModuleList(uplayers) + self.tail = nn.Sequential(conv1x1(1024, 512), + norm_layer(512), + nn.ReLU(), + conv3x3(512, 512), + norm_layer(512), + nn.ReLU(), + conv1x1(512, 128)) + + del self.fc # Not used in this implementation and just consumes a ton of GPU memory. + + + def _forward_impl(self, x): + # Should be the exact same implementation of torchvision.models.resnet.ResNet.forward_impl, + # except using checkpoints on the body conv layers. 
+ x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x1 = checkpoint(self.layer1, x) + x2 = checkpoint(self.layer2, x1) + x3 = checkpoint(self.layer3, x2) + x4 = checkpoint(self.layer4, x3) + unused = self.avgpool(x4) # This is performed for instance-level pixpro learning, even though it is unused. + + x = checkpoint(self.uplayers[0], x4) + x = checkpoint(self.uplayers[1], x, x3) + #x = checkpoint(self.uplayers[2], x, x2) + #x = checkpoint(self.uplayers[3], x, x1) + + return checkpoint(self.tail, torch.cat([x, x2], dim=1)) + + def forward(self, x): + return self._forward_impl(x) + + +@register_model +def register_u_resnet50(opt_net, opt): + model = UResNet50(Bottleneck, [3, 4, 6, 3]) + return model + + +if __name__ == '__main__': + model = UResNet50(Bottleneck, [3,4,6,3]) + samp = torch.rand(1,3,224,224) + model(samp) + # For pixpro: attach to "tail.3" diff --git a/codes/scripts/byol_extract_wrapped_model.py b/codes/scripts/byol_extract_wrapped_model.py index 0b5147c4..65652f16 100644 --- a/codes/scripts/byol_extract_wrapped_model.py +++ b/codes/scripts/byol_extract_wrapped_model.py @@ -3,8 +3,8 @@ import torch from models.spinenet_arch import SpineNet if __name__ == '__main__': - pretrained_path = '../../experiments/byol_discriminator.pth' - output_path = '../../experiments/byol_discriminator_extracted.pth' + pretrained_path = '../../experiments/resnet_byol_diffframe_115k.pth' + output_path = '../../experiments/resnet_byol_diffframe_115k_.pth' wrap_key = 'online_encoder.net.' sd = torch.load(pretrained_path) diff --git a/codes/scripts/extract_subimages_with_ref.py b/codes/scripts/extract_subimages_with_ref.py index c6dd1182..a7df57c3 100644 --- a/codes/scripts/extract_subimages_with_ref.py +++ b/codes/scripts/extract_subimages_with_ref.py @@ -19,13 +19,13 @@ def main(): # compression time. If read raw images during training, use 0 for faster IO speed. opt['dest'] = 'file' - opt['input_folder'] = 'F:\\4k6k\\datasets\\ns_images\\imagesets\\images' - opt['save_folder'] = 'F:\\4k6k\\datasets\\ns_images\\imagesets\\512_with_ref_new' - opt['crop_sz'] = [1024, 2048] # the size of each sub-image - opt['step'] = [700, 1200] # step of the sliding crop window - opt['exclusions'] = [[],[],[]] # image names matching these terms wont be included in the processing. - opt['thres_sz'] = 256 # size threshold - opt['resize_final_img'] = [.5, .25] + opt['input_folder'] = 'F:\\4k6k\\datasets\\ns_images\\imagesets\\imageset_1024_square_with_new' + opt['save_folder'] = 'F:\\4k6k\\datasets\\ns_images\\imagesets\\256_with_ref_v5' + opt['crop_sz'] = [256, 512] # the size of each sub-image + opt['step'] = [256, 512] # step of the sliding crop window + opt['exclusions'] = [[],[]] # image names matching these terms wont be included in the processing. + opt['thres_sz'] = 129 # size threshold + opt['resize_final_img'] = [1, .5] opt['only_resize'] = False opt['vertical_split'] = False opt['input_image_max_size_before_being_halved'] = 5500 # As described, images larger than this dimensional size will be halved before anything else is done. 
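Note on the new extract_subimages_with_ref.py settings above: assuming each crop size is paired with the corresponding resize factor, both crop passes emit 256px tiles, which matches the new '256_with_ref_v5' destination folder. A minimal sketch of that arithmetic (illustrative only, not part of the patch):

    crop_sz = [256, 512]           # size of each sub-image per pass
    resize_final_img = [1, .5]     # per-pass resize factor
    final_sizes = [int(c * r) for c, r in zip(crop_sz, resize_final_img)]
    print(final_sizes)             # [256, 256]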
diff --git a/codes/train.py b/codes/train.py index 6d5ca881..2cef4362 100644 --- a/codes/train.py +++ b/codes/train.py @@ -295,7 +295,7 @@ class Trainer: if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../experiments/train_xxfaces_styled_sr/train_xxfaces_styled_sr.yml') + parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_pixpro_resnet.yml') parser.add_argument('--launcher', choices=['none', 'pytorch'], default='none', help='job launcher') parser.add_argument('--local_rank', type=int, default=0) args = parser.parse_args()
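Usage note: a minimal sketch of how the two newly registered models fit together when driven directly rather than through the option/registry system. The image size and hook layer names below are assumptions inferred from the in-file comments (resnet_unet.py suggests attaching the pixel branch to 'tail.3', and avgpool is computed for the instance-level branch); they are not values confirmed by this patch.

    import torch
    from torchvision.models.resnet import Bottleneck

    from models.pixel_level_contrastive_learning.resnet_unet import UResNet50
    from models.pixel_level_contrastive_learning.pixpro_lucidrains import PixelCL

    # Build the u-net style trunk (same construction as register_u_resnet50).
    net = UResNet50(Bottleneck, [3, 4, 6, 3])

    # Wrap it in the PixPro learner; layer names and image size are assumed (see note above).
    learner = PixelCL(
        net,
        image_size=256,
        hidden_layer_pixel='tail.3',      # dense features for the pixel-level loss
        hidden_layer_instance='avgpool',  # pooled features for the instance-level loss
    )

    opt = torch.optim.Adam(learner.parameters(), lr=1e-4)

    images = torch.randn(2, 3, 256, 256)
    loss, positive_pixel_pairs = learner(images)  # (total loss, number of positive pixel pairs)
    loss.backward()
    opt.step()
    opt.zero_grad()
    learner.update_moving_average()  # EMA update of the target encoder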