From 0a19e53df03a22058f5d122c0ee05c29338c2df8 Mon Sep 17 00:00:00 2001 From: James Betker Date: Mon, 14 Dec 2020 23:59:11 -0700 Subject: [PATCH] BYOL mods --- codes/data/byol_attachment.py | 37 +++++++++++++-------- codes/models/byol/byol_structural.py | 1 + codes/scripts/byol_extract_wrapped_model.py | 2 +- codes/scripts/byol_spinenet_playground.py | 23 ++++++------- 4 files changed, 37 insertions(+), 26 deletions(-) diff --git a/codes/data/byol_attachment.py b/codes/data/byol_attachment.py index 55ae1961..bf1dc2a6 100644 --- a/codes/data/byol_attachment.py +++ b/codes/data/byol_attachment.py @@ -110,14 +110,14 @@ class RandomSharedRegionCrop(nn.Module): d = d // self.multiple # Step 2 - base_w = random.randint(d//2, d-1) + base_w = random.randint(d//2+1, d-1) base_l = random.randint(0, d-base_w) base_h = random.randint(base_w-1, base_w+1) base_t = random.randint(0, d-base_h) base_r, base_b = base_l+base_w, base_t+base_h # Step 3 - im2_w = random.randint(d//2, d-1) + im2_w = random.randint(d//2+1, d-1) im2_l = random.randint(0, d-im2_w) im2_h = random.randint(im2_w-1, im2_w+1) im2_t = random.randint(0, d-im2_h) @@ -153,7 +153,7 @@ class RandomSharedRegionCrop(nn.Module): i2_shared_t, i2_shared_l = snap(im2_t, base_t), snap(im2_l, base_l) ix_h = min(base_b, im2_b) - max(base_t, im2_t) ix_w = min(base_r, im2_r) - max(base_l, im2_l) - recompute_package = torch.tensor([base_h, base_w, i1_shared_t, i1_shared_l, im2_h, im2_w, i2_shared_t, i2_shared_l, should_flip, ix_h, ix_w], dtype=torch.long) + recompute_package = torch.tensor([d, base_h, base_w, i1_shared_t, i1_shared_l, im2_h, im2_w, i2_shared_t, i2_shared_l, should_flip, ix_h, ix_w], dtype=torch.long) # Step 7 mask1 = torch.full((1, base_h*m, base_w*m), fill_value=.5) @@ -167,7 +167,14 @@ class RandomSharedRegionCrop(nn.Module): mask[:, im2_t*m:(im2_t+im2_w)*m, im2_l*m:(im2_l+im2_h)*m] += .33 masked_dbg = i1 * mask - return p1_resized, p2_resized, recompute_package, masked1, masked2, masked_dbg + # Step 8 - Rebuild shared regions for testing purposes. + p1_shuf, p2_shuf = PixelUnshuffle(self.multiple)(p1_resized.unsqueeze(0)), \ + PixelUnshuffle(self.multiple)(p2_resized.unsqueeze(0)) + i1_shared, i2_shared = reconstructed_shared_regions(p1_shuf, p2_shuf, recompute_package.unsqueeze(0)) + i1_shared = pad_to(nn.PixelShuffle(self.multiple)(i1_shared).squeeze(0), d * m) + i2_shared = pad_to(nn.PixelShuffle(self.multiple)(i2_shared).squeeze(0), d*m) + + return p1_resized, p2_resized, recompute_package, masked1, masked2, masked_dbg, i1_shared, i2_shared # Uses the recompute package returned from the above dataset to extract matched-size "similar regions" from two feature @@ -180,14 +187,17 @@ def reconstructed_shared_regions(fea1, fea2, recompute_package: torch.Tensor): # It'd be real nice if we could do this at the batch level, but I don't see a really good way to do that outside # of conforming the recompute_package across the entire batch. for b in range(package.shape[0]): - f1_h, f1_w, f1s_t, f1s_l, f2_h, f2_w, f2s_t, f2s_l, should_flip, s_h, s_w = tuple(package[b].tolist()) + expected_dim, f1_h, f1_w, f1s_t, f1s_l, f2_h, f2_w, f2s_t, f2s_l, should_flip, s_h, s_w = tuple(package[b].tolist()) + # If you are hitting this assert, you specified `latent_multiple` in your dataset config wrong. + assert expected_dim == fea1.shape[2] and expected_dim == fea2.shape[2] + # Unflip 2 if needed. f2 = fea2[b] if should_flip == 1: f2 = kornia.geometry.transform.hflip(f2) # Resize the input features to match - f1s = F.interpolate(fea1[b].unsqueeze(0), (f1_h, f1_w), mode="bilinear") - f2s = F.interpolate(f2.unsqueeze(0), (f2_h, f2_w), mode="bilinear") + f1s = F.interpolate(fea1[b].unsqueeze(0), (f1_h, f1_w), mode="nearest") + f2s = F.interpolate(f2.unsqueeze(0), (f2_h, f2_w), mode="nearest") # Outputs must be padded so they can "get along" with each other. res1.append(pad_to(f1s[:, :, f1s_t:f1s_t+s_h, f1s_l:f1s_l+s_w], pad_dim)) res2.append(pad_to(f2s[:, :, f2s_t:f2s_t+s_h, f2s_l:f2s_l+s_w], pad_dim)) @@ -214,9 +224,10 @@ class StructuredCropDatasetWrapper(Dataset): item = self.wrapped_dataset[item] a1 = self.aug(item['hq']).squeeze(dim=0) a2 = self.aug(item['lq']).squeeze(dim=0) - a1, a2, sr_dim, m1, m2, db = self.rrc(a1, a2) + a1, a2, sr_dim, m1, m2, db, i1s, i2s = self.rrc(a1, a2) item.update({'aug1': a1, 'aug2': a2, 'similar_region_dimensions': sr_dim, - 'masked1': m1, 'masked2': m2, 'aug_shared_view': db}) + 'masked1': m1, 'masked2': m2, 'aug_shared_view': db, + 'i1_shared': i1s, 'i2_shared': i2s}) return item def __len__(self): @@ -240,7 +251,7 @@ if __name__ == '__main__': 'num_corrupts_per_image': 1, 'corrupt_before_downsize': True, }, - 'latent_multiple': 8, + 'latent_multiple': 16, 'jitter_range': 0, } @@ -254,8 +265,8 @@ if __name__ == '__main__': #if k in [ 'aug_shared_view', 'masked1', 'masked2']: #torchvision.utils.save_image(v.unsqueeze(0), "debug/%i_%s.png" % (i, k)) rcpkg = o['similar_region_dimensions'] - pixun = PixelUnshuffle(8) - pixsh = nn.PixelShuffle(8) - rc1, rc2 = reconstructed_shared_regions(pixun(o['aug1'].unsqueeze(0)), pixun(o['aug2'].unsqueeze(0)), rcpkg) + pixun = PixelUnshuffle(16) + pixsh = nn.PixelShuffle(16) + rc1, rc2 = reconstructed_shared_regions(pixun(o['aug1'].unsqueeze(0)), pixun(o['aug2'].unsqueeze(0)), rcpkg.unsqueeze(0)) #torchvision.utils.save_image(pixsh(rc1), "debug/%i_rc1.png" % (i,)) #torchvision.utils.save_image(pixsh(rc2), "debug/%i_rc2.png" % (i,)) diff --git a/codes/models/byol/byol_structural.py b/codes/models/byol/byol_structural.py index 3ebe5fcf..17d5cfbb 100644 --- a/codes/models/byol/byol_structural.py +++ b/codes/models/byol/byol_structural.py @@ -113,6 +113,7 @@ class StructuralBYOL(nn.Module): if pretrained_state_dict: net.load_state_dict(torch.load(pretrained_state_dict), strict=True) self.freeze_until = freeze_until + self.frozen = False if self.freeze_until > 0: for p in net.parameters(): p.DO_NOT_TRAIN = True diff --git a/codes/scripts/byol_extract_wrapped_model.py b/codes/scripts/byol_extract_wrapped_model.py index 545e51d9..f5e80c54 100644 --- a/codes/scripts/byol_extract_wrapped_model.py +++ b/codes/scripts/byol_extract_wrapped_model.py @@ -3,7 +3,7 @@ import torch from models.archs.spinenet_arch import SpineNet if __name__ == '__main__': - pretrained_path = '../../experiments/train_sbyol_512unsupervised/models/35000_generator.pth' + pretrained_path = '../../experiments/train_sbyol_512unsupervised_restart/models/48000_generator.pth' output_path = '../../experiments/spinenet49_imgset_sbyol.pth' wrap_key = 'online_encoder.net.' diff --git a/codes/scripts/byol_spinenet_playground.py b/codes/scripts/byol_spinenet_playground.py index 33558b85..c28b955e 100644 --- a/codes/scripts/byol_spinenet_playground.py +++ b/codes/scripts/byol_spinenet_playground.py @@ -171,21 +171,20 @@ def find_similar_latents(model, model_index=0, lat_patch_size=16, compare_fn=str t = lat_patch_size * u[1] l = lat_patch_size * u[2] patch = img[:, t:t + lat_patch_size, l:l + lat_patch_size] - img_out[:,:, h_ * lat_patch_size:h_ * lat_patch_size + lat_patch_size, - w_ * lat_patch_size:w_ * lat_patch_size + lat_patch_size] = patch + io_loc_t = h_ * lat_patch_size + io_loc_l = w_ * lat_patch_size + img_out[:,:,io_loc_t:io_loc_t+lat_patch_size,io_loc_l:io_loc_l+lat_patch_size] = patch # Also save the image with a masked map mask = torch.full_like(img, fill_value=.3) mask[:, t:t + lat_patch_size, l:l + lat_patch_size] = 1 masked_img = img * mask - masked_src_img_output_file = os.path.join(output_path, "%i_%i__%i.png" % (t, l, u[0])) + masked_src_img_output_file = os.path.join(output_path, "%i_%i__%i.png" % (io_loc_t, io_loc_l, u[0])) torchvision.utils.save_image(masked_img, masked_src_img_output_file) # Update the image map areas. - img_map_areas.append('' % (w_ * lat_patch_size, - h_ * lat_patch_size, - w_ * lat_patch_size + lat_patch_size, - h_ * lat_patch_size + lat_patch_size, + img_map_areas.append('' % (io_loc_l, io_loc_t, + io_loc_l + lat_patch_size, io_loc_t + lat_patch_size, masked_src_img_output_file)) torchvision.utils.save_image(img_out, os.path.join(output_path, "output.png")) torchvision.utils.save_image(img_t, os.path.join(output_path, "source.png")) @@ -226,20 +225,20 @@ class BYOLModelWrapper(nn.Module): if __name__ == '__main__': - util.loaded_options = {'checkpointing_enabled': True} pretrained_path = '../../experiments/spinenet49_imgset_sbyol.pth' model = SpineNet('49', in_channels=3, use_input_norm=True).to('cuda') model.load_state_dict(torch.load(pretrained_path), strict=True) model.eval() - #pretrained_path = '../../experiments/train_sbyol_512unsupervised/models/35000_generator.pth' + #util.loaded_options = {'checkpointing_enabled': True} + #pretrained_path = '../../experiments/train_sbyol_512unsupervised_restart/models/48000_generator.pth' #from models.byol.byol_structural import StructuralBYOL #subnet = SpineNet('49', in_channels=3, use_input_norm=True).to('cuda') - #model = StructuralBYOL(subnet, image_size=256, hidden_layer='endpoint_convs.3.conv') + #model = StructuralBYOL(subnet, image_size=256, hidden_layer='endpoint_convs.4.conv') #model.load_state_dict(torch.load(pretrained_path), strict=True) #model = BYOLModelWrapper(model) #model.eval() with torch.no_grad(): - #create_latent_database(model, 0) # 0 = model output dimension to use for latent storage - find_similar_latents(model, 0, 8, structural_euc_dist) # 1 = model output dimension to use for latent predictor. + #create_latent_database(model, 1) # 0 = model output dimension to use for latent storage + find_similar_latents(model, 1, 16, structural_euc_dist) # 1 = model output dimension to use for latent predictor.