From 0a19e53df03a22058f5d122c0ee05c29338c2df8 Mon Sep 17 00:00:00 2001
From: James Betker <jbetker@gmail.com>
Date: Mon, 14 Dec 2020 23:59:11 -0700
Subject: [PATCH] BYOL mods

---
 codes/data/byol_attachment.py               | 37 +++++++++++++--------
 codes/models/byol/byol_structural.py        |  1 +
 codes/scripts/byol_extract_wrapped_model.py |  2 +-
 codes/scripts/byol_spinenet_playground.py   | 23 ++++++-------
 4 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/codes/data/byol_attachment.py b/codes/data/byol_attachment.py
index 55ae1961..bf1dc2a6 100644
--- a/codes/data/byol_attachment.py
+++ b/codes/data/byol_attachment.py
@@ -110,14 +110,14 @@ class RandomSharedRegionCrop(nn.Module):
         d = d // self.multiple
 
         # Step 2
-        base_w = random.randint(d//2, d-1)
+        base_w = random.randint(d//2+1, d-1)
         base_l = random.randint(0, d-base_w)
         base_h = random.randint(base_w-1, base_w+1)
         base_t = random.randint(0, d-base_h)
         base_r, base_b = base_l+base_w, base_t+base_h
 
         # Step 3
-        im2_w = random.randint(d//2, d-1)
+        im2_w = random.randint(d//2+1, d-1)
         im2_l = random.randint(0, d-im2_w)
         im2_h = random.randint(im2_w-1, im2_w+1)
         im2_t = random.randint(0, d-im2_h)
@@ -153,7 +153,7 @@ class RandomSharedRegionCrop(nn.Module):
         i2_shared_t, i2_shared_l = snap(im2_t, base_t), snap(im2_l, base_l)
         ix_h = min(base_b, im2_b) - max(base_t, im2_t)
         ix_w = min(base_r, im2_r) - max(base_l, im2_l)
-        recompute_package = torch.tensor([base_h, base_w, i1_shared_t, i1_shared_l, im2_h, im2_w, i2_shared_t, i2_shared_l, should_flip, ix_h, ix_w], dtype=torch.long)
+        recompute_package = torch.tensor([d, base_h, base_w, i1_shared_t, i1_shared_l, im2_h, im2_w, i2_shared_t, i2_shared_l, should_flip, ix_h, ix_w], dtype=torch.long)
 
         # Step 7
         mask1 = torch.full((1, base_h*m, base_w*m), fill_value=.5)
@@ -167,7 +167,14 @@ class RandomSharedRegionCrop(nn.Module):
         mask[:, im2_t*m:(im2_t+im2_w)*m, im2_l*m:(im2_l+im2_h)*m] += .33
         masked_dbg = i1 * mask
 
-        return p1_resized, p2_resized, recompute_package, masked1, masked2, masked_dbg
+        # Step 8 - Rebuild shared regions for testing purposes.
+        p1_shuf, p2_shuf = PixelUnshuffle(self.multiple)(p1_resized.unsqueeze(0)), \
+                           PixelUnshuffle(self.multiple)(p2_resized.unsqueeze(0))
+        i1_shared, i2_shared = reconstructed_shared_regions(p1_shuf, p2_shuf, recompute_package.unsqueeze(0))
+        i1_shared = pad_to(nn.PixelShuffle(self.multiple)(i1_shared).squeeze(0), d * m)
+        i2_shared = pad_to(nn.PixelShuffle(self.multiple)(i2_shared).squeeze(0), d*m)
+
+        return p1_resized, p2_resized, recompute_package, masked1, masked2, masked_dbg, i1_shared, i2_shared
 
 
 # Uses the recompute package returned from the above dataset to extract matched-size "similar regions" from two feature
@@ -180,14 +187,17 @@ def reconstructed_shared_regions(fea1, fea2, recompute_package: torch.Tensor):
     # It'd be real nice if we could do this at the batch level, but I don't see a really good way to do that outside
     # of conforming the recompute_package across the entire batch.
     for b in range(package.shape[0]):
-        f1_h, f1_w, f1s_t, f1s_l, f2_h, f2_w, f2s_t, f2s_l, should_flip, s_h, s_w = tuple(package[b].tolist())
+        expected_dim, f1_h, f1_w, f1s_t, f1s_l, f2_h, f2_w, f2s_t, f2s_l, should_flip, s_h, s_w = tuple(package[b].tolist())
+        # If you hit this assert, `latent_multiple` is specified incorrectly in your dataset config.
+        assert expected_dim == fea1.shape[2] and expected_dim == fea2.shape[2]
+
         # Unflip 2 if needed.
         f2 = fea2[b]
         if should_flip == 1:
             f2 = kornia.geometry.transform.hflip(f2)
         # Resize the input features to match
-        f1s = F.interpolate(fea1[b].unsqueeze(0), (f1_h, f1_w), mode="bilinear")
-        f2s = F.interpolate(f2.unsqueeze(0), (f2_h, f2_w), mode="bilinear")
+        f1s = F.interpolate(fea1[b].unsqueeze(0), (f1_h, f1_w), mode="nearest")
+        f2s = F.interpolate(f2.unsqueeze(0), (f2_h, f2_w), mode="nearest")
         # Outputs must be padded so they can "get along" with each other.
         res1.append(pad_to(f1s[:, :, f1s_t:f1s_t+s_h, f1s_l:f1s_l+s_w], pad_dim))
         res2.append(pad_to(f2s[:, :, f2s_t:f2s_t+s_h, f2s_l:f2s_l+s_w], pad_dim))
@@ -214,9 +224,10 @@ class StructuredCropDatasetWrapper(Dataset):
         item = self.wrapped_dataset[item]
         a1 = self.aug(item['hq']).squeeze(dim=0)
         a2 = self.aug(item['lq']).squeeze(dim=0)
-        a1, a2, sr_dim, m1, m2, db = self.rrc(a1, a2)
+        a1, a2, sr_dim, m1, m2, db, i1s, i2s = self.rrc(a1, a2)
         item.update({'aug1': a1, 'aug2': a2, 'similar_region_dimensions': sr_dim,
-                     'masked1': m1, 'masked2': m2, 'aug_shared_view': db})
+                     'masked1': m1, 'masked2': m2, 'aug_shared_view': db,
+                     'i1_shared': i1s, 'i2_shared': i2s})
         return item
 
     def __len__(self):
@@ -240,7 +251,7 @@ if __name__ == '__main__':
             'num_corrupts_per_image': 1,
             'corrupt_before_downsize': True,
             },
-        'latent_multiple': 8,
+        'latent_multiple': 16,
         'jitter_range': 0,
     }
 
@@ -254,8 +265,8 @@ if __name__ == '__main__':
             #if k in [ 'aug_shared_view', 'masked1', 'masked2']:
                 #torchvision.utils.save_image(v.unsqueeze(0), "debug/%i_%s.png" % (i, k))
         rcpkg = o['similar_region_dimensions']
-        pixun = PixelUnshuffle(8)
-        pixsh = nn.PixelShuffle(8)
-        rc1, rc2 = reconstructed_shared_regions(pixun(o['aug1'].unsqueeze(0)), pixun(o['aug2'].unsqueeze(0)), rcpkg)
+        pixun = PixelUnshuffle(16)
+        pixsh = nn.PixelShuffle(16)
+        rc1, rc2 = reconstructed_shared_regions(pixun(o['aug1'].unsqueeze(0)), pixun(o['aug2'].unsqueeze(0)), rcpkg.unsqueeze(0))
         #torchvision.utils.save_image(pixsh(rc1), "debug/%i_rc1.png" % (i,))
         #torchvision.utils.save_image(pixsh(rc2), "debug/%i_rc2.png" % (i,))
diff --git a/codes/models/byol/byol_structural.py b/codes/models/byol/byol_structural.py
index 3ebe5fcf..17d5cfbb 100644
--- a/codes/models/byol/byol_structural.py
+++ b/codes/models/byol/byol_structural.py
@@ -113,6 +113,7 @@ class StructuralBYOL(nn.Module):
         if pretrained_state_dict:
             net.load_state_dict(torch.load(pretrained_state_dict), strict=True)
         self.freeze_until = freeze_until
+        self.frozen = False
         if self.freeze_until > 0:
             for p in net.parameters():
                 p.DO_NOT_TRAIN = True
diff --git a/codes/scripts/byol_extract_wrapped_model.py b/codes/scripts/byol_extract_wrapped_model.py
index 545e51d9..f5e80c54 100644
--- a/codes/scripts/byol_extract_wrapped_model.py
+++ b/codes/scripts/byol_extract_wrapped_model.py
@@ -3,7 +3,7 @@ import torch
 from models.archs.spinenet_arch import SpineNet
 
 if __name__ == '__main__':
-    pretrained_path = '../../experiments/train_sbyol_512unsupervised/models/35000_generator.pth'
+    pretrained_path = '../../experiments/train_sbyol_512unsupervised_restart/models/48000_generator.pth'
     output_path = '../../experiments/spinenet49_imgset_sbyol.pth'
 
     wrap_key = 'online_encoder.net.'
diff --git a/codes/scripts/byol_spinenet_playground.py b/codes/scripts/byol_spinenet_playground.py
index 33558b85..c28b955e 100644
--- a/codes/scripts/byol_spinenet_playground.py
+++ b/codes/scripts/byol_spinenet_playground.py
@@ -171,21 +171,20 @@ def find_similar_latents(model, model_index=0, lat_patch_size=16, compare_fn=str
         t = lat_patch_size * u[1]
         l = lat_patch_size * u[2]
         patch = img[:, t:t + lat_patch_size, l:l + lat_patch_size]
-        img_out[:,:, h_ * lat_patch_size:h_ * lat_patch_size + lat_patch_size,
-        w_ * lat_patch_size:w_ * lat_patch_size + lat_patch_size] = patch
+        io_loc_t = h_ * lat_patch_size
+        io_loc_l = w_ * lat_patch_size
+        img_out[:,:,io_loc_t:io_loc_t+lat_patch_size,io_loc_l:io_loc_l+lat_patch_size] = patch
 
         # Also save the image with a masked map
         mask = torch.full_like(img, fill_value=.3)
         mask[:, t:t + lat_patch_size, l:l + lat_patch_size] = 1
         masked_img = img * mask
-        masked_src_img_output_file = os.path.join(output_path, "%i_%i__%i.png" % (t, l, u[0]))
+        masked_src_img_output_file = os.path.join(output_path, "%i_%i__%i.png" % (io_loc_t, io_loc_l, u[0]))
         torchvision.utils.save_image(masked_img, masked_src_img_output_file)
 
         # Update the image map areas.
-        img_map_areas.append('<area shape="rect" coords="%i,%i,%i,%i" href="%s">' % (w_ * lat_patch_size,
-                                                                                     h_ * lat_patch_size,
-                                                                                     w_ * lat_patch_size + lat_patch_size,
-                                                                                     h_ * lat_patch_size + lat_patch_size,
+        img_map_areas.append('<area shape="rect" coords="%i,%i,%i,%i" href="%s">' % (io_loc_l, io_loc_t,
+                                                                                     io_loc_l + lat_patch_size, io_loc_t + lat_patch_size,
                                                                                      masked_src_img_output_file))
     torchvision.utils.save_image(img_out, os.path.join(output_path, "output.png"))
     torchvision.utils.save_image(img_t, os.path.join(output_path, "source.png"))
@@ -226,20 +225,20 @@ class BYOLModelWrapper(nn.Module):
 
 
 if __name__ == '__main__':
-    util.loaded_options = {'checkpointing_enabled': True}
     pretrained_path = '../../experiments/spinenet49_imgset_sbyol.pth'
     model = SpineNet('49', in_channels=3, use_input_norm=True).to('cuda')
     model.load_state_dict(torch.load(pretrained_path), strict=True)
     model.eval()
 
-    #pretrained_path = '../../experiments/train_sbyol_512unsupervised/models/35000_generator.pth'
+    #util.loaded_options = {'checkpointing_enabled': True}
+    #pretrained_path = '../../experiments/train_sbyol_512unsupervised_restart/models/48000_generator.pth'
     #from models.byol.byol_structural import StructuralBYOL
     #subnet = SpineNet('49', in_channels=3, use_input_norm=True).to('cuda')
-    #model = StructuralBYOL(subnet, image_size=256, hidden_layer='endpoint_convs.3.conv')
+    #model = StructuralBYOL(subnet, image_size=256, hidden_layer='endpoint_convs.4.conv')
     #model.load_state_dict(torch.load(pretrained_path), strict=True)
     #model = BYOLModelWrapper(model)
     #model.eval()
 
     with torch.no_grad():
-        #create_latent_database(model, 0)    # 0 = model output dimension to use for latent storage
-        find_similar_latents(model, 0, 8, structural_euc_dist)  # 1 = model output dimension to use for latent predictor.
+        #create_latent_database(model, 1)    # 1 = model output dimension to use for latent storage
+        find_similar_latents(model, 1, 16, structural_euc_dist)  # 1 = model output dimension to use for latent predictor.