BYOL mods

2020-12-14 23:59:11 -07:00 · 2020-12-14 23:59:11 -07:00 · 0a19e53df0
commit 0a19e53df0
parent ef7eabf457
4 changed files with 37 additions and 26 deletions
--- a/codes/data/byol_attachment.py
+++ b/codes/data/byol_attachment.py
@@ -110,14 +110,14 @@ class RandomSharedRegionCrop(nn.Module):
        d = d // self.multiple

        # Step 2
-        base_w = random.randint(d//2, d-1)
+        base_w = random.randint(d//2+1, d-1)
        base_l = random.randint(0, d-base_w)
        base_h = random.randint(base_w-1, base_w+1)
        base_t = random.randint(0, d-base_h)
        base_r, base_b = base_l+base_w, base_t+base_h

        # Step 3
-        im2_w = random.randint(d//2, d-1)
+        im2_w = random.randint(d//2+1, d-1)
        im2_l = random.randint(0, d-im2_w)
        im2_h = random.randint(im2_w-1, im2_w+1)
        im2_t = random.randint(0, d-im2_h)
@@ -153,7 +153,7 @@ class RandomSharedRegionCrop(nn.Module):
        i2_shared_t, i2_shared_l = snap(im2_t, base_t), snap(im2_l, base_l)
        ix_h = min(base_b, im2_b) - max(base_t, im2_t)
        ix_w = min(base_r, im2_r) - max(base_l, im2_l)
-        recompute_package = torch.tensor([base_h, base_w, i1_shared_t, i1_shared_l, im2_h, im2_w, i2_shared_t, i2_shared_l, should_flip, ix_h, ix_w], dtype=torch.long)
+        recompute_package = torch.tensor([d, base_h, base_w, i1_shared_t, i1_shared_l, im2_h, im2_w, i2_shared_t, i2_shared_l, should_flip, ix_h, ix_w], dtype=torch.long)

        # Step 7
        mask1 = torch.full((1, base_h*m, base_w*m), fill_value=.5)
@@ -167,7 +167,14 @@ class RandomSharedRegionCrop(nn.Module):
        mask[:, im2_t*m:(im2_t+im2_w)*m, im2_l*m:(im2_l+im2_h)*m] += .33
        masked_dbg = i1 * mask

-        return p1_resized, p2_resized, recompute_package, masked1, masked2, masked_dbg
+        # Step 8 - Rebuild shared regions for testing purposes.
+        p1_shuf, p2_shuf = PixelUnshuffle(self.multiple)(p1_resized.unsqueeze(0)), \
+                           PixelUnshuffle(self.multiple)(p2_resized.unsqueeze(0))
+        i1_shared, i2_shared = reconstructed_shared_regions(p1_shuf, p2_shuf, recompute_package.unsqueeze(0))
+        i1_shared = pad_to(nn.PixelShuffle(self.multiple)(i1_shared).squeeze(0), d * m)
+        i2_shared = pad_to(nn.PixelShuffle(self.multiple)(i2_shared).squeeze(0), d*m)
+
+        return p1_resized, p2_resized, recompute_package, masked1, masked2, masked_dbg, i1_shared, i2_shared


 # Uses the recompute package returned from the above dataset to extract matched-size "similar regions" from two feature
@@ -180,14 +187,17 @@ def reconstructed_shared_regions(fea1, fea2, recompute_package: torch.Tensor):
    # It'd be real nice if we could do this at the batch level, but I don't see a really good way to do that outside
    # of conforming the recompute_package across the entire batch.
    for b in range(package.shape[0]):
-        f1_h, f1_w, f1s_t, f1s_l, f2_h, f2_w, f2s_t, f2s_l, should_flip, s_h, s_w = tuple(package[b].tolist())
+        expected_dim, f1_h, f1_w, f1s_t, f1s_l, f2_h, f2_w, f2s_t, f2s_l, should_flip, s_h, s_w = tuple(package[b].tolist())
+        # If you are hitting this assert, you specified `latent_multiple` in your dataset config wrong.
+        assert expected_dim == fea1.shape[2] and expected_dim == fea2.shape[2]
+
        # Unflip 2 if needed.
        f2 = fea2[b]
        if should_flip == 1:
            f2 = kornia.geometry.transform.hflip(f2)
        # Resize the input features to match
-        f1s = F.interpolate(fea1[b].unsqueeze(0), (f1_h, f1_w), mode="bilinear")
-        f2s = F.interpolate(f2.unsqueeze(0), (f2_h, f2_w), mode="bilinear")
+        f1s = F.interpolate(fea1[b].unsqueeze(0), (f1_h, f1_w), mode="nearest")
+        f2s = F.interpolate(f2.unsqueeze(0), (f2_h, f2_w), mode="nearest")
        # Outputs must be padded so they can "get along" with each other.
        res1.append(pad_to(f1s[:, :, f1s_t:f1s_t+s_h, f1s_l:f1s_l+s_w], pad_dim))
        res2.append(pad_to(f2s[:, :, f2s_t:f2s_t+s_h, f2s_l:f2s_l+s_w], pad_dim))
@@ -214,9 +224,10 @@ class StructuredCropDatasetWrapper(Dataset):
        item = self.wrapped_dataset[item]
        a1 = self.aug(item['hq']).squeeze(dim=0)
        a2 = self.aug(item['lq']).squeeze(dim=0)
-        a1, a2, sr_dim, m1, m2, db = self.rrc(a1, a2)
+        a1, a2, sr_dim, m1, m2, db, i1s, i2s = self.rrc(a1, a2)
        item.update({'aug1': a1, 'aug2': a2, 'similar_region_dimensions': sr_dim,
-                     'masked1': m1, 'masked2': m2, 'aug_shared_view': db})
+                     'masked1': m1, 'masked2': m2, 'aug_shared_view': db,
+                     'i1_shared': i1s, 'i2_shared': i2s})
        return item

    def __len__(self):
@@ -240,7 +251,7 @@ if __name__ == '__main__':
            'num_corrupts_per_image': 1,
            'corrupt_before_downsize': True,
            },
-        'latent_multiple': 8,
+        'latent_multiple': 16,
        'jitter_range': 0,
    }

@@ -254,8 +265,8 @@ if __name__ == '__main__':
            #if k in [ 'aug_shared_view', 'masked1', 'masked2']:
                #torchvision.utils.save_image(v.unsqueeze(0), "debug/%i_%s.png" % (i, k))
        rcpkg = o['similar_region_dimensions']
-        pixun = PixelUnshuffle(8)
-        pixsh = nn.PixelShuffle(8)
-        rc1, rc2 = reconstructed_shared_regions(pixun(o['aug1'].unsqueeze(0)), pixun(o['aug2'].unsqueeze(0)), rcpkg)
+        pixun = PixelUnshuffle(16)
+        pixsh = nn.PixelShuffle(16)
+        rc1, rc2 = reconstructed_shared_regions(pixun(o['aug1'].unsqueeze(0)), pixun(o['aug2'].unsqueeze(0)), rcpkg.unsqueeze(0))
        #torchvision.utils.save_image(pixsh(rc1), "debug/%i_rc1.png" % (i,))
        #torchvision.utils.save_image(pixsh(rc2), "debug/%i_rc2.png" % (i,))
--- a/codes/models/byol/byol_structural.py
+++ b/codes/models/byol/byol_structural.py
@@ -113,6 +113,7 @@ class StructuralBYOL(nn.Module):
        if pretrained_state_dict:
            net.load_state_dict(torch.load(pretrained_state_dict), strict=True)
        self.freeze_until = freeze_until
+        self.frozen = False
        if self.freeze_until > 0:
            for p in net.parameters():
                p.DO_NOT_TRAIN = True
--- a/codes/scripts/byol_extract_wrapped_model.py
+++ b/codes/scripts/byol_extract_wrapped_model.py
@@ -3,7 +3,7 @@ import torch
 from models.archs.spinenet_arch import SpineNet

 if __name__ == '__main__':
-    pretrained_path = '../../experiments/train_sbyol_512unsupervised/models/35000_generator.pth'
+    pretrained_path = '../../experiments/train_sbyol_512unsupervised_restart/models/48000_generator.pth'
    output_path = '../../experiments/spinenet49_imgset_sbyol.pth'

    wrap_key = 'online_encoder.net.'
--- a/codes/scripts/byol_spinenet_playground.py
+++ b/codes/scripts/byol_spinenet_playground.py
@@ -171,21 +171,20 @@ def find_similar_latents(model, model_index=0, lat_patch_size=16, compare_fn=str
        t = lat_patch_size * u[1]
        l = lat_patch_size * u[2]
        patch = img[:, t:t + lat_patch_size, l:l + lat_patch_size]
-        img_out[:,:, h_ * lat_patch_size:h_ * lat_patch_size + lat_patch_size,
-        w_ * lat_patch_size:w_ * lat_patch_size + lat_patch_size] = patch
+        io_loc_t = h_ * lat_patch_size
+        io_loc_l = w_ * lat_patch_size
+        img_out[:,:,io_loc_t:io_loc_t+lat_patch_size,io_loc_l:io_loc_l+lat_patch_size] = patch

        # Also save the image with a masked map
        mask = torch.full_like(img, fill_value=.3)
        mask[:, t:t + lat_patch_size, l:l + lat_patch_size] = 1
        masked_img = img * mask
-        masked_src_img_output_file = os.path.join(output_path, "%i_%i__%i.png" % (t, l, u[0]))
+        masked_src_img_output_file = os.path.join(output_path, "%i_%i__%i.png" % (io_loc_t, io_loc_l, u[0]))
        torchvision.utils.save_image(masked_img, masked_src_img_output_file)

        # Update the image map areas.
-        img_map_areas.append('<area shape="rect" coords="%i,%i,%i,%i" href="%s">' % (w_ * lat_patch_size,
-                                                                                     h_ * lat_patch_size,
-                                                                                     w_ * lat_patch_size + lat_patch_size,
-                                                                                     h_ * lat_patch_size + lat_patch_size,
+        img_map_areas.append('<area shape="rect" coords="%i,%i,%i,%i" href="%s">' % (io_loc_l, io_loc_t,
+                                                                                     io_loc_l + lat_patch_size, io_loc_t + lat_patch_size,
                                                                                     masked_src_img_output_file))
    torchvision.utils.save_image(img_out, os.path.join(output_path, "output.png"))
    torchvision.utils.save_image(img_t, os.path.join(output_path, "source.png"))
@@ -226,20 +225,20 @@ class BYOLModelWrapper(nn.Module):


 if __name__ == '__main__':
-    util.loaded_options = {'checkpointing_enabled': True}
    pretrained_path = '../../experiments/spinenet49_imgset_sbyol.pth'
    model = SpineNet('49', in_channels=3, use_input_norm=True).to('cuda')
    model.load_state_dict(torch.load(pretrained_path), strict=True)
    model.eval()

-    #pretrained_path = '../../experiments/train_sbyol_512unsupervised/models/35000_generator.pth'
+    #util.loaded_options = {'checkpointing_enabled': True}
+    #pretrained_path = '../../experiments/train_sbyol_512unsupervised_restart/models/48000_generator.pth'
    #from models.byol.byol_structural import StructuralBYOL
    #subnet = SpineNet('49', in_channels=3, use_input_norm=True).to('cuda')
-    #model = StructuralBYOL(subnet, image_size=256, hidden_layer='endpoint_convs.3.conv')
+    #model = StructuralBYOL(subnet, image_size=256, hidden_layer='endpoint_convs.4.conv')
    #model.load_state_dict(torch.load(pretrained_path), strict=True)
    #model = BYOLModelWrapper(model)
    #model.eval()

    with torch.no_grad():
-        #create_latent_database(model, 0)    # 0 = model output dimension to use for latent storage
-        find_similar_latents(model, 0, 8, structural_euc_dist)  # 1 = model output dimension to use for latent predictor.
+        #create_latent_database(model, 1)    # 0 = model output dimension to use for latent storage
+        find_similar_latents(model, 1, 16, structural_euc_dist)  # 1 = model output dimension to use for latent predictor.