BYOL mods

This commit is contained in:
James Betker 2020-12-14 23:59:11 -07:00
parent ef7eabf457
commit 0a19e53df0
4 changed files with 37 additions and 26 deletions

View File

@ -110,14 +110,14 @@ class RandomSharedRegionCrop(nn.Module):
d = d // self.multiple d = d // self.multiple
# Step 2 # Step 2
base_w = random.randint(d//2, d-1) base_w = random.randint(d//2+1, d-1)
base_l = random.randint(0, d-base_w) base_l = random.randint(0, d-base_w)
base_h = random.randint(base_w-1, base_w+1) base_h = random.randint(base_w-1, base_w+1)
base_t = random.randint(0, d-base_h) base_t = random.randint(0, d-base_h)
base_r, base_b = base_l+base_w, base_t+base_h base_r, base_b = base_l+base_w, base_t+base_h
# Step 3 # Step 3
im2_w = random.randint(d//2, d-1) im2_w = random.randint(d//2+1, d-1)
im2_l = random.randint(0, d-im2_w) im2_l = random.randint(0, d-im2_w)
im2_h = random.randint(im2_w-1, im2_w+1) im2_h = random.randint(im2_w-1, im2_w+1)
im2_t = random.randint(0, d-im2_h) im2_t = random.randint(0, d-im2_h)
@ -153,7 +153,7 @@ class RandomSharedRegionCrop(nn.Module):
i2_shared_t, i2_shared_l = snap(im2_t, base_t), snap(im2_l, base_l) i2_shared_t, i2_shared_l = snap(im2_t, base_t), snap(im2_l, base_l)
ix_h = min(base_b, im2_b) - max(base_t, im2_t) ix_h = min(base_b, im2_b) - max(base_t, im2_t)
ix_w = min(base_r, im2_r) - max(base_l, im2_l) ix_w = min(base_r, im2_r) - max(base_l, im2_l)
recompute_package = torch.tensor([base_h, base_w, i1_shared_t, i1_shared_l, im2_h, im2_w, i2_shared_t, i2_shared_l, should_flip, ix_h, ix_w], dtype=torch.long) recompute_package = torch.tensor([d, base_h, base_w, i1_shared_t, i1_shared_l, im2_h, im2_w, i2_shared_t, i2_shared_l, should_flip, ix_h, ix_w], dtype=torch.long)
# Step 7 # Step 7
mask1 = torch.full((1, base_h*m, base_w*m), fill_value=.5) mask1 = torch.full((1, base_h*m, base_w*m), fill_value=.5)
@ -167,7 +167,14 @@ class RandomSharedRegionCrop(nn.Module):
mask[:, im2_t*m:(im2_t+im2_w)*m, im2_l*m:(im2_l+im2_h)*m] += .33 mask[:, im2_t*m:(im2_t+im2_w)*m, im2_l*m:(im2_l+im2_h)*m] += .33
masked_dbg = i1 * mask masked_dbg = i1 * mask
return p1_resized, p2_resized, recompute_package, masked1, masked2, masked_dbg # Step 8 - Rebuild shared regions for testing purposes.
p1_shuf, p2_shuf = PixelUnshuffle(self.multiple)(p1_resized.unsqueeze(0)), \
PixelUnshuffle(self.multiple)(p2_resized.unsqueeze(0))
i1_shared, i2_shared = reconstructed_shared_regions(p1_shuf, p2_shuf, recompute_package.unsqueeze(0))
i1_shared = pad_to(nn.PixelShuffle(self.multiple)(i1_shared).squeeze(0), d * m)
i2_shared = pad_to(nn.PixelShuffle(self.multiple)(i2_shared).squeeze(0), d*m)
return p1_resized, p2_resized, recompute_package, masked1, masked2, masked_dbg, i1_shared, i2_shared
# Uses the recompute package returned from the above dataset to extract matched-size "similar regions" from two feature # Uses the recompute package returned from the above dataset to extract matched-size "similar regions" from two feature
@ -180,14 +187,17 @@ def reconstructed_shared_regions(fea1, fea2, recompute_package: torch.Tensor):
# It'd be real nice if we could do this at the batch level, but I don't see a really good way to do that outside # It'd be real nice if we could do this at the batch level, but I don't see a really good way to do that outside
# of conforming the recompute_package across the entire batch. # of conforming the recompute_package across the entire batch.
for b in range(package.shape[0]): for b in range(package.shape[0]):
f1_h, f1_w, f1s_t, f1s_l, f2_h, f2_w, f2s_t, f2s_l, should_flip, s_h, s_w = tuple(package[b].tolist()) expected_dim, f1_h, f1_w, f1s_t, f1s_l, f2_h, f2_w, f2s_t, f2s_l, should_flip, s_h, s_w = tuple(package[b].tolist())
# If you are hitting this assert, `latent_multiple` is set incorrectly in your dataset config.
assert expected_dim == fea1.shape[2] and expected_dim == fea2.shape[2]
# Unflip 2 if needed. # Unflip 2 if needed.
f2 = fea2[b] f2 = fea2[b]
if should_flip == 1: if should_flip == 1:
f2 = kornia.geometry.transform.hflip(f2) f2 = kornia.geometry.transform.hflip(f2)
# Resize the input features to match # Resize the input features to match
f1s = F.interpolate(fea1[b].unsqueeze(0), (f1_h, f1_w), mode="bilinear") f1s = F.interpolate(fea1[b].unsqueeze(0), (f1_h, f1_w), mode="nearest")
f2s = F.interpolate(f2.unsqueeze(0), (f2_h, f2_w), mode="bilinear") f2s = F.interpolate(f2.unsqueeze(0), (f2_h, f2_w), mode="nearest")
# Outputs must be padded so they can "get along" with each other. # Outputs must be padded so they can "get along" with each other.
res1.append(pad_to(f1s[:, :, f1s_t:f1s_t+s_h, f1s_l:f1s_l+s_w], pad_dim)) res1.append(pad_to(f1s[:, :, f1s_t:f1s_t+s_h, f1s_l:f1s_l+s_w], pad_dim))
res2.append(pad_to(f2s[:, :, f2s_t:f2s_t+s_h, f2s_l:f2s_l+s_w], pad_dim)) res2.append(pad_to(f2s[:, :, f2s_t:f2s_t+s_h, f2s_l:f2s_l+s_w], pad_dim))
@ -214,9 +224,10 @@ class StructuredCropDatasetWrapper(Dataset):
item = self.wrapped_dataset[item] item = self.wrapped_dataset[item]
a1 = self.aug(item['hq']).squeeze(dim=0) a1 = self.aug(item['hq']).squeeze(dim=0)
a2 = self.aug(item['lq']).squeeze(dim=0) a2 = self.aug(item['lq']).squeeze(dim=0)
a1, a2, sr_dim, m1, m2, db = self.rrc(a1, a2) a1, a2, sr_dim, m1, m2, db, i1s, i2s = self.rrc(a1, a2)
item.update({'aug1': a1, 'aug2': a2, 'similar_region_dimensions': sr_dim, item.update({'aug1': a1, 'aug2': a2, 'similar_region_dimensions': sr_dim,
'masked1': m1, 'masked2': m2, 'aug_shared_view': db}) 'masked1': m1, 'masked2': m2, 'aug_shared_view': db,
'i1_shared': i1s, 'i2_shared': i2s})
return item return item
def __len__(self): def __len__(self):
@ -240,7 +251,7 @@ if __name__ == '__main__':
'num_corrupts_per_image': 1, 'num_corrupts_per_image': 1,
'corrupt_before_downsize': True, 'corrupt_before_downsize': True,
}, },
'latent_multiple': 8, 'latent_multiple': 16,
'jitter_range': 0, 'jitter_range': 0,
} }
@ -254,8 +265,8 @@ if __name__ == '__main__':
#if k in [ 'aug_shared_view', 'masked1', 'masked2']: #if k in [ 'aug_shared_view', 'masked1', 'masked2']:
#torchvision.utils.save_image(v.unsqueeze(0), "debug/%i_%s.png" % (i, k)) #torchvision.utils.save_image(v.unsqueeze(0), "debug/%i_%s.png" % (i, k))
rcpkg = o['similar_region_dimensions'] rcpkg = o['similar_region_dimensions']
pixun = PixelUnshuffle(8) pixun = PixelUnshuffle(16)
pixsh = nn.PixelShuffle(8) pixsh = nn.PixelShuffle(16)
rc1, rc2 = reconstructed_shared_regions(pixun(o['aug1'].unsqueeze(0)), pixun(o['aug2'].unsqueeze(0)), rcpkg) rc1, rc2 = reconstructed_shared_regions(pixun(o['aug1'].unsqueeze(0)), pixun(o['aug2'].unsqueeze(0)), rcpkg.unsqueeze(0))
#torchvision.utils.save_image(pixsh(rc1), "debug/%i_rc1.png" % (i,)) #torchvision.utils.save_image(pixsh(rc1), "debug/%i_rc1.png" % (i,))
#torchvision.utils.save_image(pixsh(rc2), "debug/%i_rc2.png" % (i,)) #torchvision.utils.save_image(pixsh(rc2), "debug/%i_rc2.png" % (i,))

View File

@ -113,6 +113,7 @@ class StructuralBYOL(nn.Module):
if pretrained_state_dict: if pretrained_state_dict:
net.load_state_dict(torch.load(pretrained_state_dict), strict=True) net.load_state_dict(torch.load(pretrained_state_dict), strict=True)
self.freeze_until = freeze_until self.freeze_until = freeze_until
self.frozen = False
if self.freeze_until > 0: if self.freeze_until > 0:
for p in net.parameters(): for p in net.parameters():
p.DO_NOT_TRAIN = True p.DO_NOT_TRAIN = True

View File

@ -3,7 +3,7 @@ import torch
from models.archs.spinenet_arch import SpineNet from models.archs.spinenet_arch import SpineNet
if __name__ == '__main__': if __name__ == '__main__':
pretrained_path = '../../experiments/train_sbyol_512unsupervised/models/35000_generator.pth' pretrained_path = '../../experiments/train_sbyol_512unsupervised_restart/models/48000_generator.pth'
output_path = '../../experiments/spinenet49_imgset_sbyol.pth' output_path = '../../experiments/spinenet49_imgset_sbyol.pth'
wrap_key = 'online_encoder.net.' wrap_key = 'online_encoder.net.'

View File

@ -171,21 +171,20 @@ def find_similar_latents(model, model_index=0, lat_patch_size=16, compare_fn=str
t = lat_patch_size * u[1] t = lat_patch_size * u[1]
l = lat_patch_size * u[2] l = lat_patch_size * u[2]
patch = img[:, t:t + lat_patch_size, l:l + lat_patch_size] patch = img[:, t:t + lat_patch_size, l:l + lat_patch_size]
img_out[:,:, h_ * lat_patch_size:h_ * lat_patch_size + lat_patch_size, io_loc_t = h_ * lat_patch_size
w_ * lat_patch_size:w_ * lat_patch_size + lat_patch_size] = patch io_loc_l = w_ * lat_patch_size
img_out[:,:,io_loc_t:io_loc_t+lat_patch_size,io_loc_l:io_loc_l+lat_patch_size] = patch
# Also save the image with a masked map # Also save the image with a masked map
mask = torch.full_like(img, fill_value=.3) mask = torch.full_like(img, fill_value=.3)
mask[:, t:t + lat_patch_size, l:l + lat_patch_size] = 1 mask[:, t:t + lat_patch_size, l:l + lat_patch_size] = 1
masked_img = img * mask masked_img = img * mask
masked_src_img_output_file = os.path.join(output_path, "%i_%i__%i.png" % (t, l, u[0])) masked_src_img_output_file = os.path.join(output_path, "%i_%i__%i.png" % (io_loc_t, io_loc_l, u[0]))
torchvision.utils.save_image(masked_img, masked_src_img_output_file) torchvision.utils.save_image(masked_img, masked_src_img_output_file)
# Update the image map areas. # Update the image map areas.
img_map_areas.append('<area shape="rect" coords="%i,%i,%i,%i" href="%s">' % (w_ * lat_patch_size, img_map_areas.append('<area shape="rect" coords="%i,%i,%i,%i" href="%s">' % (io_loc_l, io_loc_t,
h_ * lat_patch_size, io_loc_l + lat_patch_size, io_loc_t + lat_patch_size,
w_ * lat_patch_size + lat_patch_size,
h_ * lat_patch_size + lat_patch_size,
masked_src_img_output_file)) masked_src_img_output_file))
torchvision.utils.save_image(img_out, os.path.join(output_path, "output.png")) torchvision.utils.save_image(img_out, os.path.join(output_path, "output.png"))
torchvision.utils.save_image(img_t, os.path.join(output_path, "source.png")) torchvision.utils.save_image(img_t, os.path.join(output_path, "source.png"))
@ -226,20 +225,20 @@ class BYOLModelWrapper(nn.Module):
if __name__ == '__main__': if __name__ == '__main__':
util.loaded_options = {'checkpointing_enabled': True}
pretrained_path = '../../experiments/spinenet49_imgset_sbyol.pth' pretrained_path = '../../experiments/spinenet49_imgset_sbyol.pth'
model = SpineNet('49', in_channels=3, use_input_norm=True).to('cuda') model = SpineNet('49', in_channels=3, use_input_norm=True).to('cuda')
model.load_state_dict(torch.load(pretrained_path), strict=True) model.load_state_dict(torch.load(pretrained_path), strict=True)
model.eval() model.eval()
#pretrained_path = '../../experiments/train_sbyol_512unsupervised/models/35000_generator.pth' #util.loaded_options = {'checkpointing_enabled': True}
#pretrained_path = '../../experiments/train_sbyol_512unsupervised_restart/models/48000_generator.pth'
#from models.byol.byol_structural import StructuralBYOL #from models.byol.byol_structural import StructuralBYOL
#subnet = SpineNet('49', in_channels=3, use_input_norm=True).to('cuda') #subnet = SpineNet('49', in_channels=3, use_input_norm=True).to('cuda')
#model = StructuralBYOL(subnet, image_size=256, hidden_layer='endpoint_convs.3.conv') #model = StructuralBYOL(subnet, image_size=256, hidden_layer='endpoint_convs.4.conv')
#model.load_state_dict(torch.load(pretrained_path), strict=True) #model.load_state_dict(torch.load(pretrained_path), strict=True)
#model = BYOLModelWrapper(model) #model = BYOLModelWrapper(model)
#model.eval() #model.eval()
with torch.no_grad(): with torch.no_grad():
#create_latent_database(model, 0) # 0 = model output dimension to use for latent storage #create_latent_database(model, 1) # 0 = model output dimension to use for latent storage
find_similar_latents(model, 0, 8, structural_euc_dist) # 1 = model output dimension to use for latent predictor. find_similar_latents(model, 1, 16, structural_euc_dist) # 1 = model output dimension to use for latent predictor.