forked from mrq/DL-Art-School
BYOL mods
This commit is contained in:
parent
ef7eabf457
commit
0a19e53df0
|
@ -110,14 +110,14 @@ class RandomSharedRegionCrop(nn.Module):
|
|||
d = d // self.multiple
|
||||
|
||||
# Step 2
|
||||
base_w = random.randint(d//2, d-1)
|
||||
base_w = random.randint(d//2+1, d-1)
|
||||
base_l = random.randint(0, d-base_w)
|
||||
base_h = random.randint(base_w-1, base_w+1)
|
||||
base_t = random.randint(0, d-base_h)
|
||||
base_r, base_b = base_l+base_w, base_t+base_h
|
||||
|
||||
# Step 3
|
||||
im2_w = random.randint(d//2, d-1)
|
||||
im2_w = random.randint(d//2+1, d-1)
|
||||
im2_l = random.randint(0, d-im2_w)
|
||||
im2_h = random.randint(im2_w-1, im2_w+1)
|
||||
im2_t = random.randint(0, d-im2_h)
|
||||
|
@ -153,7 +153,7 @@ class RandomSharedRegionCrop(nn.Module):
|
|||
i2_shared_t, i2_shared_l = snap(im2_t, base_t), snap(im2_l, base_l)
|
||||
ix_h = min(base_b, im2_b) - max(base_t, im2_t)
|
||||
ix_w = min(base_r, im2_r) - max(base_l, im2_l)
|
||||
recompute_package = torch.tensor([base_h, base_w, i1_shared_t, i1_shared_l, im2_h, im2_w, i2_shared_t, i2_shared_l, should_flip, ix_h, ix_w], dtype=torch.long)
|
||||
recompute_package = torch.tensor([d, base_h, base_w, i1_shared_t, i1_shared_l, im2_h, im2_w, i2_shared_t, i2_shared_l, should_flip, ix_h, ix_w], dtype=torch.long)
|
||||
|
||||
# Step 7
|
||||
mask1 = torch.full((1, base_h*m, base_w*m), fill_value=.5)
|
||||
|
@ -167,7 +167,14 @@ class RandomSharedRegionCrop(nn.Module):
|
|||
mask[:, im2_t*m:(im2_t+im2_w)*m, im2_l*m:(im2_l+im2_h)*m] += .33
|
||||
masked_dbg = i1 * mask
|
||||
|
||||
return p1_resized, p2_resized, recompute_package, masked1, masked2, masked_dbg
|
||||
# Step 8 - Rebuild shared regions for testing purposes.
|
||||
p1_shuf, p2_shuf = PixelUnshuffle(self.multiple)(p1_resized.unsqueeze(0)), \
|
||||
PixelUnshuffle(self.multiple)(p2_resized.unsqueeze(0))
|
||||
i1_shared, i2_shared = reconstructed_shared_regions(p1_shuf, p2_shuf, recompute_package.unsqueeze(0))
|
||||
i1_shared = pad_to(nn.PixelShuffle(self.multiple)(i1_shared).squeeze(0), d * m)
|
||||
i2_shared = pad_to(nn.PixelShuffle(self.multiple)(i2_shared).squeeze(0), d*m)
|
||||
|
||||
return p1_resized, p2_resized, recompute_package, masked1, masked2, masked_dbg, i1_shared, i2_shared
|
||||
|
||||
|
||||
# Uses the recompute package returned from the above dataset to extract matched-size "similar regions" from two feature
|
||||
|
@ -180,14 +187,17 @@ def reconstructed_shared_regions(fea1, fea2, recompute_package: torch.Tensor):
|
|||
# It'd be real nice if we could do this at the batch level, but I don't see a really good way to do that outside
|
||||
# of conforming the recompute_package across the entire batch.
|
||||
for b in range(package.shape[0]):
|
||||
f1_h, f1_w, f1s_t, f1s_l, f2_h, f2_w, f2s_t, f2s_l, should_flip, s_h, s_w = tuple(package[b].tolist())
|
||||
expected_dim, f1_h, f1_w, f1s_t, f1s_l, f2_h, f2_w, f2s_t, f2s_l, should_flip, s_h, s_w = tuple(package[b].tolist())
|
||||
# If you are hitting this assert, you specified `latent_multiple` in your dataset config wrong.
|
||||
assert expected_dim == fea1.shape[2] and expected_dim == fea2.shape[2]
|
||||
|
||||
# Unflip 2 if needed.
|
||||
f2 = fea2[b]
|
||||
if should_flip == 1:
|
||||
f2 = kornia.geometry.transform.hflip(f2)
|
||||
# Resize the input features to match
|
||||
f1s = F.interpolate(fea1[b].unsqueeze(0), (f1_h, f1_w), mode="bilinear")
|
||||
f2s = F.interpolate(f2.unsqueeze(0), (f2_h, f2_w), mode="bilinear")
|
||||
f1s = F.interpolate(fea1[b].unsqueeze(0), (f1_h, f1_w), mode="nearest")
|
||||
f2s = F.interpolate(f2.unsqueeze(0), (f2_h, f2_w), mode="nearest")
|
||||
# Outputs must be padded so they can "get along" with each other.
|
||||
res1.append(pad_to(f1s[:, :, f1s_t:f1s_t+s_h, f1s_l:f1s_l+s_w], pad_dim))
|
||||
res2.append(pad_to(f2s[:, :, f2s_t:f2s_t+s_h, f2s_l:f2s_l+s_w], pad_dim))
|
||||
|
@ -214,9 +224,10 @@ class StructuredCropDatasetWrapper(Dataset):
|
|||
item = self.wrapped_dataset[item]
|
||||
a1 = self.aug(item['hq']).squeeze(dim=0)
|
||||
a2 = self.aug(item['lq']).squeeze(dim=0)
|
||||
a1, a2, sr_dim, m1, m2, db = self.rrc(a1, a2)
|
||||
a1, a2, sr_dim, m1, m2, db, i1s, i2s = self.rrc(a1, a2)
|
||||
item.update({'aug1': a1, 'aug2': a2, 'similar_region_dimensions': sr_dim,
|
||||
'masked1': m1, 'masked2': m2, 'aug_shared_view': db})
|
||||
'masked1': m1, 'masked2': m2, 'aug_shared_view': db,
|
||||
'i1_shared': i1s, 'i2_shared': i2s})
|
||||
return item
|
||||
|
||||
def __len__(self):
|
||||
|
@ -240,7 +251,7 @@ if __name__ == '__main__':
|
|||
'num_corrupts_per_image': 1,
|
||||
'corrupt_before_downsize': True,
|
||||
},
|
||||
'latent_multiple': 8,
|
||||
'latent_multiple': 16,
|
||||
'jitter_range': 0,
|
||||
}
|
||||
|
||||
|
@ -254,8 +265,8 @@ if __name__ == '__main__':
|
|||
#if k in [ 'aug_shared_view', 'masked1', 'masked2']:
|
||||
#torchvision.utils.save_image(v.unsqueeze(0), "debug/%i_%s.png" % (i, k))
|
||||
rcpkg = o['similar_region_dimensions']
|
||||
pixun = PixelUnshuffle(8)
|
||||
pixsh = nn.PixelShuffle(8)
|
||||
rc1, rc2 = reconstructed_shared_regions(pixun(o['aug1'].unsqueeze(0)), pixun(o['aug2'].unsqueeze(0)), rcpkg)
|
||||
pixun = PixelUnshuffle(16)
|
||||
pixsh = nn.PixelShuffle(16)
|
||||
rc1, rc2 = reconstructed_shared_regions(pixun(o['aug1'].unsqueeze(0)), pixun(o['aug2'].unsqueeze(0)), rcpkg.unsqueeze(0))
|
||||
#torchvision.utils.save_image(pixsh(rc1), "debug/%i_rc1.png" % (i,))
|
||||
#torchvision.utils.save_image(pixsh(rc2), "debug/%i_rc2.png" % (i,))
|
||||
|
|
|
@ -113,6 +113,7 @@ class StructuralBYOL(nn.Module):
|
|||
if pretrained_state_dict:
|
||||
net.load_state_dict(torch.load(pretrained_state_dict), strict=True)
|
||||
self.freeze_until = freeze_until
|
||||
self.frozen = False
|
||||
if self.freeze_until > 0:
|
||||
for p in net.parameters():
|
||||
p.DO_NOT_TRAIN = True
|
||||
|
|
|
@ -3,7 +3,7 @@ import torch
|
|||
from models.archs.spinenet_arch import SpineNet
|
||||
|
||||
if __name__ == '__main__':
|
||||
pretrained_path = '../../experiments/train_sbyol_512unsupervised/models/35000_generator.pth'
|
||||
pretrained_path = '../../experiments/train_sbyol_512unsupervised_restart/models/48000_generator.pth'
|
||||
output_path = '../../experiments/spinenet49_imgset_sbyol.pth'
|
||||
|
||||
wrap_key = 'online_encoder.net.'
|
||||
|
|
|
@ -171,21 +171,20 @@ def find_similar_latents(model, model_index=0, lat_patch_size=16, compare_fn=str
|
|||
t = lat_patch_size * u[1]
|
||||
l = lat_patch_size * u[2]
|
||||
patch = img[:, t:t + lat_patch_size, l:l + lat_patch_size]
|
||||
img_out[:,:, h_ * lat_patch_size:h_ * lat_patch_size + lat_patch_size,
|
||||
w_ * lat_patch_size:w_ * lat_patch_size + lat_patch_size] = patch
|
||||
io_loc_t = h_ * lat_patch_size
|
||||
io_loc_l = w_ * lat_patch_size
|
||||
img_out[:,:,io_loc_t:io_loc_t+lat_patch_size,io_loc_l:io_loc_l+lat_patch_size] = patch
|
||||
|
||||
# Also save the image with a masked map
|
||||
mask = torch.full_like(img, fill_value=.3)
|
||||
mask[:, t:t + lat_patch_size, l:l + lat_patch_size] = 1
|
||||
masked_img = img * mask
|
||||
masked_src_img_output_file = os.path.join(output_path, "%i_%i__%i.png" % (t, l, u[0]))
|
||||
masked_src_img_output_file = os.path.join(output_path, "%i_%i__%i.png" % (io_loc_t, io_loc_l, u[0]))
|
||||
torchvision.utils.save_image(masked_img, masked_src_img_output_file)
|
||||
|
||||
# Update the image map areas.
|
||||
img_map_areas.append('<area shape="rect" coords="%i,%i,%i,%i" href="%s">' % (w_ * lat_patch_size,
|
||||
h_ * lat_patch_size,
|
||||
w_ * lat_patch_size + lat_patch_size,
|
||||
h_ * lat_patch_size + lat_patch_size,
|
||||
img_map_areas.append('<area shape="rect" coords="%i,%i,%i,%i" href="%s">' % (io_loc_l, io_loc_t,
|
||||
io_loc_l + lat_patch_size, io_loc_t + lat_patch_size,
|
||||
masked_src_img_output_file))
|
||||
torchvision.utils.save_image(img_out, os.path.join(output_path, "output.png"))
|
||||
torchvision.utils.save_image(img_t, os.path.join(output_path, "source.png"))
|
||||
|
@ -226,20 +225,20 @@ class BYOLModelWrapper(nn.Module):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
util.loaded_options = {'checkpointing_enabled': True}
|
||||
pretrained_path = '../../experiments/spinenet49_imgset_sbyol.pth'
|
||||
model = SpineNet('49', in_channels=3, use_input_norm=True).to('cuda')
|
||||
model.load_state_dict(torch.load(pretrained_path), strict=True)
|
||||
model.eval()
|
||||
|
||||
#pretrained_path = '../../experiments/train_sbyol_512unsupervised/models/35000_generator.pth'
|
||||
#util.loaded_options = {'checkpointing_enabled': True}
|
||||
#pretrained_path = '../../experiments/train_sbyol_512unsupervised_restart/models/48000_generator.pth'
|
||||
#from models.byol.byol_structural import StructuralBYOL
|
||||
#subnet = SpineNet('49', in_channels=3, use_input_norm=True).to('cuda')
|
||||
#model = StructuralBYOL(subnet, image_size=256, hidden_layer='endpoint_convs.3.conv')
|
||||
#model = StructuralBYOL(subnet, image_size=256, hidden_layer='endpoint_convs.4.conv')
|
||||
#model.load_state_dict(torch.load(pretrained_path), strict=True)
|
||||
#model = BYOLModelWrapper(model)
|
||||
#model.eval()
|
||||
|
||||
with torch.no_grad():
|
||||
#create_latent_database(model, 0) # 0 = model output dimension to use for latent storage
|
||||
find_similar_latents(model, 0, 8, structural_euc_dist) # 1 = model output dimension to use for latent predictor.
|
||||
#create_latent_database(model, 1) # 0 = model output dimension to use for latent storage
|
||||
find_similar_latents(model, 1, 16, structural_euc_dist) # 1 = model output dimension to use for latent predictor.
|
||||
|
|
Loading…
Reference in New Issue
Block a user