forked from mrq/DL-Art-School
Move byol scripts around
This commit is contained in:
parent
2f2f87bbea
commit
9680294430
|
@ -3,7 +3,7 @@ import torch
|
|||
from models.spinenet_arch import SpineNet
|
||||
|
||||
if __name__ == '__main__':
|
||||
pretrained_path = '../../experiments/resnet_byol_diffframe_115k.pth'
|
||||
pretrained_path = '../../../experiments/resnet_byol_diffframe_115k.pth'
|
||||
output_path = '../../experiments/resnet_byol_diffframe_115k_.pth'
|
||||
|
||||
wrap_key = 'online_encoder.net.'
|
|
@ -119,7 +119,7 @@ def produce_latent_dict(model):
|
|||
id += batch_size
|
||||
if id > 1000:
|
||||
print("Saving checkpoint..")
|
||||
torch.save((latents, paths), 'results.pth')
|
||||
torch.save((latents, paths), '../results.pth')
|
||||
id = 0
|
||||
|
||||
|
||||
|
@ -128,7 +128,7 @@ def find_similar_latents(model, compare_fn=structural_euc_dist):
|
|||
|
||||
img = 'F:\\4k6k\\datasets\\ns_images\\imagesets\\1024_test\\80692045.jpg.jpg'
|
||||
#img = 'F:\\4k6k\\datasets\\ns_images\\adrianna\\analyze\\analyze_xx\\nicky_xx.jpg'
|
||||
output_path = '../../results/byol_resnet_similars'
|
||||
output_path = '../../../results/byol_resnet_similars'
|
||||
os.makedirs(output_path, exist_ok=True)
|
||||
imglatent = get_latent_for_img(model, img).squeeze().unsqueeze(0)
|
||||
_, c = imglatent.shape
|
||||
|
@ -161,7 +161,7 @@ def find_similar_latents(model, compare_fn=structural_euc_dist):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pretrained_path = '../../experiments/resnet_byol_diffframe_115k.pth'
|
||||
pretrained_path = '../../../experiments/resnet_byol_diffframe_115k.pth'
|
||||
model = resnet50(pretrained=False).to('cuda')
|
||||
sd = torch.load(pretrained_path)
|
||||
resnet_sd = {}
|
|
@ -19,6 +19,7 @@ from models.spinenet_arch import SpineNet
|
|||
# Computes the structural euclidean distance between [x,y]. "Structural" here means the [h,w] dimensions are preserved
|
||||
# and the distance is computed across the channel dimension.
|
||||
from utils import util
|
||||
from utils.options import dict_to_nonedict
|
||||
|
||||
|
||||
def structural_euc_dist(x, y):
|
||||
|
@ -50,7 +51,7 @@ def im_norm(x):
|
|||
|
||||
|
||||
def get_image_folder_dataloader(batch_size, num_workers):
|
||||
dataset_opt = {
|
||||
dataset_opt = dict_to_nonedict({
|
||||
'name': 'amalgam',
|
||||
'paths': ['F:\\4k6k\\datasets\\ns_images\\imagesets\\imageset_1024_square_with_new'],
|
||||
#'paths': ['F:\\4k6k\\datasets\\ns_images\\imagesets\\1024_test'],
|
||||
|
@ -58,15 +59,14 @@ def get_image_folder_dataloader(batch_size, num_workers):
|
|||
'target_size': 512,
|
||||
'force_multiple': 32,
|
||||
'scale': 1
|
||||
}
|
||||
})
|
||||
dataset = ImageFolderDataset(dataset_opt)
|
||||
return DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True)
|
||||
|
||||
|
||||
def create_latent_database(model, model_index=0):
|
||||
batch_size = 8
|
||||
num_workers = 1
|
||||
output_path = '../../results/byol_spinenet_latents/'
|
||||
def create_latent_database(model, model_index=0, batch_size=8):
|
||||
num_workers = 4
|
||||
output_path = '../results/byol_latents/'
|
||||
|
||||
os.makedirs(output_path, exist_ok=True)
|
||||
dataloader = get_image_folder_dataloader(batch_size, num_workers)
|
||||
|
@ -76,7 +76,9 @@ def create_latent_database(model, model_index=0):
|
|||
all_paths = []
|
||||
for batch in tqdm(dataloader):
|
||||
hq = batch['hq'].to('cuda')
|
||||
latent = model(hq)[model_index] # BYOL trainer only trains the '4' output, which is indexed at [1]. Confusing.
|
||||
latent = model(hq)
|
||||
if isinstance(latent, tuple):
|
||||
latent = latent[model_index]
|
||||
for b in range(latent.shape[0]):
|
||||
im_path = batch['HQ_path'][b]
|
||||
all_paths.append(im_path)
|
||||
|
@ -124,10 +126,10 @@ def _get_mins_from_latent_dictionary(latent, hq_img_repo, ld_file_name, batch_si
|
|||
def find_similar_latents(model, model_index=0, lat_patch_size=16, compare_fn=structural_euc_dist):
|
||||
img = 'F:\\4k6k\\datasets\\ns_images\\adrianna\\analyze\\analyze_xx\\adrianna_xx.jpg'
|
||||
#img = 'F:\\4k6k\\datasets\\ns_images\\adrianna\\analyze\\analyze_xx\\nicky_xx.jpg'
|
||||
hq_img_repo = '../../results/byol_spinenet_latents'
|
||||
output_path = '../../results/byol_spinenet_similars'
|
||||
batch_size = 2048
|
||||
num_maps = 4
|
||||
hq_img_repo = '../results/byol_latents'
|
||||
output_path = '../results/byol_similars'
|
||||
batch_size = 4096
|
||||
num_maps = 1
|
||||
lat_patch_mult = 512 // lat_patch_size
|
||||
|
||||
os.makedirs(output_path, exist_ok=True)
|
||||
|
@ -135,7 +137,10 @@ def find_similar_latents(model, model_index=0, lat_patch_size=16, compare_fn=str
|
|||
img_t = ToTensor()(Image.open(img)).to('cuda').unsqueeze(0)
|
||||
_, _, h, w = img_t.shape
|
||||
img_t = img_t[:, :, :128*(h//128), :128*(w//128)]
|
||||
latent = model(img_t)[model_index]
|
||||
latent = model(img_t)
|
||||
if not isinstance(latent, tuple):
|
||||
latent = (latent,)
|
||||
latent = latent[model_index]
|
||||
_, c, h, w = latent.shape
|
||||
|
||||
mins, min_offsets = [], []
|
||||
|
@ -195,7 +200,7 @@ def find_similar_latents(model, model_index=0, lat_patch_size=16, compare_fn=str
|
|||
|
||||
def explore_latent_results(model):
|
||||
batch_size = 16
|
||||
num_workers = 1
|
||||
num_workers = 4
|
||||
output_path = '../../results/byol_spinenet_explore_latents/'
|
||||
|
||||
os.makedirs(output_path, exist_ok=True)
|
||||
|
@ -225,7 +230,7 @@ class BYOLModelWrapper(nn.Module):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pretrained_path = '../../experiments/spinenet49_imgset_sbyol.pth'
|
||||
pretrained_path = '../../../experiments/spinenet49_imgset_sbyol.pth'
|
||||
model = SpineNet('49', in_channels=3, use_input_norm=True).to('cuda')
|
||||
model.load_state_dict(torch.load(pretrained_path), strict=True)
|
||||
model.eval()
|
84
codes/scripts/byol/byol_uresnet_playground.py
Normal file
84
codes/scripts/byol/byol_uresnet_playground.py
Normal file
|
@ -0,0 +1,84 @@
|
|||
import os
|
||||
import shutil
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torchvision
|
||||
from PIL import Image
|
||||
from torch.utils.data import DataLoader
|
||||
from torchvision.models.resnet import Bottleneck
|
||||
from torchvision.transforms import ToTensor, Resize
|
||||
from tqdm import tqdm
|
||||
import numpy as np
|
||||
|
||||
import utils
|
||||
from data.image_folder_dataset import ImageFolderDataset
|
||||
from models.pixel_level_contrastive_learning.resnet_unet import UResNet50
|
||||
from models.resnet_with_checkpointing import resnet50
|
||||
from models.spinenet_arch import SpineNet
|
||||
|
||||
|
||||
# Computes the structural euclidean distance between [x,y]. "Structural" here means the [h,w] dimensions are preserved
|
||||
# and the distance is computed across the channel dimension.
|
||||
from scripts.byol.byol_spinenet_playground import find_similar_latents, create_latent_database
|
||||
from utils import util
|
||||
from utils.options import dict_to_nonedict
|
||||
|
||||
|
||||
def structural_euc_dist(x, y):
    """Return the Euclidean (L2) distance between x and y along the last dimension.

    The squared element-wise difference is summed over dim=-1 only, so every
    leading dimension of the (broadcast) inputs is preserved in the result.

    NOTE(review): the file comment describes this as a distance "across the
    channel dimension"; that only holds if callers pass channels-last tensors --
    confirm against the call sites.

    Args:
        x: Tensor broadcastable against y.
        y: Tensor broadcastable against x.

    Returns:
        Tensor with the last dimension reduced away.
    """
    squared_diff = torch.square(x - y)
    # Named to avoid shadowing the builtin `sum`.
    squared_sum = torch.sum(squared_diff, dim=-1)
    return torch.sqrt(squared_sum)
|
||||
|
||||
|
||||
def cosine_similarity(x, y):
    """Return the negated cosine similarity of x and y after standardizing both.

    Each input is first passed through norm() (mean/std standardization along
    its last dimension). The similarity itself is computed by
    nn.CosineSimilarity() with its default arguments, and the result is negated.

    NOTE(review): the negation presumably lets this be used like a distance
    (smaller means more similar) -- confirm against the callers.

    Args:
        x: Tensor to compare.
        y: Tensor to compare against x.

    Returns:
        The negated cosine similarity tensor.
    """
    x = norm(x)
    y = norm(y)
    # Original author's note: it is probably better to just use this class to
    # perform the calculation; this wrapper was left here as a reminder.
    return -nn.CosineSimilarity()(x, y)
|
||||
|
||||
|
||||
def key_value_difference(x, y):
    """Return 2 - 2 * <x_hat, y_hat>, where x_hat and y_hat are L2-normalized.

    Both inputs are normalized to unit L2 length along the last dimension, then
    their element-wise product is summed over that dimension. The result is 0
    for vectors pointing the same way, 2 for orthogonal vectors and 4 for
    opposite vectors.

    Args:
        x: Tensor whose last dimension holds the vectors to compare.
        y: Tensor broadcastable against x.

    Returns:
        Tensor with the last dimension reduced away.
    """
    x_unit = F.normalize(x, dim=-1, p=2)
    y_unit = F.normalize(y, dim=-1, p=2)
    return 2 - 2 * (x_unit * y_unit).sum(dim=-1)
|
||||
|
||||
|
||||
def norm(x):
    """Standardize x along its last dimension: (x - mean) / std.

    The mean and standard deviation are computed over dim=-1 and kept as a
    size-1 dimension so they broadcast back against x. This replaces the
    hand-built reshape tuple with the equivalent keepdim=True.

    Args:
        x: Tensor to standardize.

    Returns:
        Tensor of the same shape as x.
    """
    mean = torch.mean(x, dim=-1, keepdim=True)
    std = torch.std(x, dim=-1, keepdim=True)
    return (x - mean) / std
|
||||
|
||||
|
||||
def im_norm(x):
    """Standardize each (batch, channel) plane of x, then map it to mean .5 / std .5.

    Statistics are taken over dims (2, 3) of a 4-D tensor and kept as size-1
    dimensions so they broadcast per plane.

    The previous implementation reshaped the (b, c) statistics to
    (b*c, 1, 1, 1), which only broadcasts correctly when c == 1. For other
    channel counts it either raised or mixed statistics between channels.
    Results for single-channel input are unchanged.

    Args:
        x: 4-D tensor; dims 2 and 3 are the ones normalized over.

    Returns:
        Tensor of the same shape as x.
    """
    mean = torch.mean(x, dim=(2, 3), keepdim=True)
    std = torch.std(x, dim=(2, 3), keepdim=True)
    return (((x - mean) / std) * .5) + .5
|
||||
|
||||
|
||||
def get_image_folder_dataloader(batch_size, num_workers):
    """Build a shuffling DataLoader over the hard-coded image folder dataset.

    The dataset options are wrapped with dict_to_nonedict before being handed
    to ImageFolderDataset.
    NOTE(review): presumably this makes missing option keys read as None --
    confirm in utils.options.

    Args:
        batch_size: Batch size passed to the DataLoader.
        num_workers: Number of loader worker processes passed to the DataLoader.

    Returns:
        A torch DataLoader over the ImageFolderDataset, with shuffle=True.
    """
    dataset_opt = dict_to_nonedict({
        'name': 'amalgam',
        'paths': ['F:\\4k6k\\datasets\\ns_images\\imagesets\\imageset_1024_square_with_new'],
        #'paths': ['F:\\4k6k\\datasets\\ns_images\\imagesets\\1024_test'],
        'weights': [1],
        'target_size': 256,
        'force_multiple': 32,
        'scale': 1
    })
    dataset = ImageFolderDataset(dataset_opt)
    return DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Load a UResNet50 from a training checkpoint and run a latent similarity search with it.
    pretrained_path = '../experiments/uresnet_pixpro_83k.pth'
    model = UResNet50(Bottleneck, [3,4,6,3]).to('cuda')
    sd = torch.load(pretrained_path)

    # Keep only the weights stored under the 'target_encoder.net.' prefix and
    # strip that prefix so the keys can be loaded into the bare model
    # (strict=True will raise if they do not line up).
    prefix = 'target_encoder.net.'
    resnet_sd = {k.replace(prefix, ''): v for k, v in sd.items() if prefix in k}
    model.load_state_dict(resnet_sd, strict=True)
    model.eval()

    with torch.no_grad():
        find_similar_latents(model, 0, 8, structural_euc_dist)
        #create_latent_database(model, batch_size=32)
|
|
@ -204,7 +204,7 @@ def tsne(X, no_dims=2, initial_dims=50, perplexity=30.0):
|
|||
return Y
|
||||
|
||||
|
||||
def run_tsne():
|
||||
def run_tsne_instance_level():
|
||||
print("Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset.")
|
||||
|
||||
limit = 4000
|
||||
|
@ -236,12 +236,12 @@ def run_tsne():
|
|||
|
||||
pyplot.scatter(Y[:, 0], Y[:, 1], 20, labels)
|
||||
pyplot.show()
|
||||
torch.save((Y, files[:limit]), "tsne_output.pth")
|
||||
torch.save((Y, files[:limit]), "../tsne_output.pth")
|
||||
|
||||
|
||||
# Uses the results from the calculation above to create a **massive** pdf plot that shows 1/8 size images on the tsne
|
||||
# spectrum.
|
||||
def plot_results_as_image_graph():
|
||||
def plot_instance_level_results_as_image_graph():
|
||||
Y, files = torch.load('tsne_output.pth')
|
||||
fig, ax = pyplot.subplots()
|
||||
fig.set_size_inches(200,200,forward=True)
|
||||
|
@ -258,6 +258,80 @@ def plot_results_as_image_graph():
|
|||
pyplot.savefig('tsne.pdf')
|
||||
|
||||
|
||||
# Fixed pairs of latent-map indices (used for dims 2 and 3 of the latent tensor) that the pixel-level
# t-SNE functions sample from every image.
random_coords = [(16,16), (14,14), (20,20), (24,24)]
|
||||
def run_tsne_pixel_level():
    """Run t-SNE on per-pixel latents sampled at `random_coords` and save the embedding.

    Loads '../results/byol_latents/latent_dict_1.pth', stacks the first
    `limit // len(random_coords)` latent maps and pulls one latent vector per
    entry of `random_coords` out of each map. The resulting rows are embedded
    with tsne(X, 2, 128, 20.0), i.e. no_dims=2, initial_dims=128,
    perplexity=20.0. The embedding is shown as a scatter plot and saved,
    together with the ids of the images used, to '../tsne_output_pix.pth'.

    Row layout of the saved embedding is coordinate-major: with N images, row
    `k * N + i` is image i sampled at random_coords[k].
    """
    limit = 4000
    # Images needed so that images * coordinates == limit rows.
    num_images = limit // len(random_coords)

    latent_dict = torch.load('../results/byol_latents/latent_dict_1.pth')
    ids, X = zip(*latent_dict.items())
    X = torch.stack(X, dim=0)[:num_images]

    # Unravel X into one latent per coordinate per image, chosen from fixed points. This serves as a
    # pseudorandom source since these images are not aligned.
    X = torch.cat([X[:, :, row, col] for row, col in random_coords], dim=0)
    labels = np.zeros(X.shape[0])  # We don't have any labels..

    with torch.no_grad():
        Y = tsne(X, 2, 128, 20.0)

    # NOTE(review): `opt` is a module-level object not visible in this excerpt;
    # presumably the script's parsed command-line options -- confirm.
    if opt.cuda:
        Y = Y.cpu().numpy()

    pyplot.scatter(Y[:, 0], Y[:, 1], 20, labels)
    pyplot.show()
    torch.save((Y, ids[:num_images]), "../tsne_output_pix.pth")
|
||||
|
||||
|
||||
# Uses the results from the calculation above to create a **massive** pdf plot that shows 1/8 size images on the tsne
|
||||
# spectrum.
|
||||
def plot_pixel_level_results_as_image_graph():
    """Render the pixel-level t-SNE embedding as a large PDF of image patches.

    Loads the embedding written by run_tsne_pixel_level()
    ('../tsne_output_pix.pth') and the image path list
    ('../results/byol_latents/all_paths.pth'). For every embedded point it
    crops the source-image patch around the sampled latent coordinate and
    draws it at the point's 2-D position. The figure is saved to
    'tsne_pix.pdf'.

    The embedding rows are coordinate-major: with N images, row `k * N + i`
    is image i sampled at random_coords[k], because run_tsne_pixel_level()
    concatenates one block of rows per coordinate. The previous code read the
    rows as image-major (image = b // 4, coordinate = b % 4), so patches were
    cropped from the wrong image and coordinate.
    """
    # NOTE(review): `ids` is loaded but images are looked up by position in `files`, as before.
    # Confirm that position i in the latent dict corresponds to files[i].
    Y, ids = torch.load('../tsne_output_pix.pth')
    files = torch.load('../results/byol_latents/all_paths.pth')
    fig, ax = pyplot.subplots()
    fig.set_size_inches(200,200,forward=True)
    ax.update_datalim(np.column_stack([Y[:, 0], Y[:, 1]]))
    ax.autoscale()

    expansion = 32  # Should be latent_compression(=8) * image_compression_at_inference(=4)
    margins = 1  # Keep in mind this will be multiplied by <expansion>
    num_images = Y.shape[0] // len(random_coords)
    for img_idx in tqdm(range(num_images)):
        # Read each source image once and reuse it for all of its sampled coordinates.
        baseim = pyplot.imread(files[img_idx])
        for coord_idx, (ct, cl) in enumerate(random_coords):
            # Row of Y holding this (image, coordinate) pair in coordinate-major layout.
            b = coord_idx * num_images + img_idx
            im = baseim[expansion*(ct-margins):expansion*(ct+margins),
                        expansion*(cl-margins):expansion*(cl+margins), :]
            im = OffsetImage(im, zoom=1)
            ab = AnnotationBbox(im, (Y[b, 0], Y[b, 1]), xycoords='data', frameon=False)
            ax.add_artist(ab)
    ax.scatter(Y[:, 0], Y[:, 1])

    pyplot.savefig('tsne_pix.pdf')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
#run_tsne()
|
||||
plot_results_as_image_graph()
|
||||
# For use with instance-level results (e.g. from byol_resnet_playground.py)
|
||||
#run_tsne_instance_level()
|
||||
#plot_instance_level_results_as_image_graph()
|
||||
|
||||
# For use with pixel-level results (e.g. from byol_uresnet_playground)
|
||||
#run_tsne_pixel_level()
|
||||
plot_pixel_level_results_as_image_graph()
|
Loading…
Reference in New Issue
Block a user