Move byol scripts around

James Betker 2021-01-06 14:52:17 -07:00
parent 2f2f87bbea
commit 9680294430
5 changed files with 186 additions and 23 deletions

View File

@@ -3,7 +3,7 @@ import torch
from models.spinenet_arch import SpineNet
if __name__ == '__main__':
-pretrained_path = '../../experiments/resnet_byol_diffframe_115k.pth'
+pretrained_path = '../../../experiments/resnet_byol_diffframe_115k.pth'
output_path = '../../experiments/resnet_byol_diffframe_115k_.pth'
wrap_key = 'online_encoder.net.'
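The extraction loop itself falls outside this hunk, but given `wrap_key = 'online_encoder.net.'` (and the analogous `target_encoder.net.` loop in the new UResNet script below), the unwrap step presumably looks something like the following sketch; the dict-comprehension form is illustrative, not the file's actual code:

import torch

pretrained_path = '../../../experiments/resnet_byol_diffframe_115k.pth'
output_path = '../../experiments/resnet_byol_diffframe_115k_.pth'
wrap_key = 'online_encoder.net.'

# Strip the BYOL wrapper prefix so the bare encoder can load the weights,
# e.g. 'online_encoder.net.conv1.weight' -> 'conv1.weight'.
sd = torch.load(pretrained_path)
unwrapped = {k.replace(wrap_key, ''): v for k, v in sd.items() if wrap_key in k}
torch.save(unwrapped, output_path)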

View File

@@ -119,7 +119,7 @@ def produce_latent_dict(model):
id += batch_size
if id > 1000:
print("Saving checkpoint..")
-torch.save((latents, paths), 'results.pth')
+torch.save((latents, paths), '../results.pth')
id = 0
@@ -128,7 +128,7 @@ def find_similar_latents(model, compare_fn=structural_euc_dist):
img = 'F:\\4k6k\\datasets\\ns_images\\imagesets\\1024_test\\80692045.jpg.jpg'
#img = 'F:\\4k6k\\datasets\\ns_images\\adrianna\\analyze\\analyze_xx\\nicky_xx.jpg'
-output_path = '../../results/byol_resnet_similars'
+output_path = '../../../results/byol_resnet_similars'
os.makedirs(output_path, exist_ok=True)
imglatent = get_latent_for_img(model, img).squeeze().unsqueeze(0)
_, c = imglatent.shape
@@ -161,7 +161,7 @@ def find_similar_latents(model, compare_fn=structural_euc_dist):
if __name__ == '__main__':
-pretrained_path = '../../experiments/resnet_byol_diffframe_115k.pth'
+pretrained_path = '../../../experiments/resnet_byol_diffframe_115k.pth'
model = resnet50(pretrained=False).to('cuda')
sd = torch.load(pretrained_path)
resnet_sd = {}
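As a rough illustration of how the `(latents, paths)` tuple saved by `produce_latent_dict` can be consumed afterwards (the exact container types are an assumption, hence the `isinstance` guard):

import torch

# Load the checkpoint written by produce_latent_dict (path per this commit).
latents, paths = torch.load('../results.pth')
lat = torch.stack(latents) if isinstance(latents, list) else latents  # [N, c]

query = lat[0]  # stand-in for a latent from get_latent_for_img(...)
# Same metric as structural_euc_dist: euclidean distance over the channel dim.
dists = torch.sqrt(torch.sum(torch.square(lat - query), dim=-1))
nearest = torch.topk(dists, k=10, largest=False)
for d, i in zip(nearest.values, nearest.indices):
    print(f'{d.item():.4f}  {paths[i]}')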

View File

@@ -19,6 +19,7 @@ from models.spinenet_arch import SpineNet
# Computes the structural euclidean distance between [x,y]. "Structural" here means the [h,w] dimensions are preserved
# and the distance is computed across the channel dimension.
from utils import util
+from utils.options import dict_to_nonedict
def structural_euc_dist(x, y):
@@ -50,7 +51,7 @@ def im_norm(x):
def get_image_folder_dataloader(batch_size, num_workers):
-dataset_opt = {
+dataset_opt = dict_to_nonedict({
'name': 'amalgam',
'paths': ['F:\\4k6k\\datasets\\ns_images\\imagesets\\imageset_1024_square_with_new'],
#'paths': ['F:\\4k6k\\datasets\\ns_images\\imagesets\\1024_test'],
@@ -58,15 +59,14 @@ def get_image_folder_dataloader(batch_size, num_workers):
'target_size': 512,
'force_multiple': 32,
'scale': 1
-}
+})
dataset = ImageFolderDataset(dataset_opt)
return DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True)
-def create_latent_database(model, model_index=0):
-batch_size = 8
-num_workers = 1
-output_path = '../../results/byol_spinenet_latents/'
+def create_latent_database(model, model_index=0, batch_size=8):
+num_workers = 4
+output_path = '../results/byol_latents/'
os.makedirs(output_path, exist_ok=True)
dataloader = get_image_folder_dataloader(batch_size, num_workers)
@@ -76,7 +76,9 @@ def create_latent_database(model, model_index=0):
all_paths = []
for batch in tqdm(dataloader):
hq = batch['hq'].to('cuda')
-latent = model(hq)[model_index] # BYOL trainer only trains the '4' output, which is indexed at [1]. Confusing.
+latent = model(hq)
+if isinstance(latent, tuple):
+latent = latent[model_index]
for b in range(latent.shape[0]):
im_path = batch['HQ_path'][b]
all_paths.append(im_path)
@@ -124,10 +126,10 @@ def _get_mins_from_latent_dictionary(latent, hq_img_repo, ld_file_name, batch_si
def find_similar_latents(model, model_index=0, lat_patch_size=16, compare_fn=structural_euc_dist):
img = 'F:\\4k6k\\datasets\\ns_images\\adrianna\\analyze\\analyze_xx\\adrianna_xx.jpg'
#img = 'F:\\4k6k\\datasets\\ns_images\\adrianna\\analyze\\analyze_xx\\nicky_xx.jpg'
-hq_img_repo = '../../results/byol_spinenet_latents'
-output_path = '../../results/byol_spinenet_similars'
-batch_size = 2048
-num_maps = 4
+hq_img_repo = '../results/byol_latents'
+output_path = '../results/byol_similars'
+batch_size = 4096
+num_maps = 1
lat_patch_mult = 512 // lat_patch_size
os.makedirs(output_path, exist_ok=True)
@@ -135,7 +137,10 @@ def find_similar_latents(model, model_index=0, lat_patch_size=16, compare_fn=str
img_t = ToTensor()(Image.open(img)).to('cuda').unsqueeze(0)
_, _, h, w = img_t.shape
img_t = img_t[:, :, :128*(h//128), :128*(w//128)]
-latent = model(img_t)[model_index]
+latent = model(img_t)
+if not isinstance(latent, tuple):
+latent = (latent,)
+latent = latent[model_index]
_, c, h, w = latent.shape
mins, min_offsets = [], []
@@ -195,7 +200,7 @@ def find_similar_latents(model, model_index=0, lat_patch_size=16, compare_fn=str
def explore_latent_results(model):
batch_size = 16
-num_workers = 1
+num_workers = 4
output_path = '../../results/byol_spinenet_explore_latents/'
os.makedirs(output_path, exist_ok=True)
@@ -225,7 +230,7 @@ class BYOLModelWrapper(nn.Module):
if __name__ == '__main__':
-pretrained_path = '../../experiments/spinenet49_imgset_sbyol.pth'
+pretrained_path = '../../../experiments/spinenet49_imgset_sbyol.pth'
model = SpineNet('49', in_channels=3, use_input_norm=True).to('cuda')
model.load_state_dict(torch.load(pretrained_path), strict=True)
model.eval()
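The two hunks above apply the same normalization inline: the encoder may return either a single feature map or a tuple of maps (SpineNet returns several), and `model_index` picks one in the tuple case. A minimal sketch of that pattern as a helper, purely illustrative (the helper name is ours, not the file's):

def select_latent(output, model_index=0):
    # Encoders like SpineNet return a tuple of feature maps; plain encoders
    # return a single tensor. Normalize both cases to one tensor.
    if isinstance(output, tuple):
        return output[model_index]
    return output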

View File

@@ -0,0 +1,84 @@
import os
import shutil
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from PIL import Image
from torch.utils.data import DataLoader
from torchvision.models.resnet import Bottleneck
from torchvision.transforms import ToTensor, Resize
from tqdm import tqdm
import numpy as np
import utils
from data.image_folder_dataset import ImageFolderDataset
from models.pixel_level_contrastive_learning.resnet_unet import UResNet50
from models.resnet_with_checkpointing import resnet50
from models.spinenet_arch import SpineNet
# Computes the structural euclidean distance between [x,y]. "Structural" here means the [h,w] dimensions are preserved
# and the distance is computed across the channel dimension.
from scripts.byol.byol_spinenet_playground import find_similar_latents, create_latent_database
from utils import util
from utils.options import dict_to_nonedict
def structural_euc_dist(x, y):
diff = torch.square(x - y)
sum = torch.sum(diff, dim=-1)
return torch.sqrt(sum)
def cosine_similarity(x, y):
x = norm(x)
y = norm(y)
return -nn.CosineSimilarity()(x, y) # probably better to just use this class to perform the calc. Just left this here to remind myself.
def key_value_difference(x, y):
x = F.normalize(x, dim=-1, p=2)
y = F.normalize(y, dim=-1, p=2)
return 2 - 2 * (x * y).sum(dim=-1)
def norm(x):
sh = x.shape
sh_r = tuple([sh[i] if i != len(sh)-1 else 1 for i in range(len(sh))])
return (x - torch.mean(x, dim=-1).reshape(sh_r)) / torch.std(x, dim=-1).reshape(sh_r)
def im_norm(x):
return (((x - torch.mean(x, dim=(2,3)).reshape(-1,1,1,1)) / torch.std(x, dim=(2,3)).reshape(-1,1,1,1)) * .5) + .5
def get_image_folder_dataloader(batch_size, num_workers):
dataset_opt = dict_to_nonedict({
'name': 'amalgam',
'paths': ['F:\\4k6k\\datasets\\ns_images\\imagesets\\imageset_1024_square_with_new'],
#'paths': ['F:\\4k6k\\datasets\\ns_images\\imagesets\\1024_test'],
'weights': [1],
'target_size': 256,
'force_multiple': 32,
'scale': 1
})
dataset = ImageFolderDataset(dataset_opt)
return DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True)
if __name__ == '__main__':
pretrained_path = '../experiments/uresnet_pixpro_83k.pth'
model = UResNet50(Bottleneck, [3,4,6,3]).to('cuda')
sd = torch.load(pretrained_path)
resnet_sd = {}
for k, v in sd.items():
if 'target_encoder.net.' in k:
resnet_sd[k.replace('target_encoder.net.', '')] = v
model.load_state_dict(resnet_sd, strict=True)
model.eval()
with torch.no_grad():
find_similar_latents(model, 0, 8, structural_euc_dist)
#create_latent_database(model, batch_size=32)
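An aside on `key_value_difference` above: for L2-normalized vectors, ||x - y||^2 = 2 - 2(x . y), so it is exactly the squared euclidean distance between the normalized latents (the regression loss used by BYOL). A quick self-contained check:

import torch
import torch.nn.functional as F

x, y = torch.randn(4, 128), torch.randn(4, 128)
xn, yn = F.normalize(x, dim=-1, p=2), F.normalize(y, dim=-1, p=2)
lhs = (xn - yn).pow(2).sum(dim=-1)   # squared euclidean distance
rhs = 2 - 2 * (xn * yn).sum(dim=-1)  # key_value_difference
assert torch.allclose(lhs, rhs, atol=1e-5)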

View File

@@ -204,7 +204,7 @@ def tsne(X, no_dims=2, initial_dims=50, perplexity=30.0):
return Y
-def run_tsne():
+def run_tsne_instance_level():
print("Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset.")
limit = 4000
@@ -236,12 +236,12 @@ def run_tsne():
pyplot.scatter(Y[:, 0], Y[:, 1], 20, labels)
pyplot.show()
torch.save((Y, files[:limit]), "tsne_output.pth")
torch.save((Y, files[:limit]), "../tsne_output.pth")
# Uses the results from the calculation above to create a **massive** pdf plot that shows 1/8 size images on the tsne
# spectrum.
-def plot_results_as_image_graph():
+def plot_instance_level_results_as_image_graph():
Y, files = torch.load('tsne_output.pth')
fig, ax = pyplot.subplots()
fig.set_size_inches(200,200,forward=True)
@@ -258,6 +258,80 @@ def plot_results_as_image_graph():
pyplot.savefig('tsne.pdf')
random_coords = [(16,16), (14,14), (20,20), (24,24)]
def run_tsne_pixel_level():
limit = 4000
latent_dict = torch.load('../results/byol_latents/latent_dict_1.pth')
id_vals = list(latent_dict.items())
ids, X = zip(*id_vals)
X = torch.stack(X, dim=0)[:limit//4]
# Unravel X into 4 latents per image, chosen from fixed points. This will serve as a pseudorandom source since these
# images are not aligned.
b,c,h,w = X.shape
X_c = []
for rc in random_coords:
X_c.append(X[:, :, rc[0], rc[1]])
X = torch.cat(X_c, dim=0)
labels = np.zeros(X.shape[0]) # We don't have any labels..
# Confirm that X and the labels contain the same number of points;
# otherwise scatter() below may raise an error.
assert(len(X[:, 0])==len(X[:,1]))
assert(len(X)==len(labels))
with torch.no_grad():
Y = tsne(X, 2, 128, 20.0)
if opt.cuda:
Y = Y.cpu().numpy()
# Optionally, write the result out as two files:
# print("Save Y values in file")
# Y1 = open("y1.txt", 'w')
# Y2 = open('y2.txt', 'w')
# for i in range(Y.shape[0]):
# Y1.write(str(Y[i,0])+"\n")
# Y2.write(str(Y[i,1])+"\n")
pyplot.scatter(Y[:, 0], Y[:, 1], 20, labels)
pyplot.show()
torch.save((Y, ids[:limit//4]), "../tsne_output_pix.pth")
# Uses the results from the calculation above to create a **massive** pdf plot that shows 1/8 size images on the tsne
# spectrum.
def plot_pixel_level_results_as_image_graph():
Y, ids = torch.load('../tsne_output_pix.pth')
files = torch.load('../results/byol_latents/all_paths.pth')
fig, ax = pyplot.subplots()
fig.set_size_inches(200,200,forward=True)
ax.update_datalim(np.column_stack([Y[:, 0], Y[:, 1]]))
ax.autoscale()
expansion = 32 # Should be latent_compression(=8) * image_compression_at_inference(=4)
margins = 1 # Keep in mind this will be multiplied by <expansion>
for b in tqdm(range(Y.shape[0])):
if b % 4 == 0:
id = b // 4
imgfile = files[id]
baseim = pyplot.imread(imgfile)
ct, cl = random_coords[b%4]
im = baseim[expansion*(ct-margins):expansion*(ct+margins),
expansion*(cl-margins):expansion*(cl+margins),:]
im = OffsetImage(im, zoom=1)
ab = AnnotationBbox(im, (Y[b, 0], Y[b, 1]), xycoords='data', frameon=False)
ax.add_artist(ab)
ax.scatter(Y[:, 0], Y[:, 1])
pyplot.savefig('tsne_pix.pdf')
if __name__ == "__main__":
-#run_tsne()
-plot_results_as_image_graph()
+# For use with instance-level results (e.g. from byol_resnet_playground.py)
+#run_tsne_instance_level()
+#plot_instance_level_results_as_image_graph()
+# For use with pixel-level results (e.g. from byol_uresnet_playground)
+#run_tsne_pixel_level()
+plot_pixel_level_results_as_image_graph()
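For reference, the crop arithmetic in `plot_pixel_level_results_as_image_graph` maps a latent-space coordinate back to pixels with `expansion = 32` (latent stride 8 x inference downscale 4, per the comment above); with `margins = 1` each point becomes a 64x64 pixel crop. A sketch of that mapping (the helper name is ours, not the script's):

def latent_point_to_crop(ct, cl, expansion=32, margins=1):
    # (ct, cl) indexes the latent map; scale by `expansion` to reach pixel
    # space, taking `margins` latent cells on each side of the point.
    return (expansion * (ct - margins), expansion * (ct + margins),
            expansion * (cl - margins), expansion * (cl + margins))

# e.g. latent point (16, 16) -> pixel rows 480:544, cols 480:544
print(latent_point_to_crop(16, 16))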