Move byol scripts around

This commit is contained in:
James Betker 2021-01-06 14:52:17 -07:00
parent 2f2f87bbea
commit 9680294430
5 changed files with 186 additions and 23 deletions

View File

@ -3,7 +3,7 @@ import torch
from models.spinenet_arch import SpineNet
if __name__ == '__main__':
pretrained_path = '../../experiments/resnet_byol_diffframe_115k.pth'
pretrained_path = '../../../experiments/resnet_byol_diffframe_115k.pth'
output_path = '../../experiments/resnet_byol_diffframe_115k_.pth'
wrap_key = ''

View File

@ -119,7 +119,7 @@ def produce_latent_dict(model):
id += batch_size
if id > 1000:
print("Saving checkpoint.."), paths), 'results.pth'), paths), '../results.pth')
id = 0
@ -128,7 +128,7 @@ def find_similar_latents(model, compare_fn=structural_euc_dist):
img = 'F:\\4k6k\\datasets\\ns_images\\imagesets\\1024_test\\80692045.jpg.jpg'
#img = 'F:\\4k6k\\datasets\\ns_images\\adrianna\\analyze\\analyze_xx\\nicky_xx.jpg'
output_path = '../../results/byol_resnet_similars'
output_path = '../../../results/byol_resnet_similars'
os.makedirs(output_path, exist_ok=True)
imglatent = get_latent_for_img(model, img).squeeze().unsqueeze(0)
_, c = imglatent.shape
@ -161,7 +161,7 @@ def find_similar_latents(model, compare_fn=structural_euc_dist):
if __name__ == '__main__':
pretrained_path = '../../experiments/resnet_byol_diffframe_115k.pth'
pretrained_path = '../../../experiments/resnet_byol_diffframe_115k.pth'
model = resnet50(pretrained=False).to('cuda')
sd = torch.load(pretrained_path)
resnet_sd = {}

View File

@ -19,6 +19,7 @@ from models.spinenet_arch import SpineNet
# Computes the structural euclidean distance between [x,y]. "Structural" here means the [h,w] dimensions are preserved
# and the distance is computed across the channel dimension.
from utils import util
from utils.options import dict_to_nonedict
def structural_euc_dist(x, y):
@ -50,7 +51,7 @@ def im_norm(x):
def get_image_folder_dataloader(batch_size, num_workers):
dataset_opt = {
dataset_opt = dict_to_nonedict({
'name': 'amalgam',
'paths': ['F:\\4k6k\\datasets\\ns_images\\imagesets\\imageset_1024_square_with_new'],
#'paths': ['F:\\4k6k\\datasets\\ns_images\\imagesets\\1024_test'],
@ -58,15 +59,14 @@ def get_image_folder_dataloader(batch_size, num_workers):
'target_size': 512,
'force_multiple': 32,
'scale': 1
dataset = ImageFolderDataset(dataset_opt)
return DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True)
def create_latent_database(model, model_index=0):
batch_size = 8
num_workers = 1
output_path = '../../results/byol_spinenet_latents/'
def create_latent_database(model, model_index=0, batch_size=8):
num_workers = 4
output_path = '../results/byol_latents/'
os.makedirs(output_path, exist_ok=True)
dataloader = get_image_folder_dataloader(batch_size, num_workers)
@ -76,7 +76,9 @@ def create_latent_database(model, model_index=0):
all_paths = []
for batch in tqdm(dataloader):
hq = batch['hq'].to('cuda')
latent = model(hq)[model_index] # BYOL trainer only trains the '4' output, which is indexed at [1]. Confusing.
latent = model(hq)
if isinstance(latent, tuple):
latent = latent[model_index]
for b in range(latent.shape[0]):
im_path = batch['HQ_path'][b]
@ -124,10 +126,10 @@ def _get_mins_from_latent_dictionary(latent, hq_img_repo, ld_file_name, batch_si
def find_similar_latents(model, model_index=0, lat_patch_size=16, compare_fn=structural_euc_dist):
img = 'F:\\4k6k\\datasets\\ns_images\\adrianna\\analyze\\analyze_xx\\adrianna_xx.jpg'
#img = 'F:\\4k6k\\datasets\\ns_images\\adrianna\\analyze\\analyze_xx\\nicky_xx.jpg'
hq_img_repo = '../../results/byol_spinenet_latents'
output_path = '../../results/byol_spinenet_similars'
batch_size = 2048
num_maps = 4
hq_img_repo = '../results/byol_latents'
output_path = '../results/byol_similars'
batch_size = 4096
num_maps = 1
lat_patch_mult = 512 // lat_patch_size
os.makedirs(output_path, exist_ok=True)
@ -135,7 +137,10 @@ def find_similar_latents(model, model_index=0, lat_patch_size=16, compare_fn=str
img_t = ToTensor()('cuda').unsqueeze(0)
_, _, h, w = img_t.shape
img_t = img_t[:, :, :128*(h//128), :128*(w//128)]
latent = model(img_t)[model_index]
latent = model(img_t)
if not isinstance(latent, tuple):
latent = (latent,)
latent = latent[model_index]
_, c, h, w = latent.shape
mins, min_offsets = [], []
@ -195,7 +200,7 @@ def find_similar_latents(model, model_index=0, lat_patch_size=16, compare_fn=str
def explore_latent_results(model):
batch_size = 16
num_workers = 1
num_workers = 4
output_path = '../../results/byol_spinenet_explore_latents/'
os.makedirs(output_path, exist_ok=True)
@ -225,7 +230,7 @@ class BYOLModelWrapper(nn.Module):
if __name__ == '__main__':
pretrained_path = '../../experiments/spinenet49_imgset_sbyol.pth'
pretrained_path = '../../../experiments/spinenet49_imgset_sbyol.pth'
model = SpineNet('49', in_channels=3, use_input_norm=True).to('cuda')
model.load_state_dict(torch.load(pretrained_path), strict=True)

View File

@ -0,0 +1,84 @@
import os
import shutil
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from PIL import Image
from import DataLoader
from torchvision.models.resnet import Bottleneck
from torchvision.transforms import ToTensor, Resize
from tqdm import tqdm
import numpy as np
import utils
from data.image_folder_dataset import ImageFolderDataset
from models.pixel_level_contrastive_learning.resnet_unet import UResNet50
from models.resnet_with_checkpointing import resnet50
from models.spinenet_arch import SpineNet
# Computes the structural euclidean distance between [x,y]. "Structural" here means the [h,w] dimensions are preserved
# and the distance is computed across the channel dimension.
from scripts.byol.byol_spinenet_playground import find_similar_latents, create_latent_database
from utils import util
from utils.options import dict_to_nonedict
def structural_euc_dist(x, y):
    """Return the Euclidean distance between `x` and `y` along the last dimension.

    The element-wise difference is squared, summed over `dim=-1` and square-rooted,
    so every leading dimension is preserved and only the last one is reduced away.

    NOTE(review): presumably callers place the channel axis last - confirm.

    Args:
        x: Tensor broadcastable against `y`.
        y: Tensor broadcastable against `x`.

    Returns:
        Tensor of distances with the last dimension removed.
    """
    squared_diff = torch.square(x - y)
    # Named `squared_sum` (not `sum`) so the builtin is not shadowed.
    squared_sum = torch.sum(squared_diff, dim=-1)
    return torch.sqrt(squared_sum)
def cosine_similarity(x, y):
    """Return the negated cosine similarity of the standardized inputs.

    Both tensors are first passed through `norm`; the similarity is then computed by
    `nn.CosineSimilarity` constructed with its default arguments. The sign is flipped,
    so a larger similarity yields a smaller return value.
    """
    similarity = nn.CosineSimilarity()
    x_normed = norm(x)
    y_normed = norm(y)
    return -similarity(x_normed, y_normed)
def key_value_difference(x, y):
    """Return `2 - 2 * <x_hat, y_hat>` taken over the last dimension.

    `x_hat` and `y_hat` are the inputs L2-normalized along `dim=-1`, so the result is
    0 when the vectors point the same way and 4 when they point in opposite directions.
    """
    x_unit = F.normalize(x, p=2, dim=-1)
    y_unit = F.normalize(y, p=2, dim=-1)
    dot = torch.sum(x_unit * y_unit, dim=-1)
    return 2 - 2 * dot
def norm(x):
    """Standardize `x` along its last dimension.

    Subtracts the mean and divides by the standard deviation, both computed over
    `dim=-1` with `torch.std`'s default settings. `keepdim=True` keeps the reduced
    axis as size 1 so the statistics broadcast back against `x` directly, instead of
    rebuilding the target shape by hand.

    Args:
        x: Tensor to standardize.

    Returns:
        Tensor with the same shape as `x`.
    """
    mean = torch.mean(x, dim=-1, keepdim=True)
    std = torch.std(x, dim=-1, keepdim=True)
    return (x - mean) / std
def im_norm(x):
    """Standardize each (batch, channel) plane of `x`, then rescale by 0.5 and shift by 0.5.

    The mean and standard deviation are taken over dims 2 and 3. `keepdim=True` leaves
    them shaped (b, c, 1, 1) so they line up with the matching batch and channel
    entries. The previous `reshape(-1, 1, 1, 1)` flattened batch and channel together,
    which only broadcast correctly for single-channel input; for that input the result
    is unchanged.

    Args:
        x: Tensor with at least 4 dimensions; statistics are taken over dims 2 and 3.

    Returns:
        Tensor with the same shape as `x`.
    """
    mean = torch.mean(x, dim=(2, 3), keepdim=True)
    std = torch.std(x, dim=(2, 3), keepdim=True)
    return (((x - mean) / std) * .5) + .5
def get_image_folder_dataloader(batch_size, num_workers):
    """Build a shuffling `DataLoader` over an `ImageFolderDataset`.

    The dataset options are hard-coded below and wrapped with `dict_to_nonedict`
    before being handed to `ImageFolderDataset`.

    Args:
        batch_size: Forwarded to `DataLoader` as `batch_size`.
        num_workers: Forwarded to `DataLoader` as `num_workers`.

    Returns:
        A `DataLoader` over the dataset with `shuffle=True`.
    """
    dataset_opt = dict_to_nonedict({
        'name': 'amalgam',
        'paths': ['F:\\4k6k\\datasets\\ns_images\\imagesets\\imageset_1024_square_with_new'],
        #'paths': ['F:\\4k6k\\datasets\\ns_images\\imagesets\\1024_test'],
        'weights': [1],
        'target_size': 256,
        'force_multiple': 32,
        'scale': 1
    })  # Closes both the options literal and the dict_to_nonedict(...) call.
    dataset = ImageFolderDataset(dataset_opt)
    return DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True)
if __name__ == '__main__':
    # Build the UResNet50 on the GPU, then read the checkpoint from disk.
    pretrained_path = '../experiments/uresnet_pixpro_83k.pth'
    model = UResNet50(Bottleneck, [3,4,6,3]).to('cuda')
    sd = torch.load(pretrained_path)

    # NOTE(review): the filter and the replacement both use an empty string, so every
    # key is kept and none is renamed. This looks like a key prefix that was lost -
    # confirm the intended prefix before relying on this.
    resnet_sd = {k.replace('', ''): v for k, v in sd.items() if '' in k}
    model.load_state_dict(resnet_sd, strict=True)

    # Inference only, so gradients are disabled.
    with torch.no_grad():
        find_similar_latents(model, 0, 8, structural_euc_dist)
        #create_latent_database(model, batch_size=32)

View File

@ -204,7 +204,7 @@ def tsne(X, no_dims=2, initial_dims=50, perplexity=30.0):
return Y
def run_tsne():
def run_tsne_instance_level():
print("Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset.")
limit = 4000
@ -236,12 +236,12 @@ def run_tsne():
pyplot.scatter(Y[:, 0], Y[:, 1], 20, labels), files[:limit]), "tsne_output.pth"), files[:limit]), "../tsne_output.pth")
# Uses the results from the calculation above to create a **massive** pdf plot that shows 1/8 size images on the tsne
# spectrum.
def plot_results_as_image_graph():
def plot_instance_level_results_as_image_graph():
Y, files = torch.load('tsne_output.pth')
fig, ax = pyplot.subplots()
@ -258,6 +258,80 @@ def plot_results_as_image_graph():
random_coords = [(16,16), (14,14), (20,20), (24,24)]
def run_tsne_pixel_level():
limit = 4000
latent_dict = torch.load('../results/byol_latents/latent_dict_1.pth')
id_vals = list(latent_dict.items())
ids, X = zip(*id_vals)
X = torch.stack(X, dim=0)[:limit//4]
# Unravel X into 4 latents per image, chosen from fixed points. This will serve as a pseudorandom source since these
# images are not aligned.
b,c,h,w = X.shape
X_c = []
for rc in random_coords:
X_c.append(X[:, :, rc[0], rc[1]])
X =, dim=0)
labels = np.zeros(X.shape[0]) # We don't have any labels..
# Confirm that the X data has the same number of points as the label data;
# otherwise scatter may raise an error.
assert(len(X[:, 0])==len(X[:,1]))
with torch.no_grad():
Y = tsne(X, 2, 128, 20.0)
if opt.cuda:
Y = Y.cpu().numpy()
# You may write result in two files
# print("Save Y values in file")
# Y1 = open("y1.txt", 'w')
# Y2 = open('y2.txt', 'w')
# for i in range(Y.shape[0]):
# Y1.write(str(Y[i,0])+"\n")
# Y2.write(str(Y[i,1])+"\n")
pyplot.scatter(Y[:, 0], Y[:, 1], 20, labels), ids[:limit//4]), "../tsne_output_pix.pth")
# Uses the results from the calculation above to create a **massive** pdf plot that shows 1/8 size images on the tsne
# spectrum.
def plot_pixel_level_results_as_image_graph():
Y, ids = torch.load('../tsne_output_pix.pth')
files = torch.load('../results/byol_latents/all_paths.pth')
fig, ax = pyplot.subplots()
ax.update_datalim(np.column_stack([Y[:, 0], Y[:, 1]]))
expansion = 32 # Should be latent_compression(=8) * image_compression_at_inference(=4)
margins = 1 # Keep in mind this will be multiplied by <expansion>
for b in tqdm(range(Y.shape[0])):
if b % 4 == 0:
id = b // 4
imgfile = files[id]
baseim = pyplot.imread(imgfile)
ct, cl = random_coords[b%4]
im = baseim[expansion*(ct-margins):expansion*(ct+margins),
im = OffsetImage(im, zoom=1)
ab = AnnotationBbox(im, (Y[b, 0], Y[b, 1]), xycoords='data', frameon=False)
ax.scatter(Y[:, 0], Y[:, 1])
if __name__ == "__main__":
# For use with instance-level results (e.g. from
# For use with pixel-level results (e.g. from byol_uresnet_playground)