DL-Art-School/codes/scripts/byol_spinenet_playground.py

import os
import shutil

import torch
import torch.nn as nn
import torchvision
from PIL import Image
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor, Resize
from tqdm import tqdm
import numpy as np

from data.image_folder_dataset import ImageFolderDataset
from models.archs.spinenet_arch import SpineNet


def structural_euc_dist(x, y):
    """Return the euclidean distance between `x` and `y` along the last dimension.

    "Structural" here means that every leading dimension is preserved; only the
    final dimension is reduced. To get a per-location distance across channels
    the caller therefore has to pass channel-last tensors.

    Args:
        x: Tensor whose last dimension holds the vectors to compare.
        y: Tensor broadcastable against `x`.

    Returns:
        Tensor of sqrt(sum((x - y) ** 2)) taken over the last dimension; its
        shape is the broadcast shape of the inputs without that dimension.
    """
    squared_diff = torch.square(x - y)
    # Deliberately not called `sum`, so the builtin stays usable in this scope.
    squared_sum = torch.sum(squared_diff, dim=-1)
    return torch.sqrt(squared_sum)


def cosine_similarity(x, y):
    """Return the negated cosine similarity of `x` and `y` after standardising both.

    Each input is first passed through `norm`, which standardises along the
    last dimension. The cosine similarity is then taken along dim 1 (the
    default axis of `nn.CosineSimilarity`) and its sign is flipped.

    NOTE(review): standardisation runs over the last dimension while the
    similarity runs over dim 1 - confirm that this mix of axes is intended.
    An input that is constant along its last dimension has zero std there and
    becomes NaN inside `norm`.
    """
    x_std, y_std = norm(x), norm(y)
    similarity = nn.CosineSimilarity(dim=1)
    return -similarity(x_std, y_std)


def norm(x):
    """Standardise `x` along its last dimension.

    Subtracts the mean and divides by the standard deviation (torch's default
    estimator), both computed over dim -1. The result has the same shape as
    `x`. No epsilon is applied, so a slice with zero std yields NaN or inf.
    """
    mean = torch.mean(x, dim=-1, keepdim=True)
    std = torch.std(x, dim=-1, keepdim=True)
    return (x - mean) / std


def im_norm(x):
    """Standardise every image plane of `x` and remap it to mean 0.5 / std 0.5.

    Mean and standard deviation are taken over dims (2, 3) separately for each
    (batch, channel) plane. The statistics keep their reduced dimensions so
    they broadcast against `x` for any channel count; flattening them with
    `reshape(-1, 1, 1, 1)` only lined up when there was exactly one channel.

    Args:
        x: 4-D tensor; dims 2 and 3 are the ones being normalised over.

    Returns:
        Tensor with the same shape as `x`. No epsilon is applied, so a plane
        with zero std yields NaN or inf.
    """
    mean = torch.mean(x, dim=(2, 3), keepdim=True)
    std = torch.std(x, dim=(2, 3), keepdim=True)
    return (((x - mean) / std) * .5) + .5


def get_image_folder_dataloader(batch_size, num_workers):
    """Build a shuffling DataLoader over the hard-coded image folder dataset.

    Args:
        batch_size: Forwarded to `DataLoader`.
        num_workers: Forwarded to `DataLoader`.

    Returns:
        A `DataLoader` (shuffle enabled) wrapping an `ImageFolderDataset`
        constructed from the option dict below.
    """
    # Alternative image set that has been used here:
    # 'F:\\4k6k\\datasets\\ns_images\\imagesets\\imageset_1024_square_with_new'
    image_roots = ['F:\\4k6k\\datasets\\ns_images\\imagesets\\1024_test']
    dataset_opt = {
        'name': 'amalgam',
        'paths': image_roots,
        'weights': [1],
        'target_size': 512,
        'force_multiple': 32,
        'scale': 1,
    }
    return DataLoader(
        ImageFolderDataset(dataset_opt),
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=True,
    )


def create_latent_database(model):
    """Run `model` over the image folder dataset and dump its latents to disk.

    Latents are collected in a dict keyed by a running sample index. Each time
    another 1000 samples have been seen, the dict is written to
    `latent_dict_<n>.pth` (n starts at 1) and emptied, and the list of every
    image path seen so far is rewritten to `all_paths.pth`.

    NOTE(review): latents gathered after the last multiple of 1000 are never
    written out - confirm that dropping the tail is intended.

    Args:
        model: Callable applied to a CUDA image batch; element [1] of its
            output is taken as the latent batch.
    """
    batch_size, num_workers = 8, 1
    output_path = '../../results/byol_spinenet_latents/'
    checkpoint_every = 1000

    os.makedirs(output_path, exist_ok=True)
    dataloader = get_image_folder_dataloader(batch_size, num_workers)

    sample_idx = 0
    dict_count = 1
    latent_dict, all_paths = {}, []
    for batch in tqdm(dataloader):
        # BYOL trainer only trains the '4' output, which is indexed at [1]. Confusing.
        latent = model(batch['hq'].to('cuda'))[1]
        for b, sample_latent in enumerate(latent):
            all_paths.append(batch['HQ_path'][b])
            latent_dict[sample_idx] = sample_latent.detach().cpu()
            sample_idx += 1
            if sample_idx % checkpoint_every != 0:
                continue
            # A full block of samples has been collected: flush it to disk.
            print("Saving checkpoint..")
            torch.save(latent_dict, os.path.join(output_path, "latent_dict_%i.pth" % (dict_count,)))
            latent_dict = {}
            torch.save(all_paths, os.path.join(output_path, "all_paths.pth"))
            dict_count += 1


def _get_mins_from_latent_dictionary(latent, hq_img_repo, ld_file_name, batch_size):
    """Find the nearest stored latent vector for every spatial location of `latent`.

    The dictionary file is loaded, its latents are flattened into one row per
    stored spatial location, and the euclidean distance between every location
    of `latent` and every row is evaluated `batch_size` rows at a time.

    Args:
        latent: Tensor of shape (1, c, h, w). Only a batch of one reshapes into
            the per-location layout used below.
        hq_img_repo: Directory that holds the latent dictionary files.
        ld_file_name: File name of the dictionary to load from `hq_img_repo`.
        batch_size: Number of stored rows compared per step. It must divide the
            total number of rows, otherwise the index reconstruction is wrong.

    Returns:
        Tuple `(distances, indices, count)`: for each of the h*w locations the
        smallest distance and the row index (into the flattened dictionary) at
        which it occurs, both as CPU tensors, plus the total number of rows.

    Raises:
        AssertionError: If dim 2 of the stored latents is not 32, or if the row
            count is not a multiple of `batch_size`.
    """
    _, c, h, w = latent.shape
    # NOTE: torch.load unpickles its input; only use it on dictionary files
    # that were produced locally.
    lat_dict = torch.load(os.path.join(hq_img_repo, ld_file_name))
    # Move channels last so that each row of the flattened tensor is one vector.
    comparables = torch.stack(list(lat_dict.values()), dim=0).permute(0, 2, 3, 1)
    assert comparables.shape[1] == 32
    comparables = comparables.reshape(-1, c)
    num_comparables = len(comparables)
    # Checked before splitting: the reconstruction logic doesn't work otherwise.
    assert num_comparables % batch_size == 0

    # (1, c, h, w) -> (h*w, 1, c): one query vector per spatial location.
    clat = latent.reshape(1, -1, h * w).permute(2, 0, 1)
    chunk_mins, chunk_argmins = [], []
    for cpbl_chunk in tqdm(torch.split(comparables, batch_size)):
        # Follow the query's device instead of assuming a fixed one.
        cpbl_chunk = cpbl_chunk.to(latent.device)
        dist = structural_euc_dist(clat, cpbl_chunk.unsqueeze(0))
        chunk_min = torch.min(dist, dim=-1)
        chunk_mins.append(chunk_min[0])
        chunk_argmins.append(chunk_min[1])

    # Per location: which chunk holds the overall minimum.
    best_dist, best_chunk = torch.min(torch.stack(chunk_mins, dim=-1), dim=-1)
    # Pick, per location, the within-chunk offset that belongs to the winning
    # chunk, then turn (chunk, offset) into a row index of `comparables`.
    offsets = torch.stack(chunk_argmins, dim=-1)
    best_offset = offsets.gather(-1, best_chunk.unsqueeze(-1)).squeeze(-1)
    best_index = best_chunk * batch_size + best_offset

    return best_dist.cpu(), best_index.cpu(), num_comparables


def find_similar_latents(model):
    """Rebuild a query image out of the nearest-latent patches from the image bank.

    The query image is encoded with `model`, every spatial location of its
    latent is matched against the stored latent dictionaries
    (`latent_dict_1.pth` .. `latent_dict_<num_maps>.pth`), and for each location
    the 16x16 patch of the matching bank image is pasted into an output image.
    Written to `output_path`: `output.png`, `source.png`, one dimmed bank image
    per match with the matched patch left at full brightness, and `map.html`,
    an image map linking each output tile to its dimmed bank image.

    Args:
        model: Callable applied to the CUDA query image; element [1] of its
            output is used as the latent.
    """
    img = 'F:\\4k6k\\datasets\\ns_images\\adrianna\\analyze\\analyze_xx\\adrianna_xx.jpg'
    #img = 'F:\\4k6k\\datasets\\ns_images\\adrianna\\analyze\\analyze_xx\\nicky_xx.jpg'
    hq_img_repo = '../../results/byol_spinenet_latents'
    output_path = '../../results/byol_spinenet_similars'
    batch_size = 1024
    num_maps = 8

    os.makedirs(output_path, exist_ok=True)
    # Bank image paths; indexed below by the image index recovered from a match.
    img_bank_paths = torch.load(os.path.join(hq_img_repo, "all_paths.pth"))
    img_t = ToTensor()(Image.open(img)).to('cuda').unsqueeze(0)
    _, _, h, w = img_t.shape
    # Crop height and width down to the nearest multiple of 128.
    img_t = img_t[:, :, :128*(h//128), :128*(w//128)]

    latent = model(img_t)[1]
    # From here on h and w are the latent's spatial size, not the image's.
    _, c, h, w = latent.shape
    mins, min_offsets = [], []
    total_latents = -1
    for d_id in range(1,num_maps+1):
        mn, of, tl = _get_mins_from_latent_dictionary(latent, hq_img_repo, "latent_dict_%i.pth" % (d_id), batch_size)
        # Every dictionary must report the same row count, because the global
        # index computed below multiplies the dictionary index by it.
        if total_latents != -1:
            assert total_latents == tl
        else:
            total_latents = tl
        mins.append(mn)
        min_offsets.append(of)
    # Per latent location: the dictionary (position in the list) with the smallest distance.
    mins = torch.min(torch.stack(mins, dim=-1), dim=-1)
    # Rewrite each winning dictionary index in place into a global row index:
    # dictionary index * rows per dictionary + row offset within that dictionary.
    # There's some way to do this in torch, I just can't figure it out..
    for i in range(len(mins[1])):
        mins[1][i] = mins[1][i] * total_latents + min_offsets[mins[1][i]][i]
    min_ids = mins[1]

    print("Constructing image map..")
    # HTML template; the single %s receives the <area> tags collected below.
    doc_out = '''
    <html><body><img id="imgmap" src="source.png" usemap="#map">
    <map name="map">%s</map><br>
    <button onclick="if(imgmap.src.includes('output.png')){imgmap.src='source.png';}else{imgmap.src='output.png';}">Swap Images</button>
    </body></html>
    '''
    img_map_areas = []
    # One 16x16 output tile per latent location.
    img_out = torch.zeros((1,3,h*16,w*16))
    for i, ind in enumerate(tqdm(min_ids)):
        # Global row index -> (bank image index, latent row, latent column),
        # treating each bank image as a 32x32 grid of latent locations.
        u = np.unravel_index(ind.item(), (num_maps*total_latents//(32*32),32,32))
        # Position of the current query location inside the query latent grid.
        h_, w_ = np.unravel_index(i, (h, w))

        # NOTE: `img` is rebound here from the query path to a bank image tensor.
        img = ToTensor()(Resize((512, 512))(Image.open(img_bank_paths[u[0]])))
        # 512 pixels over 32 latent cells gives 16 pixels per cell.
        t = 16 * u[1]
        l = 16 * u[2]
        patch = img[:, t:t+16, l:l+16]
        img_out[:,:,h_*16:h_*16+16,w_*16:w_*16+16] = patch

        # Also save the image with a masked map
        mask = torch.full_like(img, fill_value=.3)
        mask[:, t:t+16, l:l+16] = 1
        masked_img = img * mask
        # The name depends only on (t, l, image index), so a patch that is
        # matched more than once overwrites the same file.
        masked_src_img_output_file = os.path.join(output_path, "%i_%i__%i.png" % (t, l, u[0]))
        torchvision.utils.save_image(masked_img, masked_src_img_output_file)

        # Update the image map areas.
        # NOTE(review): the href includes `output_path`, although map.html is
        # itself written into `output_path` - confirm the link resolves.
        img_map_areas.append('<area shape="rect" coords="%i,%i,%i,%i" href="%s">' % (w_*16,h_*16,w_*16+16,h_*16+16,masked_src_img_output_file))
    torchvision.utils.save_image(img_out, os.path.join(output_path, "output.png"))
    torchvision.utils.save_image(img_t, os.path.join(output_path, "source.png"))
    doc_out = doc_out % ('\n'.join(img_map_areas))
    with open(os.path.join(output_path, 'map.html'), 'w') as f:
        print(doc_out, file=f)


def explore_latent_results(model):
    """Save, per batch, a heatmap comparing every latent location to the center one.

    For each batch the latent vector at the spatial center is tiled over the
    whole latent grid, compared to the latent with `cosine_similarity`, passed
    through `im_norm` and written to `<batch index>.png` in the output folder.

    NOTE(review): the tiled center is constant along the last dimension, so the
    standardisation inside `cosine_similarity` looks like it divides 0 by 0
    there - confirm the saved maps are not NaN.

    Args:
        model: Callable applied to a CUDA image batch; element [1] of its
            output is used as the latent.
    """
    batch_size, num_workers = 16, 1
    output_path = '../../results/byol_spinenet_explore_latents/'

    os.makedirs(output_path, exist_ok=True)
    dataloader = get_image_folder_dataloader(batch_size, num_workers)
    for batch_idx, batch in enumerate(tqdm(dataloader)):
        # BYOL trainer only trains the '4' output, which is indexed at [1]. Confusing.
        latent = model(batch['hq'].to('cuda'))[1]
        _, _, h, w = latent.shape
        # Tile the center vector over the full grid so it can be compared
        # against every location in one call.
        center = latent[:, :, h // 2, w // 2][:, :, None, None]
        centers = center.repeat(1, 1, h, w)
        heatmap = im_norm(cosine_similarity(latent, centers).unsqueeze(1))
        torchvision.utils.save_image(heatmap, os.path.join(output_path, "%i.png" % batch_idx))


if __name__ == '__main__':
    # Checkpoint whose weights are loaded into the network below.
    pretrained_path = '../../experiments/spinenet49_imgset_byol.pth'

    # Build the network on the GPU, then load the weights; strict=True makes
    # any key mismatch between checkpoint and model an error.
    model = SpineNet('49', in_channels=3, use_input_norm=True)
    model = model.to('cuda')
    model.load_state_dict(torch.load(pretrained_path), strict=True)
    model.eval()

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        find_similar_latents(model)
spinenet latent playground! 2020-12-06 03:30:36 +00:00			`import os`
			`import shutil`

			`import torch`
			`import torch.nn as nn`
			`import torchvision`
Spinenet playground 2020-12-07 19:49:32 +00:00			`from PIL import Image`
spinenet latent playground! 2020-12-06 03:30:36 +00:00			`from torch.utils.data import DataLoader`
Spinenet playground 2020-12-07 19:49:32 +00:00			`from torchvision.transforms import ToTensor, Resize`
spinenet latent playground! 2020-12-06 03:30:36 +00:00			`from tqdm import tqdm`
Spinenet playground 2020-12-07 19:49:32 +00:00			`import numpy as np`
spinenet latent playground! 2020-12-06 03:30:36 +00:00
			`from data.image_folder_dataset import ImageFolderDataset`
			`from models.archs.spinenet_arch import SpineNet`


			`# Computes the structural euclidean distance between [x,y]. "Structural" here means the [h,w] dimensions are preserved`
			`# and the distance is computed across the channel dimension.`
			`def structural_euc_dist(x, y):`
			`diff = torch.square(x - y)`
Spinenet playground 2020-12-07 19:49:32 +00:00			`sum = torch.sum(diff, dim=-1)`
spinenet latent playground! 2020-12-06 03:30:36 +00:00			`return torch.sqrt(sum)`


			`def cosine_similarity(x, y):`
Spinenet playground 2020-12-07 19:49:32 +00:00			`x = norm(x)`
			`y = norm(y)`
			`return -nn.CosineSimilarity()(x, y) # probably better to just use this class to perform the calc. Just left this here to remind myself.`


			`def norm(x):`
			`sh = x.shape`
			`sh_r = tuple([sh[i] if i != len(sh)-1 else 1 for i in range(len(sh))])`
			`return (x - torch.mean(x, dim=-1).reshape(sh_r)) / torch.std(x, dim=-1).reshape(sh_r)`
spinenet latent playground! 2020-12-06 03:30:36 +00:00

			`def im_norm(x):`
			`return (((x - torch.mean(x, dim=(2,3)).reshape(-1,1,1,1)) / torch.std(x, dim=(2,3)).reshape(-1,1,1,1)) * .5) + .5`


			`def get_image_folder_dataloader(batch_size, num_workers):`
			`dataset_opt = {`
			`'name': 'amalgam',`
Spinenet playground 2020-12-07 19:49:32 +00:00			`#'paths': ['F:\\4k6k\\datasets\\ns_images\\imagesets\\imageset_1024_square_with_new'],`
			`'paths': ['F:\\4k6k\\datasets\\ns_images\\imagesets\\1024_test'],`
spinenet latent playground! 2020-12-06 03:30:36 +00:00			`'weights': [1],`
			`'target_size': 512,`
			`'force_multiple': 32,`
			`'scale': 1`
			`}`
			`dataset = ImageFolderDataset(dataset_opt)`
Spinenet playground 2020-12-07 19:49:32 +00:00			`return DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True)`
spinenet latent playground! 2020-12-06 03:30:36 +00:00

			`def create_latent_database(model):`
			`batch_size = 8`
			`num_workers = 1`
			`output_path = '../../results/byol_spinenet_latents/'`

			`os.makedirs(output_path, exist_ok=True)`
			`dataloader = get_image_folder_dataloader(batch_size, num_workers)`
			`id = 0`
Spinenet playground 2020-12-07 19:49:32 +00:00			`dict_count = 1`
spinenet latent playground! 2020-12-06 03:30:36 +00:00			`latent_dict = {}`
Spinenet playground 2020-12-07 19:49:32 +00:00			`all_paths = []`
spinenet latent playground! 2020-12-06 03:30:36 +00:00			`for batch in tqdm(dataloader):`
Stage 2020-12-08 07:33:07 +00:00			`hq = batch['hq'].to('cuda')`
spinenet latent playground! 2020-12-06 03:30:36 +00:00			`latent = model(hq)[1] # BYOL trainer only trains the '4' output, which is indexed at [1]. Confusing.`
			`for b in range(latent.shape[0]):`
Spinenet playground 2020-12-07 19:49:32 +00:00			`im_path = batch['HQ_path'][b]`
			`all_paths.append(im_path)`
spinenet latent playground! 2020-12-06 03:30:36 +00:00			`latent_dict[id] = latent[b].detach().cpu()`
Spinenet playground 2020-12-07 19:49:32 +00:00			`if (id+1) % 1000 == 0:`
spinenet latent playground! 2020-12-06 03:30:36 +00:00			`print("Saving checkpoint..")`
Spinenet playground 2020-12-07 19:49:32 +00:00			`torch.save(latent_dict, os.path.join(output_path, "latent_dict_%i.pth" % (dict_count,)))`
			`latent_dict = {}`
			`torch.save(all_paths, os.path.join(output_path, "all_paths.pth"))`
			`dict_count += 1`
spinenet latent playground! 2020-12-06 03:30:36 +00:00			`id += 1`


Spinenet playground 2020-12-07 19:49:32 +00:00			`def _get_mins_from_latent_dictionary(latent, hq_img_repo, ld_file_name, batch_size):`
			`_, c, h, w = latent.shape`
			`lat_dict = torch.load(os.path.join(hq_img_repo, ld_file_name))`
			`comparables = torch.stack(list(lat_dict.values()), dim=0).permute(0,2,3,1)`
			`cbl_shape = comparables.shape[:3]`
			`assert cbl_shape[1] == 32`
			`comparables = comparables.reshape(-1, c)`

			`clat = latent.reshape(1,-1,h*w).permute(2,0,1)`
			`cpbl_chunked = torch.chunk(comparables, len(comparables) // batch_size)`
			`assert len(comparables) % batch_size == 0 # The reconstruction logic doesn't work if this is not the case.`
			`mins = []`
			`min_offsets = []`
			`for cpbl_chunk in tqdm(cpbl_chunked):`
Stage 2020-12-08 07:33:07 +00:00			`cpbl_chunk = cpbl_chunk.to('cuda')`
Spinenet playground 2020-12-07 19:49:32 +00:00			`dist = structural_euc_dist(clat, cpbl_chunk.unsqueeze(0))`
			`_min = torch.min(dist, dim=-1)`
			`mins.append(_min[0])`
			`min_offsets.append(_min[1])`
			`mins = torch.min(torch.stack(mins, dim=-1), dim=-1)`
			`# There's some way to do this in torch, I just can't figure it out..`
			`for i in range(len(mins[1])):`
			`mins[1][i] = mins[1][i] * batch_size + min_offsets[mins[1][i]][i]`

			`return mins[0].cpu(), mins[1].cpu(), len(comparables)`


			`def find_similar_latents(model):`
			`img = 'F:\\4k6k\\datasets\\ns_images\\adrianna\\analyze\\analyze_xx\\adrianna_xx.jpg'`
			`#img = 'F:\\4k6k\\datasets\\ns_images\\adrianna\\analyze\\analyze_xx\\nicky_xx.jpg'`
			`hq_img_repo = '../../results/byol_spinenet_latents'`
			`output_path = '../../results/byol_spinenet_similars'`
			`batch_size = 1024`
			`num_maps = 8`

			`os.makedirs(output_path, exist_ok=True)`
			`img_bank_paths = torch.load(os.path.join(hq_img_repo, "all_paths.pth"))`
Stage 2020-12-08 07:33:07 +00:00			`img_t = ToTensor()(Image.open(img)).to('cuda').unsqueeze(0)`
Spinenet playground 2020-12-07 19:49:32 +00:00			`_, _, h, w = img_t.shape`
			`img_t = img_t[:, :, :128(h//128), :128(w//128)]`

			`latent = model(img_t)[1]`
			`_, c, h, w = latent.shape`
			`mins, min_offsets = [], []`
			`total_latents = -1`
			`for d_id in range(1,num_maps+1):`
			`mn, of, tl = _get_mins_from_latent_dictionary(latent, hq_img_repo, "latent_dict_%i.pth" % (d_id), batch_size)`
			`if total_latents != -1:`
			`assert total_latents == tl`
			`else:`
			`total_latents = tl`
			`mins.append(mn)`
			`min_offsets.append(of)`
			`mins = torch.min(torch.stack(mins, dim=-1), dim=-1)`
			`# There's some way to do this in torch, I just can't figure it out..`
			`for i in range(len(mins[1])):`
			`mins[1][i] = mins[1][i] * total_latents + min_offsets[mins[1][i]][i]`
			`min_ids = mins[1]`

			`print("Constructing image map..")`
			`doc_out = '''`
			`<html><body><img id="imgmap" src="source.png" usemap="#map">`
			`<map name="map">%s</map><br>`
			`<button onclick="if(imgmap.src.includes('output.png')){imgmap.src='source.png';}else{imgmap.src='output.png';}">Swap Images</button>`
			`</body></html>`
			`'''`
			`img_map_areas = []`
			`img_out = torch.zeros((1,3,h16,w16))`
			`for i, ind in enumerate(tqdm(min_ids)):`
			`u = np.unravel_index(ind.item(), (num_mapstotal_latents//(3232),32,32))`
			`h_, w_ = np.unravel_index(i, (h, w))`

			`img = ToTensor()(Resize((512, 512))(Image.open(img_bank_paths[u[0]])))`
			`t = 16 * u[1]`
			`l = 16 * u[2]`
			`patch = img[:, t:t+16, l:l+16]`
			`img_out[:,:,h_16:h_16+16,w_16:w_16+16] = patch`

			`# Also save the image with a masked map`
			`mask = torch.full_like(img, fill_value=.3)`
			`mask[:, t:t+16, l:l+16] = 1`
			`masked_img = img * mask`
			`masked_src_img_output_file = os.path.join(output_path, "%i_%i__%i.png" % (t, l, u[0]))`
			`torchvision.utils.save_image(masked_img, masked_src_img_output_file)`

			`# Update the image map areas.`
			`img_map_areas.append('<area shape="rect" coords="%i,%i,%i,%i" href="%s">' % (w_16,h_16,w_16+16,h_16+16,masked_src_img_output_file))`
			`torchvision.utils.save_image(img_out, os.path.join(output_path, "output.png"))`
			`torchvision.utils.save_image(img_t, os.path.join(output_path, "source.png"))`
			`doc_out = doc_out % ('\n'.join(img_map_areas))`
			`with open(os.path.join(output_path, 'map.html'), 'w') as f:`
			`print(doc_out, file=f)`


spinenet latent playground! 2020-12-06 03:30:36 +00:00			`def explore_latent_results(model):`
Spinenet playground 2020-12-07 19:49:32 +00:00			`batch_size = 16`
spinenet latent playground! 2020-12-06 03:30:36 +00:00			`num_workers = 1`
			`output_path = '../../results/byol_spinenet_explore_latents/'`

			`os.makedirs(output_path, exist_ok=True)`
			`dataloader = get_image_folder_dataloader(batch_size, num_workers)`
			`id = 0`
			`for batch in tqdm(dataloader):`
Stage 2020-12-08 07:33:07 +00:00			`hq = batch['hq'].to('cuda')`
spinenet latent playground! 2020-12-06 03:30:36 +00:00			`latent = model(hq)[1] # BYOL trainer only trains the '4' output, which is indexed at [1]. Confusing.`
			`# This operation works by computing the distance of every structural index from the center and using that`
			`# as a "heatmap".`
			`b, c, h, w = latent.shape`
			`center = latent[:, :, h//2, w//2].unsqueeze(-1).unsqueeze(-1)`
			`centers = center.repeat(1, 1, h, w)`
Spinenet playground 2020-12-07 19:49:32 +00:00			`dist = cosine_similarity(latent, centers).unsqueeze(1)`
spinenet latent playground! 2020-12-06 03:30:36 +00:00			`dist = im_norm(dist)`
			`torchvision.utils.save_image(dist, os.path.join(output_path, "%i.png" % id))`
			`id += 1`


			`if __name__ == '__main__':`
			`pretrained_path = '../../experiments/spinenet49_imgset_byol.pth'`

Stage 2020-12-08 07:33:07 +00:00			`model = SpineNet('49', in_channels=3, use_input_norm=True).to('cuda')`
spinenet latent playground! 2020-12-06 03:30:36 +00:00			`model.load_state_dict(torch.load(pretrained_path), strict=True)`
			`model.eval()`

			`with torch.no_grad():`
Spinenet playground 2020-12-07 19:49:32 +00:00			`find_similar_latents(model)`