stable-diffusion-webui/modules/swinir.py

import contextlib
import os
import sys
import traceback
from collections import OrderedDict
from collections import namedtuple

import cv2
import numpy as np
import requests
import torch
from PIL import Image

import modules.images
from modules.shared import cmd_opts, opts, device
from modules.swinir_arch import SwinIR as net
# Context-manager factory that upscale() wraps around inference.
# With precision "autocast" it is torch.autocast; otherwise it is
# contextlib.nullcontext, a no-op context manager.  upscale() calls it as
# precision_scope("cuda"); nullcontext accepts that argument as its
# enter_result, so both branches can be invoked the same way.
# Requires `import contextlib` at the top of the file (previously missing,
# which raised NameError whenever the precision was not "autocast").
if cmd_opts.precision == "autocast":
    precision_scope = torch.autocast
else:
    precision_scope = contextlib.nullcontext


def load_model(task = "realsr", large_model = True, model_path="C:/sd/ESRGANn/4x-large.pth", scale=4):
    """Register the SwinIR upscaler and return a ready-to-run SwinIR network.

    Args:
        task: Currently unused; kept for interface compatibility.
        large_model: When true, build the 9-stage / 240-dim network,
            otherwise the 6-stage / 180-dim one.
        model_path: Path of the checkpoint file to load.
        scale: Upscaling factor passed to the network constructor.

    Returns:
        The network with weights loaded, in eval mode, converted to half
        precision and moved to ``device``.

    Side effects:
        Ensures a ``UpscalerSwin("McSwinnySwin")`` entry exists in
        ``modules.shared.sd_upscalers``. Registration failures are printed
        to stderr and do not prevent the model from loading.
    """
    upscaler_name = "McSwinnySwin"
    try:
        upscalers = modules.shared.sd_upscalers
        # load_model() runs on every upscale() call, so only register once;
        # otherwise the upscaler list grows by one entry per processed image.
        already_registered = any(
            isinstance(entry, UpscalerSwin) and getattr(entry, "name", None) == upscaler_name
            for entry in upscalers
        )
        if not already_registered:
            upscalers.append(UpscalerSwin(upscaler_name))
    except Exception:
        print("Error registering SwinIR upscaler", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)

    if not large_model:
        # use 'nearest+conv' to avoid block artifacts
        model = net(upscale=scale, in_chans=3, img_size=64, window_size=8,
                    img_range=1., depths=[6, 6, 6, 6, 6, 6], embed_dim=180, num_heads=[6, 6, 6, 6, 6, 6],
                    mlp_ratio=2, upsampler='nearest+conv', resi_connection='1conv')
    else:
        # larger model size; use '3conv' to save parameters and memory; use ema for GAN training
        model = net(upscale=scale, in_chans=3, img_size=64, window_size=8,
                    img_range=1., depths=[6, 6, 6, 6, 6, 6, 6, 6, 6], embed_dim=240,
                    num_heads=[8, 8, 8, 8, 8, 8, 8, 8, 8],
                    mlp_ratio=2, upsampler='nearest+conv', resi_connection='3conv')

    # NOTE(security): torch.load unpickles the file; only load trusted checkpoints.
    # map_location="cpu" lets checkpoints saved on another device load anywhere;
    # the model is moved to `device` below regardless.
    checkpoint = torch.load(model_path, map_location="cpu")

    # Prefer the EMA weights; fall back to a "params" entry or a bare state
    # dict so checkpoints without "params_ema" no longer fail with KeyError.
    if "params_ema" in checkpoint:
        state_dict = checkpoint["params_ema"]
    elif "params" in checkpoint:
        state_dict = checkpoint["params"]
    else:
        state_dict = checkpoint
    model.load_state_dict(state_dict, strict=True)

    # Inference only: disable any training-time behaviour (dropout and the like).
    model.eval()

    return model.half().to(device)

    
def upscale(img, tile=opts.ESRGAN_tile, tile_overlap=opts.ESRGAN_tile_overlap, window_size = 8, scale = 4):
    """Upscale an image with SwinIR and return the result as an RGB PIL image.

    Args:
        img: Input image (a PIL image or anything ``np.array`` turns into an
            H x W x 3 array). PIL images in other modes are converted to RGB.
        tile: Tile edge length handed to ``inference``. The default is read
            from ``opts`` once, when this module is imported.
        tile_overlap: Overlap between neighbouring tiles; default is likewise
            read from ``opts`` at import time.
        window_size: The image is mirror-padded to a multiple of this value.
        scale: Upscaling factor; used to build the model and to crop the
            padded output back to ``h * scale`` x ``w * scale``.

    Returns:
        ``PIL.Image.Image`` in mode ``'RGB'``.

    Note:
        ``load_model`` is called on every invocation, so the checkpoint is
        re-read from disk for each image.
    """
    # Grayscale / RGBA / palette images would otherwise fail on the
    # 3-channel indexing below or inside the 3-channel network.
    if isinstance(img, Image.Image) and img.mode != "RGB":
        img = img.convert("RGB")

    img = np.array(img)
    img = img[:, :, ::-1]                  # reverse the channel order
    img = np.moveaxis(img, 2, 0) / 255     # HWC -> CHW, scaled to [0, 1]
    img = torch.from_numpy(img).float()
    img = img.unsqueeze(0).to(device)      # add batch dimension

    # Build the model for the same factor that is used for cropping below.
    model = load_model(scale=scale)
    with torch.no_grad(), precision_scope("cuda"):
        _, _, h_old, w_old = img.size()
        # Pad up to the next multiple of window_size strictly above the
        # current size (always adds between 1 and window_size rows/columns).
        h_pad = (h_old // window_size + 1) * window_size - h_old
        w_pad = (w_old // window_size + 1) * window_size - w_old
        # Mirror padding: append a flipped copy, then cut to the padded size.
        img = torch.cat([img, torch.flip(img, [2])], 2)[:, :, :h_old + h_pad, :]
        img = torch.cat([img, torch.flip(img, [3])], 3)[:, :, :, :w_old + w_pad]
        output = inference(img, model, tile, tile_overlap, window_size, scale)
        # Drop the area that corresponds to the padding.
        output = output[..., :h_old * scale, :w_old * scale]
        output = output.data.squeeze().float().cpu().clamp_(0, 1).numpy()
        if output.ndim == 3:
            # Undo the channel reversal applied on input and go CHW -> HWC.
            output = np.transpose(output[[2, 1, 0], :, :], (1, 2, 0))
        output = (output * 255.0).round().astype(np.uint8)  # float32 to uint8
        return Image.fromarray(output, 'RGB')


    
    
def inference(img, model, tile, tile_overlap, window_size, scale):
    """Run ``model`` over ``img`` tile by tile and blend the overlapping outputs.

    Args:
        img: 4-D tensor unpacked as (batch, channels, height, width).
        model: Callable mapping a tile to its ``scale``-times larger output.
        tile: Tile edge length. ``0`` or ``None`` disables tiling and the
            whole image is passed to ``model`` in one call.
        tile_overlap: Number of pixels neighbouring tiles share. If it is not
            smaller than the effective tile size, tiles are laid out without
            overlap instead.
        window_size: The effective tile size must be a multiple of this.
        scale: Upscaling factor of ``model``.

    Returns:
        Tensor of shape (batch, channels, height * scale, width * scale) in
        which every pixel is the mean of all tile outputs covering it. When
        tiling is disabled, whatever ``model(img)`` returns.

    Raises:
        AssertionError: If the effective tile size is not a multiple of
            ``window_size``.
    """
    if not tile:
        # Tiling disabled: process the whole image at once.
        return model(img)

    # test the image tile by tile
    b, c, h, w = img.size()
    tile = min(tile, h, w)
    assert tile % window_size == 0, "tile size should be a multiple of window_size"
    sf = scale

    stride = tile - tile_overlap
    if stride <= 0:
        # A zero step makes range() raise and a negative one yields a single
        # tile, leaving most of the output as 0/0 = NaN. Use plain
        # non-overlapping tiles instead.
        stride = tile

    # Regular grid plus one final tile flush with the bottom/right border.
    h_idx_list = list(range(0, h - tile, stride)) + [h - tile]
    w_idx_list = list(range(0, w - tile, stride)) + [w - tile]

    # E accumulates tile outputs, W counts how many tiles touched each pixel.
    # Both follow img's dtype and device.
    E = img.new_zeros((b, c, h * sf, w * sf))
    W = torch.zeros_like(E)

    for h_idx in h_idx_list:
        for w_idx in w_idx_list:
            in_patch = img[..., h_idx:h_idx + tile, w_idx:w_idx + tile]
            out_patch = model(in_patch)

            rows = slice(h_idx * sf, (h_idx + tile) * sf)
            cols = slice(w_idx * sf, (w_idx + tile) * sf)
            E[..., rows, cols].add_(out_patch)
            W[..., rows, cols].add_(1)

    # Average overlapping contributions.
    return E.div_(W)

    
class UpscalerSwin(modules.images.Upscaler):
    """Upscaler entry that delegates the actual work to this module's ``upscale``."""

    def __init__(self, title):
        # Name under which this upscaler instance is identified.
        self.name = title

    def do_upscale(self, img):
        """Return ``img`` processed by ``upscale`` with its default settings."""
        return upscale(img)
extremely basic and incomplete swinir implementation 2022-09-19 20:05:12 +00:00			`import sys`
			`import traceback`
			`import cv2`
			`from collections import OrderedDict`
			`import os`
			`import requests`
			`from collections import namedtuple`
			`import numpy as np`
			`from PIL import Image`
			`import torch`
			`import modules.images`
			`from modules.shared import cmd_opts, opts, device`
			`from modules.swinir_arch import SwinIR as net`
			`precision_scope = torch.autocast if cmd_opts.precision == "autocast" else contextlib.nullcontext`
make swinir actually useful 2022-09-20 13:36:20 +00:00			`def load_model(task = "realsr", large_model = True, model_path="C:/sd/ESRGANn/4x-large.pth", scale=4):`

			`try:`
			`modules.shared.sd_upscalers.append(UpscalerSwin("McSwinnySwin"))`
			`except Exception:`
			`print(f"Error loading ESRGAN model", file=sys.stderr)`
			`print(traceback.format_exc(), file=sys.stderr)`
extremely basic and incomplete swinir implementation 2022-09-19 20:05:12 +00:00			`if not large_model:`
			`# use 'nearest+conv' to avoid block artifacts`
			`model = net(upscale=scale, in_chans=3, img_size=64, window_size=8,`
			`img_range=1., depths=[6, 6, 6, 6, 6, 6], embed_dim=180, num_heads=[6, 6, 6, 6, 6, 6],`
			`mlp_ratio=2, upsampler='nearest+conv', resi_connection='1conv')`
			`else:`
			`# larger model size; use '3conv' to save parameters and memory; use ema for GAN training`
			`model = net(upscale=scale, in_chans=3, img_size=64, window_size=8,`
			`img_range=1., depths=[6, 6, 6, 6, 6, 6, 6, 6, 6], embed_dim=240,`
			`num_heads=[8, 8, 8, 8, 8, 8, 8, 8, 8],`
			`mlp_ratio=2, upsampler='nearest+conv', resi_connection='3conv')`

			`pretrained_model = torch.load(model_path)`
make swinir actually useful 2022-09-20 13:36:20 +00:00			`model.load_state_dict(pretrained_model["params_ema"], strict=True)`
extremely basic and incomplete swinir implementation 2022-09-19 20:05:12 +00:00
			`return model.half().to(device)`

			`def upscale(img, tile=opts.ESRGAN_tile, tile_overlap=opts.ESRGAN_tile_overlap, window_size = 8, scale = 4):`
make swinir actually useful 2022-09-20 13:36:20 +00:00			`img = np.array(img)`
			`img = img[:, :, ::-1]`
			`img = np.moveaxis(img, 2, 0) / 255`
			`img = torch.from_numpy(img).float()`
			`img = img.unsqueeze(0).to(device)`
extremely basic and incomplete swinir implementation 2022-09-19 20:05:12 +00:00			`model = load_model()`
			`with torch.no_grad(), precision_scope("cuda"):`
			`_, _, h_old, w_old = img.size()`
			`h_pad = (h_old // window_size + 1) * window_size - h_old`
			`w_pad = (w_old // window_size + 1) * window_size - w_old`
			`img = torch.cat([img, torch.flip(img, [2])], 2)[:, :, :h_old + h_pad, :]`
			`img = torch.cat([img, torch.flip(img, [3])], 3)[:, :, :, :w_old + w_pad]`
			`output = inference(img, model, tile, tile_overlap, window_size, scale)`
			`output = output[..., :h_old * scale, :w_old * scale]`
			`output = output.data.squeeze().float().cpu().clamp_(0, 1).numpy()`
			`if output.ndim == 3:`
			`output = np.transpose(output[[2, 1, 0], :, :], (1, 2, 0)) # CHW-RGB to HCW-BGR`
			`output = (output * 255.0).round().astype(np.uint8) # float32 to uint8`
make swinir actually useful 2022-09-20 13:36:20 +00:00			`return Image.fromarray(output, 'RGB')`
extremely basic and incomplete swinir implementation 2022-09-19 20:05:12 +00:00

			`def inference(img, model, tile, tile_overlap, window_size, scale):`
			`# test the image tile by tile`
			`b, c, h, w = img.size()`
			`tile = min(tile, h, w)`
			`assert tile % window_size == 0, "tile size should be a multiple of window_size"`
			`sf = scale`

			`stride = tile - tile_overlap`
			`h_idx_list = list(range(0, h-tile, stride)) + [h-tile]`
			`w_idx_list = list(range(0, w-tile, stride)) + [w-tile]`
			`E = torch.zeros(b, c, h*sf, w*sf, dtype=torch.half, device=device).type_as(img)`
			`W = torch.zeros_like(E, dtype=torch.half, device=device)`

			`for h_idx in h_idx_list:`
			`for w_idx in w_idx_list:`
			`in_patch = img[..., h_idx:h_idx+tile, w_idx:w_idx+tile]`
			`out_patch = model(in_patch)`
			`out_patch_mask = torch.ones_like(out_patch)`

			`E[..., h_idx*sf:(h_idx+tile)*sf, w_idx*sf:(w_idx+tile)*sf].add_(out_patch)`
			`W[..., h_idx*sf:(h_idx+tile)*sf, w_idx*sf:(w_idx+tile)*sf].add_(out_patch_mask)`
			`output = E.div_(W)`

make swinir actually useful 2022-09-20 13:36:20 +00:00			`return output`

			`class UpscalerSwin(modules.images.Upscaler):`
			`def __init__(self, title):`
			`self.name = title`

			`def do_upscale(self, img):`
			`img = upscale(img)`
			`return img`