Implemented workaround to allow the use of seeds with the mps/metal backend. Fixed img2img's use of unsupported precision float64 with mps backend.

Elias Oenal 2022-09-11 21:11:02 +02:00
parent 2920ca7892
commit 5dc05c0d0d


@@ -1,3 +1,6 @@
+# Metal backend fixes written and placed
+# into the public domain by Elias Oenal <sd@eliasoenal.com>
+
 import contextlib
 import json
 import math
@@ -105,18 +108,32 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
     for i, seed in enumerate(seeds):
         noise_shape = shape if seed_resize_from_h <= 0 or seed_resize_from_w <= 0 else (shape[0], seed_resize_from_h//8, seed_resize_from_w//8)
+        # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used.
+        if shared.device.type == 'mps':
+            g = torch.Generator(device='cpu')

         subnoise = None
         if subseeds is not None:
             subseed = 0 if i >= len(subseeds) else subseeds[i]
-            torch.manual_seed(subseed)
-            subnoise = torch.randn(noise_shape, device=shared.device)
+            if shared.device.type == 'mps':
+                g.manual_seed(subseed)
+                subnoise = torch.randn(noise_shape, generator=g, device='cpu').to('mps')
+            else: # cpu or cuda
+                torch.manual_seed(subseed)
+                subnoise = torch.randn(noise_shape, device=shared.device)

         # randn results depend on device; gpu and cpu get different results for same seed;
         # the way I see it, it's better to do this on CPU, so that everyone gets same result;
         # but the original script had it like this, so I do not dare change it for now because
         # it will break everyone's seeds.
-        torch.manual_seed(seed)
-        noise = torch.randn(noise_shape, device=shared.device)
+        # When using the mps backend, falling back to the cpu device is needed, since mps currently
+        # does not implement seeding properly.
+        if shared.device.type == 'mps':
+            g.manual_seed(seed)
+            noise = torch.randn(noise_shape, generator=g, device='cpu').to('mps')
+        else: # cpu or cuda
+            torch.manual_seed(seed)
+            noise = torch.randn(noise_shape, device=shared.device)

         if subnoise is not None:
             #noise = subnoise * subseed_strength + noise * (1 - subseed_strength)
@@ -127,8 +144,12 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
             # noise_shape = (64, 80)
             # shape = (64, 72)
-            torch.manual_seed(seed)
-            x = torch.randn(shape, device=shared.device)
+            if shared.device.type == 'mps':
+                g.manual_seed(seed)
+                x = torch.randn(shape, generator=g, device='cpu').to('mps')
+            else:
+                torch.manual_seed(seed)
+                x = torch.randn(shape, device=shared.device)

             dx = (shape[2] - noise_shape[2]) // 2 # -4
             dy = (shape[1] - noise_shape[1]) // 2
             w = noise_shape[2] if dx >= 0 else noise_shape[2] + 2 * dx
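
For clarity, here is the seeding workaround from the hunks above pulled out into a standalone sketch. The seeded_randn helper and its parameters are illustrative only and not part of this commit; it assumes device is a torch.device like the repo's shared.device.

import torch

def seeded_randn(shape, seed, device):
    # mps does not honour torch.manual_seed() reliably, so draw the noise
    # from a seeded CPU generator and copy it over to the device afterwards.
    if device.type == 'mps':
        g = torch.Generator(device='cpu')
        g.manual_seed(seed)
        return torch.randn(shape, generator=g, device='cpu').to('mps')
    # cpu/cuda: keep the original global-seed path so existing seeds stay valid
    torch.manual_seed(seed)
    return torch.randn(shape, device=device)

# e.g. noise = seeded_randn((4, 64, 64), seed=1234, device=torch.device('mps'))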
@@ -463,7 +484,10 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
         if self.image_mask is not None:
             init_mask = latent_mask
             latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2]))
-            latmask = np.moveaxis(np.array(latmask, dtype=np.float64), 2, 0) / 255
+            if shared.device.type == 'mps': # mps backend does not support float64
+                latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255
+            else:
+                latmask = np.moveaxis(np.array(latmask, dtype=np.float64), 2, 0) / 255
             latmask = latmask[0]
             latmask = np.around(latmask)
             latmask = np.tile(latmask[None], (4, 1, 1))
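
The img2img hunk above sidesteps the missing float64 support on mps by building the mask array in float32 on that backend. Below is a minimal sketch of the same idea; the mask_to_latmask helper is hypothetical and stands in for the inline code in StableDiffusionProcessingImg2Img, with mask_image assumed to be a PIL image already resized to the latent's width and height.

import numpy as np

def mask_to_latmask(mask_image, device):
    # device: a torch.device (mirrors shared.device in the diff above).
    # The mps backend cannot handle float64 tensors, so use float32 there
    # and keep the original float64 path everywhere else.
    dtype = np.float32 if device.type == 'mps' else np.float64
    latmask = np.moveaxis(np.array(mask_image.convert('RGB'), dtype=dtype), 2, 0) / 255
    latmask = np.around(latmask[0])           # round the first channel to a 0/1 mask
    return np.tile(latmask[None], (4, 1, 1))  # repeat across the 4 latent channels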