From 5dc05c0d0dc6a0040b0beb93f082ab314513d069 Mon Sep 17 00:00:00 2001 From: Elias Oenal Date: Sun, 11 Sep 2022 21:11:02 +0200 Subject: [PATCH] Implemented workaround to allow the use of seeds with the mps/metal backend. Fixed img2img's use of unsupported precision float64 with mps backend. --- modules/processing.py | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/modules/processing.py b/modules/processing.py index cf2e13d3..80bf7cc0 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -1,3 +1,6 @@ +# Metal backend fixes written and placed +# into the public domain by Elias Oenal + import contextlib import json import math @@ -105,18 +108,32 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see for i, seed in enumerate(seeds): noise_shape = shape if seed_resize_from_h <= 0 or seed_resize_from_w <= 0 else (shape[0], seed_resize_from_h//8, seed_resize_from_w//8) + # PyTorch currently doesn't handle seeding randomness correctly when the metal backend is used. + if shared.device.type == 'mps': + g = torch.Generator(device='cpu') + subnoise = None if subseeds is not None: subseed = 0 if i >= len(subseeds) else subseeds[i] - torch.manual_seed(subseed) - subnoise = torch.randn(noise_shape, device=shared.device) + if shared.device.type == 'mps': + g.manual_seed(subseed) + subnoise = torch.randn(noise_shape, generator=g, device='cpu').to('mps') + else: # cpu or cuda + torch.manual_seed(subseed) + subnoise = torch.randn(noise_shape, device=shared.device) # randn results depend on device; gpu and cpu get different results for same seed; # the way I see it, it's better to do this on CPU, so that everyone gets same result; # but the original script had it like this, so I do not dare change it for now because # it will break everyone's seeds.
- torch.manual_seed(seed) - noise = torch.randn(noise_shape, device=shared.device) + # When using the mps backend, falling back to the cpu device is needed, since mps currently + # does not implement seeding properly. + if shared.device.type == 'mps': + g.manual_seed(seed) + noise = torch.randn(noise_shape, generator=g, device='cpu').to('mps') + else: # cpu or cuda + torch.manual_seed(seed) + noise = torch.randn(noise_shape, device=shared.device) if subnoise is not None: #noise = subnoise * subseed_strength + noise * (1 - subseed_strength) @@ -127,8 +144,12 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see # noise_shape = (64, 80) # shape = (64, 72) - torch.manual_seed(seed) - x = torch.randn(shape, device=shared.device) + if shared.device.type == 'mps': + g.manual_seed(seed) + x = torch.randn(shape, generator=g, device='cpu').to('mps') + else: + torch.manual_seed(seed) + x = torch.randn(shape, device=shared.device) dx = (shape[2] - noise_shape[2]) // 2 # -4 dy = (shape[1] - noise_shape[1]) // 2 w = noise_shape[2] if dx >= 0 else noise_shape[2] + 2 * dx @@ -463,7 +484,10 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): if self.image_mask is not None: init_mask = latent_mask latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2])) - latmask = np.moveaxis(np.array(latmask, dtype=np.float64), 2, 0) / 255 + if shared.device.type == 'mps': # mps backend does not support float64 + latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255 + else: + latmask = np.moveaxis(np.array(latmask, dtype=np.float64), 2, 0) / 255 latmask = latmask[0] latmask = np.around(latmask) latmask = np.tile(latmask[None], (4, 1, 1))