From f2055cb1d4ce45d7aaacc49d8ab5bec7791a8f47 Mon Sep 17 00:00:00 2001
From: brkirch
Date: Sat, 8 Oct 2022 01:47:02 -0400
Subject: [PATCH] Add hypernetwork support to split cross attention v1

* Add hypernetwork support to split_cross_attention_forward_v1
* Fix device check in esrgan_model.py to use devices.device_esrgan instead of
  shared.device
---
 modules/esrgan_model.py            |  2 +-
 modules/sd_hijack_optimizations.py | 18 ++++++++++++++----
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/modules/esrgan_model.py b/modules/esrgan_model.py
index d17e730f..28548124 100644
--- a/modules/esrgan_model.py
+++ b/modules/esrgan_model.py
@@ -111,7 +111,7 @@ class UpscalerESRGAN(Upscaler):
             print("Unable to load %s from %s" % (self.model_path, filename))
             return None
 
-        pretrained_net = torch.load(filename, map_location='cpu' if shared.device.type == 'mps' else None)
+        pretrained_net = torch.load(filename, map_location='cpu' if devices.device_esrgan.type == 'mps' else None)
         crt_model = arch.RRDBNet(3, 3, 64, 23, gc=32)
 
         pretrained_net = fix_model_layers(crt_model, pretrained_net)
diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py
index d9cca485..3351c740 100644
--- a/modules/sd_hijack_optimizations.py
+++ b/modules/sd_hijack_optimizations.py
@@ -12,13 +12,22 @@ from modules import shared
 def split_cross_attention_forward_v1(self, x, context=None, mask=None):
     h = self.heads
 
-    q = self.to_q(x)
+    q_in = self.to_q(x)
     context = default(context, x)
-    k = self.to_k(context)
-    v = self.to_v(context)
+
+    hypernetwork = shared.selected_hypernetwork()
+    hypernetwork_layers = (hypernetwork.layers if hypernetwork is not None else {}).get(context.shape[2], None)
+
+    if hypernetwork_layers is not None:
+        k_in = self.to_k(hypernetwork_layers[0](context))
+        v_in = self.to_v(hypernetwork_layers[1](context))
+    else:
+        k_in = self.to_k(context)
+        v_in = self.to_v(context)
     del context, x
 
-    q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q, k, v))
+    q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q_in, k_in, v_in))
+    del q_in, k_in, v_in
 
     r1 = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device)
     for i in range(0, q.shape[0], 2):
@@ -31,6 +40,7 @@ def split_cross_attention_forward_v1(self, x, context=None, mask=None):
         r1[i:end] = einsum('b i j, b j d -> b i d', s2, v[i:end])
         del s2
 
+    del q, k, v
     r2 = rearrange(r1, '(b h) n d -> b n (h d)', h=h)
     del r1
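
Note (not part of the patch): the core of the sd_hijack_optimizations.py change is that the key and value
projections are fed a hypernetwork-transformed copy of the context whenever a hypernetwork is selected, with
the transform looked up by the context's embedding width (context.shape[2]). Below is a minimal, self-contained
Python sketch of that lookup-and-apply pattern. DummyHypernetworkLayer and the toy shapes are hypothetical stand-ins;
only the keying of the layers dict by context.shape[2] and the fallback to the untouched context mirror the patched code.

    import torch
    import torch.nn as nn


    class DummyHypernetworkLayer(nn.Module):
        """Stand-in for one hypernetwork module applied to the context before to_k/to_v."""
        def __init__(self, dim):
            super().__init__()
            self.linear = nn.Linear(dim, dim)

        def forward(self, x):
            # residual-style transform of the context, shape is preserved
            return x + self.linear(x)


    context_dim = 768                           # e.g. text-embedding width
    context = torch.randn(2, 77, context_dim)   # (batch, tokens, dim)

    # hypernetwork layers are keyed by the context's last dimension, as in the patch
    layers = {context_dim: (DummyHypernetworkLayer(context_dim),
                            DummyHypernetworkLayer(context_dim))}

    hypernetwork_layers = layers.get(context.shape[2], None)
    if hypernetwork_layers is not None:
        k_context = hypernetwork_layers[0](context)   # would feed self.to_k(...)
        v_context = hypernetwork_layers[1](context)   # would feed self.to_v(...)
    else:
        k_context = v_context = context                # no hypernetwork: use context as-is

    print(k_context.shape, v_context.shape)            # both torch.Size([2, 77, 768])

Because the transform only rewrites the context tensor and keeps its shape, the rest of the attention code
(the rearrange and the chunked einsum loop in the hunk above) is unchanged apart from the renamed q_in/k_in/v_in
intermediates, which are deleted early to keep peak memory low.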