From 0c3feb202c5714abd50d879c1db2cd9a71ce93e3 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Tue, 10 Jan 2023 14:08:29 +0300
Subject: [PATCH] disable torch weight initialization and CLIP downloading/reading checkpoint to speedup creating sd model from config

---
 modules/sd_disable_initialization.py | 73 ++++++++++++++++++++++++++++
 modules/sd_models.py                 |  5 +-
 2 files changed, 76 insertions(+), 2 deletions(-)
 create mode 100644 modules/sd_disable_initialization.py

diff --git a/modules/sd_disable_initialization.py b/modules/sd_disable_initialization.py
new file mode 100644
index 00000000..c9a3b5e4
--- /dev/null
+++ b/modules/sd_disable_initialization.py
@@ -0,0 +1,73 @@
+import ldm.modules.encoders.modules
+import open_clip
+import torch
+
+
+class DisableInitialization:
+    """
+    When an object of this class enters a `with` block, it starts preventing torch's layer initialization
+    functions from working, and changes CLIP and OpenCLIP to not download model weights. When it leaves,
+    reverts everything to how it was.
+
+    The replacements are assigned onto the shared `torch.nn.init`, `open_clip` and `CLIPTextModel` objects, so
+    any other code that runs while the block is active sees them as well.
+
+    Use like this:
+    ```
+    with DisableInitialization():
+        do_things()
+    ```
+    """
+
+    # stands for "CLIPTextModel had no `from_pretrained` entry of its own" when saving state in __enter__
+    _not_own_attribute = object()
+
+    def __enter__(self):
+        """Save the original callables, install the replacements, and return this object."""
+
+        def do_nothing(*args, **kwargs):
+            pass
+
+        def create_model_and_transforms_without_pretrained(*args, pretrained=None, **kwargs):
+            # the caller's `pretrained` value is dropped on purpose and None is passed instead
+            return self.create_model_and_transforms(*args, pretrained=None, **kwargs)
+
+        def CLIPTextModel_from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs):
+            # the requested name is handed over as `config`, with None as the path and an empty `state_dict`;
+            # presumably this makes the model get built from its config alone - confirm against transformers
+            return self.CLIPTextModel_from_pretrained(None, *model_args, config=pretrained_model_name_or_path, state_dict={}, **kwargs)
+
+        clip_text_model = ldm.modules.encoders.modules.CLIPTextModel
+
+        self.init_kaiming_uniform = torch.nn.init.kaiming_uniform_
+        self.init_no_grad_normal = torch.nn.init._no_grad_normal_
+        self.create_model_and_transforms = open_clip.create_model_and_transforms
+        self.CLIPTextModel_from_pretrained = clip_text_model.from_pretrained
+
+        # attribute access above goes through the descriptor protocol and inheritance, so its result may be a
+        # bound method or come from a base class; writing that back on exit would leave the class different
+        # from how it was found. Remember what the class namespace itself held, if anything.
+        self.CLIPTextModel_from_pretrained_own = vars(clip_text_model).get('from_pretrained', self._not_own_attribute)
+
+        torch.nn.init.kaiming_uniform_ = do_nothing
+        torch.nn.init._no_grad_normal_ = do_nothing
+        open_clip.create_model_and_transforms = create_model_and_transforms_without_pretrained
+        clip_text_model.from_pretrained = CLIPTextModel_from_pretrained
+
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Put back everything that __enter__ replaced; exceptions from the block are not suppressed."""
+
+        clip_text_model = ldm.modules.encoders.modules.CLIPTextModel
+
+        torch.nn.init.kaiming_uniform_ = self.init_kaiming_uniform
+        torch.nn.init._no_grad_normal_ = self.init_no_grad_normal
+        open_clip.create_model_and_transforms = self.create_model_and_transforms
+
+        if self.CLIPTextModel_from_pretrained_own is self._not_own_attribute:
+            # nothing was defined on the class itself: remove the override so lookup falls through again
+            del clip_text_model.from_pretrained
+        else:
+            clip_text_model.from_pretrained = self.CLIPTextModel_from_pretrained_own
+
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 0a6d55ca..ee241032 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -13,7 +13,7 @@ import ldm.modules.midas as midas
 
 from ldm.util import instantiate_from_config
 
-from modules import shared, modelloader, devices, script_callbacks, sd_vae
+from modules import shared, modelloader, devices, script_callbacks, sd_vae, sd_disable_initialization
 from modules.paths import models_path
 from modules.sd_hijack_inpainting import do_inpainting_hijack, should_hijack_inpainting
 
@@ -319,7 +319,8 @@ def load_model(checkpoint_info=None):
     if shared.cmd_opts.no_half:
         sd_config.model.params.unet_config.params.use_fp16 = False
 
-    sd_model = instantiate_from_config(sd_config.model)
+    with sd_disable_initialization.DisableInitialization():
+        sd_model = instantiate_from_config(sd_config.model)
 
     load_model_weights(sd_model, checkpoint_info)
 