# stable-diffusion-webui/modules/hypernetworks/hypernetwork.py (396 lines, 14 KiB, Python)

import csv
2022-10-07 20:22:22 +00:00
import datetime
import glob
import html
import os
import sys
import traceback
import modules.textual_inversion.dataset
2022-10-07 20:22:22 +00:00
import torch
import tqdm
2022-10-07 20:22:22 +00:00
from einops import rearrange, repeat
from ldm.util import default
from modules import devices, processing, sd_models, shared
from modules.textual_inversion import textual_inversion
from modules.textual_inversion.learn_schedule import LearnRateScheduler
from torch import einsum
def parse_layer_structure(dim, state_dict):
    """Derive the layer-width multipliers stored in a module state dict.

    Walks the keys ``linear.0.weight``, ``linear.1.weight``, ... for as long
    as they are present and records ``len(weight) // dim`` for each one.

    Args:
        dim: base width that every layer width is divided by.
        state_dict: mapping that may contain ``linear.<i>.weight`` entries.

    Returns:
        A list that starts with ``1`` followed by one multiplier per
        consecutive weight entry found.
    """
    structure = [1]
    index = 0
    while True:
        key = "linear.{}.weight".format(index)
        if key not in state_dict:
            break
        structure.append(len(state_dict[key]) // dim)
        index += 1
    return structure
2022-10-07 20:22:22 +00:00
class HypernetworkModule(torch.nn.Module):
    """Residual stack of linear layers applied to an attention context.

    The stack is a ``torch.nn.Sequential`` of ``Linear`` layers, each
    optionally followed by a ``LayerNorm``, whose widths are ``dim`` times
    the entries of the layer structure. ``forward`` returns
    ``x + self.linear(x) * multiplier``.
    """

    # Class-wide settings shared by every instance; they are assigned by
    # apply_strength / apply_layer_structure / apply_layer_norm.
    multiplier = 1.0
    layer_structure = None
    add_layer_norm = False

    def __init__(self, dim, state_dict=None):
        """Build the layer stack, then load or freshly initialise its weights.

        Args:
            dim: base feature width; layer widths are multiples of it.
            state_dict: optional saved weights. When given they are loaded,
                falling back to the legacy ``linear1``/``linear2`` key layout
                if ``load_state_dict`` raises ``RuntimeError``. When omitted
                the weights are drawn from N(0, 0.01) and biases are zeroed.
        """
        super().__init__()

        # A class-level structure wins; otherwise read it from the saved
        # weights when they use the "linear.<i>" layout; otherwise (1, 2, 1).
        configured = self.layer_structure
        if configured is not None:
            assert configured[0] == 1, "Multiplier Sequence should start with size 1!"
            assert configured[-1] == 1, "Multiplier Sequence should end with size 1!"
            structure = configured
        elif state_dict is not None and 'linear.0.weight' in state_dict:
            structure = parse_layer_structure(dim, state_dict)
        else:
            structure = (1, 2, 1)

        modules = []
        for in_mult, out_mult in zip(structure, structure[1:]):
            modules.append(torch.nn.Linear(int(dim * in_mult), int(dim * out_mult)))
            if self.add_layer_norm:
                modules.append(torch.nn.LayerNorm(int(dim * out_mult)))
        self.linear = torch.nn.Sequential(*modules)

        if state_dict is None:
            for layer in self.linear:
                layer.weight.data.normal_(mean=0.0, std=0.01)
                layer.bias.data.zero_()
        else:
            try:
                self.load_state_dict(state_dict)
            except RuntimeError:
                self.try_load_previous(state_dict)

        self.to(devices.device)

    def try_load_previous(self, state_dict):
        """Copy weights saved under the legacy ``linear1``/``linear2`` keys.

        The values are copied in place into this module's ``linear.0`` and
        ``linear.1`` entries (bias first, then weight, for each layer).
        """
        current = self.state_dict()
        for new_prefix, old_prefix in (('linear.0', 'linear1'), ('linear.1', 'linear2')):
            for suffix in ('bias', 'weight'):
                current[f'{new_prefix}.{suffix}'].copy_(state_dict[f'{old_prefix}.{suffix}'])

    def forward(self, x):
        """Return ``x`` plus the layer stack's output scaled by ``multiplier``."""
        delta = self.linear(x)
        return x + delta * self.multiplier

    def trainables(self):
        """Return ``[weight, bias, weight, bias, ...]`` for every layer in order."""
        return [param for layer in self.linear for param in (layer.weight, layer.bias)]
2022-10-13 17:12:37 +00:00
def apply_strength(value=None):
    """Set the class-wide ``HypernetworkModule.multiplier``.

    Uses ``value`` when given; otherwise falls back to
    ``shared.opts.sd_hypernetwork_strength``.
    """
    if value is None:
        value = shared.opts.sd_hypernetwork_strength
    HypernetworkModule.multiplier = value
2022-10-07 20:22:22 +00:00
def apply_layer_structure(value=None):
    """Set the class-wide ``HypernetworkModule.layer_structure``.

    Uses ``value`` when given; otherwise falls back to
    ``shared.opts.sd_hypernetwork_layer_structure``.
    """
    if value is None:
        value = shared.opts.sd_hypernetwork_layer_structure
    HypernetworkModule.layer_structure = value
def apply_layer_norm(value=None):
    """Set the class-wide ``HypernetworkModule.add_layer_norm`` flag.

    Uses ``value`` when given; otherwise falls back to
    ``shared.opts.sd_hypernetwork_add_layer_norm``.
    """
    if value is None:
        value = shared.opts.sd_hypernetwork_add_layer_norm
    HypernetworkModule.add_layer_norm = value
2022-10-07 20:22:22 +00:00
class Hypernetwork:
    """A named set of ``HypernetworkModule`` pairs keyed by context width.

    ``layers`` maps an integer size to a 2-tuple of modules; alongside it the
    object tracks the training step and the checkpoint hash/name recorded at
    save time.
    """

    filename = None
    name = None

    def __init__(self, name=None, enable_sizes=None):
        """Create the network with a fresh module pair for each enabled size.

        Args:
            name: display name, may be ``None`` until ``load`` fills it in.
            enable_sizes: iterable of sizes to create module pairs for;
                ``None`` or empty creates no layers.
        """
        self.filename = None
        self.name = name
        self.layers = {}
        self.step = 0
        self.sd_checkpoint = None
        self.sd_checkpoint_name = None

        for width in enable_sizes or []:
            self.layers[width] = (HypernetworkModule(width), HypernetworkModule(width))

    def weights(self):
        """Switch every module to train mode and return all their trainables."""
        params = []
        for pair in self.layers.values():
            for module in pair:
                module.train()
                params.extend(module.trainables())
        return params

    def save(self, filename):
        """Write module state dicts plus metadata to ``filename`` via ``torch.save``."""
        payload = {
            size: (pair[0].state_dict(), pair[1].state_dict())
            for size, pair in self.layers.items()
        }
        payload['step'] = self.step
        payload['name'] = self.name
        payload['sd_checkpoint'] = self.sd_checkpoint
        payload['sd_checkpoint_name'] = self.sd_checkpoint_name

        torch.save(payload, filename)

    def load(self, filename):
        """Populate this object from a file previously written by ``save``.

        Integer keys in the file are treated as layer sizes; every other key
        is metadata. If no name was set, the file's base name (without
        extension) is used before the stored ``name`` entry is applied.
        """
        self.filename = filename
        if self.name is None:
            stem, _ext = os.path.splitext(os.path.basename(filename))
            self.name = stem

        stored = torch.load(filename, map_location='cpu')

        for key, value in stored.items():
            if type(key) is int:
                self.layers[key] = (HypernetworkModule(key, value[0]), HypernetworkModule(key, value[1]))

        self.name = stored.get('name', self.name)
        self.step = stored.get('step', 0)
        self.sd_checkpoint = stored.get('sd_checkpoint', None)
        self.sd_checkpoint_name = stored.get('sd_checkpoint_name', None)
2022-10-11 11:53:02 +00:00
def list_hypernetworks(path):
    """Map hypernetwork names to ``.pt`` files found anywhere under ``path``.

    The name is the file's base name without extension. When two files share
    a name, the one yielded later by the recursive glob wins.
    """
    pattern = os.path.join(path, '**/*.pt')
    return {
        os.path.splitext(os.path.basename(found))[0]: found
        for found in glob.iglob(pattern, recursive=True)
    }
2022-10-07 20:22:22 +00:00
2022-10-11 11:53:02 +00:00
def load_hypernetwork(filename):
    """Load the named hypernetwork into ``shared.loaded_hypernetwork``.

    ``filename`` is looked up in ``shared.hypernetworks``. When it is not
    found, any currently loaded hypernetwork is unloaded. When loading fails,
    the error is reported on stderr and no hypernetwork is left active.

    Args:
        filename: key into ``shared.hypernetworks`` (not a filesystem path).
    """
    path = shared.hypernetworks.get(filename, None)

    if path is None:
        if shared.loaded_hypernetwork is not None:
            print("Unloading hypernetwork")
        shared.loaded_hypernetwork = None
        return

    print(f"Loading hypernetwork {filename}")

    try:
        # Load into a local first so a failure part-way through never leaves
        # a half-initialised network installed as the active one.
        hypernetwork = Hypernetwork()
        hypernetwork.load(path)
    except Exception:
        print(f"Error loading hypernetwork {path}", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
        shared.loaded_hypernetwork = None
    else:
        shared.loaded_hypernetwork = hypernetwork
2022-10-07 20:22:22 +00:00
def find_closest_hypernetwork_name(search: str):
    """Return the shortest known hypernetwork name containing ``search``.

    Matching is case-insensitive against the keys of ``shared.hypernetworks``.
    Returns ``None`` when ``search`` is empty or nothing matches; among names
    of equal length the first one encountered is returned.
    """
    if not search:
        return None

    needle = search.lower()
    matches = [name for name in shared.hypernetworks if needle in name.lower()]
    if not matches:
        return None

    return min(matches, key=len)
2022-10-11 11:53:02 +00:00
def apply_hypernetwork(hypernetwork, context, layer=None):
    """Run ``context`` through the hypernetwork pair matching its width.

    The pair is looked up in ``hypernetwork.layers`` by ``context.shape[2]``.

    Args:
        hypernetwork: object exposing a ``layers`` mapping, or ``None``.
        context: tensor whose third dimension selects the module pair.
        layer: optional object that receives the chosen modules as
            ``hyper_k`` and ``hyper_v`` attributes.

    Returns:
        ``(context_k, context_v)``; both are the unmodified ``context`` when
        there is no hypernetwork or no pair for this width.
    """
    available = {} if hypernetwork is None else hypernetwork.layers
    pair = available.get(context.shape[2], None)

    if pair is None:
        return context, context

    if layer is not None:
        layer.hyper_k = pair[0]
        layer.hyper_v = pair[1]

    return pair[0](context), pair[1](context)
2022-10-07 20:22:22 +00:00
2022-10-11 11:53:02 +00:00
def attention_CrossAttention_forward(self, x, context=None, mask=None):
    """Cross-attention forward pass with the loaded hypernetwork applied.

    The context (``x`` itself when ``context`` is not given) is passed
    through ``apply_hypernetwork`` to obtain separate inputs for the key and
    value projections; the rest is scaled dot-product attention over
    ``self.heads`` heads.

    Args:
        x: input to the query projection.
        context: optional input for keys/values; defaults to ``x``.
        mask: optional boolean mask; positions where it is False are filled
            with the most negative finite value before the softmax.

    Returns:
        The result of ``self.to_out`` applied to the re-merged head outputs.
    """
    heads = self.heads

    def split_heads(t):
        # Fold the head dimension into the batch dimension.
        return rearrange(t, 'b n (h d) -> (b h) n d', h=heads)

    query = self.to_q(x)
    context = default(context, x)

    context_k, context_v = apply_hypernetwork(shared.loaded_hypernetwork, context, self)
    key = self.to_k(context_k)
    value = self.to_v(context_v)

    query = split_heads(query)
    key = split_heads(key)
    value = split_heads(value)

    scores = einsum('b i d, b j d -> b i j', query, key) * self.scale

    if mask is not None:
        mask = rearrange(mask, 'b ... -> b (...)')
        max_neg_value = -torch.finfo(scores.dtype).max
        mask = repeat(mask, 'b j -> (b h) () j', h=heads)
        scores.masked_fill_(~mask, max_neg_value)

    # attention, what we cannot get enough of
    attn = scores.softmax(dim=-1)

    merged = einsum('b i j, b j d -> b i d', attn, value)
    merged = rearrange(merged, '(b h) n d -> b n (h d)', h=heads)
    return self.to_out(merged)
def stack_conds(conds):
    """Stack conditioning tensors into one batch, padding shorter ones.

    Tensors with fewer rows than the longest one are extended by repeating
    their last row until all have the same first dimension. The input
    sequence itself is left untouched.

    Args:
        conds: non-empty sequence of 2-D tensors sharing the second dimension.

    Returns:
        A tensor of shape ``(len(conds), max_rows, width)``.
    """
    if len(conds) == 1:
        return torch.stack(conds)

    # same as in reconstruct_multicond_batch
    token_count = max(cond.shape[0] for cond in conds)

    # Collect into a new list instead of overwriting entries of ``conds`` so
    # the caller's sequence is not modified (and tuples are accepted).
    padded = []
    for cond in conds:
        missing = token_count - cond.shape[0]
        if missing > 0:
            last_vector_repeated = cond[-1:].repeat([missing, 1])
            cond = torch.vstack([cond, last_vector_repeated])
        padded.append(cond)

    return torch.stack(padded)
def train_hypernetwork(
    hypernetwork_name,
    learn_rate,
    batch_size,
    data_root,
    log_directory,
    steps,
    create_image_every,
    save_hypernetwork_every,
    template_file,
    preview_from_txt2img,
    preview_prompt,
    preview_negative_prompt,
    preview_steps,
    preview_sampler_index,
    preview_cfg_scale,
    preview_seed,
    preview_width,
    preview_height,
):
    """Train the named hypernetwork against the currently loaded SD model.

    The hypernetwork is loaded from ``shared.hypernetworks`` and installed as
    ``shared.loaded_hypernetwork``. It is then optimised with AdamW on the
    dataset under ``data_root``, with progress published through
    ``shared.state``.

    Args:
        hypernetwork_name: key into ``shared.hypernetworks``; also used for
            output file names.
        learn_rate: learning-rate specification handed to ``LearnRateScheduler``.
        batch_size: batch size handed to the dataset.
        data_root: directory containing the training images.
        log_directory: base directory; outputs go to
            ``<log_directory>/<YYYY-MM-DD>/<hypernetwork_name>``.
        steps: step number at which training stops.
        create_image_every: preview-image interval in steps; ``<= 0`` disables.
        save_hypernetwork_every: intermediate-save interval in steps;
            ``<= 0`` disables.
        template_file: prompt template file handed to the dataset.
        preview_from_txt2img: when true, previews use the ``preview_*``
            arguments; otherwise they use the current batch's first prompt
            with 20 steps.
        preview_prompt, preview_negative_prompt, preview_steps,
        preview_sampler_index, preview_cfg_scale, preview_seed,
        preview_width, preview_height: preview generation settings.

    Returns:
        ``(hypernetwork, filename)`` where ``filename`` is the final save path
        inside ``shared.cmd_opts.hypernetwork_dir``.

    Raises:
        AssertionError: no name given, unknown name, or a dataset with fewer
            than two images.
        RuntimeError: the running mean loss became NaN.
    """
    assert hypernetwork_name, 'hypernetwork not selected'

    path = shared.hypernetworks.get(hypernetwork_name, None)
    # Fail with a clear message instead of a TypeError from inside load().
    assert path is not None, f'hypernetwork not found: {hypernetwork_name}'
    shared.loaded_hypernetwork = Hypernetwork()
    shared.loaded_hypernetwork.load(path)

    shared.state.textinfo = "Initializing hypernetwork training..."
    shared.state.job_count = steps

    filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt')

    log_directory = os.path.join(log_directory, datetime.datetime.now().strftime("%Y-%m-%d"), hypernetwork_name)
    unload = shared.opts.unload_models_when_training

    if save_hypernetwork_every > 0:
        hypernetwork_dir = os.path.join(log_directory, "hypernetworks")
        os.makedirs(hypernetwork_dir, exist_ok=True)
    else:
        hypernetwork_dir = None

    if create_image_every > 0:
        images_dir = os.path.join(log_directory, "images")
        os.makedirs(images_dir, exist_ok=True)
    else:
        images_dir = None

    shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..."
    with torch.autocast("cuda"):
        ds = modules.textual_inversion.dataset.PersonalizedBase(
            data_root=data_root,
            width=512,
            height=512,
            repeats=shared.opts.training_image_repeats_per_epoch,
            placeholder_token=hypernetwork_name,
            model=shared.sd_model,
            device=devices.device,
            template_file=template_file,
            include_cond=True,
            batch_size=batch_size,
        )

    assert ds.length > 1, "Dataset should contain more than 1 images"

    if unload:
        # These sub-models are moved off the training device for the
        # duration of the optimisation steps.
        shared.sd_model.cond_stage_model.to(devices.cpu)
        shared.sd_model.first_stage_model.to(devices.cpu)

    hypernetwork = shared.loaded_hypernetwork
    weights = hypernetwork.weights()
    for weight in weights:
        weight.requires_grad = True

    # Ring buffer of recent losses; its mean is what gets reported.
    losses = torch.zeros((32,))

    last_saved_file = "<none>"
    last_saved_image = "<none>"

    initial_step = hypernetwork.step or 0
    if initial_step > steps:
        return hypernetwork, filename

    scheduler = LearnRateScheduler(learn_rate, steps, initial_step)
    optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate)

    pbar = tqdm.tqdm(enumerate(ds), total=steps - initial_step)
    for i, entries in pbar:
        hypernetwork.step = i + initial_step

        scheduler.apply(optimizer, hypernetwork.step)
        if scheduler.finished:
            break

        if shared.state.interrupted:
            break

        with torch.autocast("cuda"):
            # stack_conds pads prompts of differing length and keeps the
            # batch dimension; its result must not be replaced by a vstack,
            # which would flatten the batch into a 2-D tensor.
            c = stack_conds([entry.cond for entry in entries]).to(devices.device)
            x = torch.stack([entry.latent for entry in entries]).to(devices.device)
            loss = shared.sd_model(x, c)[0]
            del x
            del c

            losses[hypernetwork.step % losses.shape[0]] = loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        mean_loss = losses.mean()
        if torch.isnan(mean_loss):
            raise RuntimeError("Loss diverged.")
        pbar.set_description(f"loss: {mean_loss:.7f}")

        if hypernetwork.step > 0 and hypernetwork_dir is not None and hypernetwork.step % save_hypernetwork_every == 0:
            last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name}-{hypernetwork.step}.pt')
            hypernetwork.save(last_saved_file)

        textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), {
            "loss": f"{mean_loss:.7f}",
            "learn_rate": f"{scheduler.learn_rate:.7f}"
        })

        if hypernetwork.step > 0 and images_dir is not None and hypernetwork.step % create_image_every == 0:
            last_saved_image = os.path.join(images_dir, f'{hypernetwork_name}-{hypernetwork.step}.png')

            optimizer.zero_grad()
            # Image generation needs the sub-models back on the device.
            shared.sd_model.cond_stage_model.to(devices.device)
            shared.sd_model.first_stage_model.to(devices.device)

            p = processing.StableDiffusionProcessingTxt2Img(
                sd_model=shared.sd_model,
                do_not_save_grid=True,
                do_not_save_samples=True,
            )

            if preview_from_txt2img:
                p.prompt = preview_prompt
                p.negative_prompt = preview_negative_prompt
                p.steps = preview_steps
                p.sampler_index = preview_sampler_index
                p.cfg_scale = preview_cfg_scale
                p.seed = preview_seed
                p.width = preview_width
                p.height = preview_height
            else:
                p.prompt = entries[0].cond_text
                p.steps = 20

            preview_text = p.prompt

            processed = processing.process_images(p)
            image = processed.images[0] if len(processed.images) > 0 else None

            if unload:
                shared.sd_model.cond_stage_model.to(devices.cpu)
                shared.sd_model.first_stage_model.to(devices.cpu)

            if image is not None:
                shared.state.current_image = image
                image.save(last_saved_image)
                last_saved_image += f", prompt: {preview_text}"

        shared.state.job_no = hypernetwork.step

        shared.state.textinfo = f"""
<p>
Loss: {mean_loss:.7f}<br/>
Step: {hypernetwork.step}<br/>
Last prompt: {html.escape(entries[0].cond_text)}<br/>
Last saved hypernetwork: {html.escape(last_saved_file)}<br/>
Last saved image: {html.escape(last_saved_image)}<br/>
</p>
"""

    # Record which checkpoint the network was trained against, then write the
    # final file.
    checkpoint = sd_models.select_checkpoint()

    hypernetwork.sd_checkpoint = checkpoint.hash
    hypernetwork.sd_checkpoint_name = checkpoint.model_name
    hypernetwork.save(filename)

    return hypernetwork, filename