# vall-e/vall_e/export.py

import argparse

import torch
import torch.nn

from .data import get_phone_symmap
from .engines import load_engines
from .config import cfg
from .models.lora import lora_get_state_dict
from .utils.io import torch_save, torch_load

# stitches embeddings into one embedding & classifier => lm_head, for use in a HF compatible weight
def convert_to_hf( state_dict, config = None, save_path = None, dtype = None ):
	"""
	Stitch the text / prompt / response embeddings into a single embedding table.

	The stitched table is stored under `model.embed_tokens.weight` and the old
	classifier weight is re-exposed as `lm_head.weight`. Row layout:

		[0, 256)                        text embedding rows
		[256, 256 + 8*1024)             prompt embeddings, one 1024-row slab per level
		[256 + 8*1024, 256 + 16*1024)   response embeddings, one 1024-row slab per level
		last row                        never written; keeps torch.nn.Embedding's default init

	Args:
		state_dict: dict holding the model weights under the `'module'` key; modified in place.
		config: unused.
		save_path: unused.
		dtype: unused; accepted so the signature lines up with the other export callbacks in this file.

	Returns:
		The same `state_dict` object, with the source entries removed and the stitched entries added.

	Raises:
		KeyError: if one of the required source entries is missing from `state_dict['module']`.
	"""
	module = state_dict['module']

	text_tokens = 256
	levels = 8
	codes_per_level = 1024

	# these entries are dropped from the export; they are not folded into the stitched table
	for dropped in ( 'sep', 'langs_emb.weight', 'tasks_emb.weight', 'tones_emb.weight' ):
		module.pop(dropped)

	out_proj = module.pop('classifier.weight')
	# only the classifier weight is carried over (as lm_head.weight); the bias is discarded if present
	module.pop('classifier.bias', None)
	text_emb = module.pop('text_emb.weight')

	def pop_level_scales( prefix ):
		# optional per-level scalar weights; every level defaults to 1 when the checkpoint has none
		if f'{prefix}.weight.0' not in module:
			return [ 1 ] * levels
		return [ module.pop(f'{prefix}.weight.{i}').item() for i in range(levels) ]

	proms_scale = pop_level_scales('proms_emb')
	resps_scale = pop_level_scales('resps_emb')

	proms_emb = [ module.pop(f'proms_emb.embeddings.{i}.weight') for i in range(levels) ]
	resps_emb = [ module.pop(f'resps_emb.embeddings.{i}.weight') for i in range(levels) ]

	# take the width from the source embedding rather than assuming 1024
	token_dim = text_emb.shape[-1]
	n_tokens = text_tokens + 2 * levels * codes_per_level + 1
	# detach() gives a plain tensor sharing the freshly initialised storage, so it can be filled in place
	stitched = torch.nn.Embedding(n_tokens, token_dim).weight.detach()

	stitched[:text_tokens] = text_emb[:text_tokens]

	proms_start = text_tokens
	resps_start = text_tokens + codes_per_level * levels
	for level in range(levels):
		shift = codes_per_level * level
		# only the first 1024 rows of each level are copied, even if the source has more
		stitched[proms_start + shift : proms_start + shift + codes_per_level] = proms_emb[level][:codes_per_level] * proms_scale[level]
		stitched[resps_start + shift : resps_start + shift + codes_per_level] = resps_emb[level][:codes_per_level] * resps_scale[level]

	module['model.embed_tokens.weight'] = stitched
	# to-do: properly recreate the output head weights or something
	module['lm_head.weight'] = out_proj

	return state_dict

# yanks a LoRA from the training checkpoint
def extract_lora( state_dict, config = None, save_path = None, dtype = None ):
	"""
	Pull the LoRA weights out of a checkpoint and save them to their own file.

	The LoRA weights are read from `state_dict["lora"]` when present; otherwise they
	are split out of `state_dict["module"]` via `lora_get_state_dict(..., split=True)`,
	and `state_dict["module"]` is replaced with the second value that call returns.
	The weights are saved, together with `cfg.lora.__dict__` (or None), as
	`lora.<ext>` next to `save_path`, where `<ext>` is `save_path`'s own suffix.

	Args:
		state_dict: checkpoint dict; modified in place (`"lora"` is set to None if the key exists).
		config: unused.
		save_path: pathlib-style path of the main export; only needed when there is a LoRA to write.
		dtype: unused; kept for signature parity with the other export callbacks.

	Returns:
		The same `state_dict` object.

	Raises:
		ValueError: if LoRA weights were found but `save_path` is None.
	"""
	import warnings

	lora = state_dict.get("lora")
	# should always be included, but just in case
	if lora is None and "module" in state_dict:
		lora, module = lora_get_state_dict( state_dict["module"], split = True )
		state_dict["module"] = module

	# the LoRA goes to its own file, so blank its slot in the main state dict
	if "lora" in state_dict:
		state_dict["lora"] = None

	if not lora:
		warnings.warn("No LoRA weights found in the state dict; nothing was extracted.", stacklevel=2)
		return state_dict

	# the path is only inspected once there is something to write, so the no-op path works without it
	if save_path is None:
		raise ValueError("extract_lora needs `save_path` to decide where to write the LoRA weights")

	# save lora specifically
	# should probably export other attributes, similar to what SD LoRAs do
	extension = save_path.suffix[1:]
	lora_path = save_path.parent / f"lora.{extension}"
	torch_save( {
		"module": lora,
		"config": cfg.lora.__dict__ if cfg.lora is not None else None,
	}, lora_path )

	return state_dict

# copies a single classifier head into multiple classifier heads per RVQ level
def split_classifier_heads( state_dict, config = None, save_path = None, dtype = None):
	"""
	Replace the single `classifier` head with one head per RVQ level.

	Each level `i` in `range(config.max_levels)` gets `classifiers.proj.{i}.weight`
	(and `.bias`, when the checkpoint has a classifier bias) as an independent copy
	of the leading rows of the original head: 1025 rows for level 0, 1024 for the rest.
	The original `classifier.*` entries are removed.

	Args:
		state_dict: dict holding the model weights under the `'module'` key; modified in place.
		config: object exposing `max_levels`; defaults to `cfg.model`, looked up at call time.
		save_path: unused.
		dtype: unused.

	Returns:
		The same `state_dict` object (untouched when there is no `classifier.weight`).
	"""
	# resolve here rather than in the signature: a default there is evaluated once, at import
	if config is None:
		config = cfg.model

	module = state_dict['module']
	if "classifier.weight" not in module:
		return state_dict

	head_weight = module.pop('classifier.weight')
	# tolerate a bias-free head instead of failing after half the weights were written
	head_bias = module.pop('classifier.bias', None)

	# copy to new AudioClassifier
	for level in range(config.max_levels):
		# trim per RVQ level (since level 0 has a stop token)
		rows = 1025 if level == 0 else 1024

		# clone() so every head owns its storage instead of being a view of the shared source
		module[f'classifiers.proj.{level}.weight'] = head_weight[:rows, :].clone()
		if head_bias is not None:
			module[f'classifiers.proj.{level}.bias'] = head_bias[:rows].clone()

	return state_dict

# converts a normal LLaMA model to a MoE model, as best as I can
def moe_ify( state_dict, config = None, save_path = None, dtype = None, experts = None ):
	"""
	Rewrite each layer's dense MLP weights into per-expert MoE weights.

	For every layer in `range(config.layers)` the three MLP projections are removed
	and copied into each expert under `block_sparse_moe.experts.{n}`:

		mlp.up_proj.weight   -> w1.weight
		mlp.down_proj.weight -> w2.weight
		mlp.gate_proj.weight -> w3.weight

	NOTE: no router weight (`block_sparse_moe.gate.weight`) is created here.

	Args:
		state_dict: dict holding the model weights under the `'module'` key; modified in place.
		config: object exposing `layers` (and optionally `experts`); defaults to
			`cfg.model`, looked up at call time.
		save_path: unused.
		dtype: unused.
		experts: number of experts to create. When None, `config.experts` is used if it
			is a positive int, otherwise 8 (the previous hard-coded value).

	Returns:
		The same `state_dict` object.

	Raises:
		KeyError: if a layer is missing one of its three MLP projection weights.
	"""
	# resolve here rather than in the signature: a default there is evaluated once, at import
	if config is None:
		config = cfg.model

	# honour the expert count stored on the config (main() writes --experts there) so the
	# exported weights and the exported config agree
	if experts is None:
		experts = getattr( config, "experts", None )
	if not isinstance( experts, int ) or experts < 1:
		experts = 8

	module = state_dict['module']
	for layer in range( config.layers ):
		dense_prefix = f'model.layers.{layer}.mlp'
		up_proj = module.pop(f'{dense_prefix}.up_proj.weight')
		down_proj = module.pop(f'{dense_prefix}.down_proj.weight')
		gate_proj = module.pop(f'{dense_prefix}.gate_proj.weight')

		for expert in range( experts ):
			expert_prefix = f'model.layers.{layer}.block_sparse_moe.experts.{expert}'
			# clone() so every expert owns its storage rather than sharing one tensor
			module[f'{expert_prefix}.w1.weight'] = up_proj.clone()
			module[f'{expert_prefix}.w2.weight'] = down_proj.clone()
			module[f'{expert_prefix}.w3.weight'] = gate_proj.clone()

	return state_dict

def main():
	"""
	Command-line entry point: load the trained engines and export their weights.

	At most one post-processing callback may be requested (`--hf`, `--lora`,
	`--split-classifiers` or `--moe-ify`); it is handed to `engines.export` along
	with the phoneme symbol map and the requested weight format.

	Raises:
		Exception: if `--format` is not one of sft / safetensors / pt / pth, or if
			more than one callback flag is given.
	"""
	# the text must be passed as `description=`; the first positional parameter of ArgumentParser is `prog`
	parser = argparse.ArgumentParser(description="Save trained model to path.")
	parser.add_argument("--module-only", action='store_true')
	parser.add_argument("--hf", action='store_true', default=None) # convert to HF-style
	parser.add_argument("--lora", action='store_true', default=None) # exports LoRA
	parser.add_argument("--split-classifiers", action='store_true', default=None) # splits classifier heads
	parser.add_argument("--moe-ify", action='store_true', default=None) # converts the dense MLPs into MoE experts
	parser.add_argument("--experts", type=int, default=8) # number of experts to record when using --moe-ify
	parser.add_argument("--dtype", type=str, default="auto") # set target dtype to export to
	parser.add_argument("--format", type=str, default=cfg.weights_format) # set target format to export weights under
	# unrecognised arguments are tolerated and ignored
	args, _ = parser.parse_known_args()

	if args.format.lower() not in ["sft", "safetensors", "pt", "pth"]:
		raise Exception(f"Unknown requested format: {args.format}")

	if args.module_only:
		cfg.trainer.load_module_only = True

	# argparse attribute name -> export callback; only one can be applied per run
	callbacks = {
		"hf": convert_to_hf,
		"lora": extract_lora,
		"split_classifiers": split_classifier_heads,
		"moe_ify": moe_ify,
	}
	requested = [ flag for flag in callbacks if getattr(args, flag) ]
	# checked before the engines are loaded so a bad invocation fails early
	if len(requested) > 1:
		raise Exception("Requesting more than one callback")

	if args.dtype != "auto":
		cfg.trainer.weight_dtype = args.dtype

	# necessary to ensure we are actually exporting the weights right
	cfg.inference.backend = cfg.trainer.backend

	engines = load_engines(training=False) # to ignore loading optimizer state

	callback = callbacks[requested[0]] if requested else None
	if args.moe_ify:
		# set it here after the model loads to not influence which model loads
		cfg.model.experts = args.experts
		for name, engine in engines.items():
			engine.module.config.experts = args.experts
			engine.hyper_config.experts = args.experts

	engines.export(userdata={"symmap": get_phone_symmap()}, callback=callback, format=args.format)

# run the exporter only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
	main()
Rewrite init 2023-08-02 21:53:35 +00:00			`import argparse`

			`import torch`
added a flag to convert to a HF compatible model on export by stitching things 2024-06-04 03:34:47 +00:00			`import torch.nn`
Rewrite init 2023-08-02 21:53:35 +00:00
			`from .data import get_phone_symmap`
cleanup, use deepspeed inferencing pathway if requested 2023-10-09 20:24:04 +00:00			`from .engines import load_engines`
made exporter make more sense 2023-08-14 03:56:28 +00:00			`from .config import cfg`
actually make deepspeed work with LoRAs 2024-06-17 18:55:37 +00:00			`from .models.lora import lora_get_state_dict`
added safetensors support (with metadata) and feed whatever torch.load/torch.save into it 2024-08-04 04:15:20 +00:00			`from .utils.io import torch_save, torch_load`
Rewrite init 2023-08-02 21:53:35 +00:00
added export option to convert Llama to MixtralMoE for another dumb experiment 2024-08-05 01:25:06 +00:00			`# stitches embeddings into one embedding & classifier => lm_head, for use in a HF compatible weight`
actually make deepspeed work with LoRAs 2024-06-17 18:55:37 +00:00			`def convert_to_hf( state_dict, config = None, save_path = None ):`
added a flag to convert to a HF compatible model on export by stitching things 2024-06-04 03:34:47 +00:00			`n_tokens = 256 + (1024 * 8) + (1024 * 8) + 1`
			`token_dim = 1024`
			`embedding = torch.nn.Embedding(n_tokens, token_dim)`
			`embedding.weight.requires_grad = False`

			`def move_value(k):`
			`v = state_dict['module'][k]`
			`del state_dict['module'][k]`
			`return v`

			`separator = move_value('sep')`
			`out_proj = move_value('classifier.weight')`
			`text_emb = move_value('text_emb.weight')`
			`langs_emb = move_value('langs_emb.weight')`
			`tasks_emb = move_value('tasks_emb.weight')`
			`tones_emb = move_value('tones_emb.weight')`

			`proms_emb_weight = [ move_value(f'proms_emb.weight.{i}').item() for i in range(8) ] if "proms_emb.weight.0" in state_dict['module'] else [ [ 1 for _ in range(8) ] ]`
			`resps_emb_weight = [ move_value(f'resps_emb.weight.{i}').item() for i in range(8) ] if "resps_emb.weight.0" in state_dict['module'] else [ [ 1 for _ in range(8) ] ]`

			`proms_emb = [ move_value(f'proms_emb.embeddings.{i}.weight') for i in range(8) ]`
			`resps_emb = [ move_value(f'resps_emb.embeddings.{i}.weight') for i in range(8) ]`


			`start = 0`
			`for i in range(256):`
			`embedding.weight[start + i] = text_emb[i]`

			`start = 256`
			`for layer in range(8):`
			`for i in range(1024):`
			`offset = start + 1024 * layer`
			`embedding.weight[i + offset] = proms_emb[layer][i] * proms_emb_weight[layer]`

			`start = 256 + 1024 * 8`
			`for layer in range(8):`
			`for i in range(1024):`
			`offset = start + 1024 * layer`
			`embedding.weight[i + offset] = resps_emb[layer][i] * proms_emb_weight[layer]`

			`state_dict['module']['model.embed_tokens.weight'] = embedding.state_dict()`
added export option to convert Llama to MixtralMoE for another dumb experiment 2024-08-05 01:25:06 +00:00			`# to-do: properly recreate the output head weights or something`
added a flag to convert to a HF compatible model on export by stitching things 2024-06-04 03:34:47 +00:00			`state_dict['module']['lm_head.weight'] = out_proj`
fix weird regression in handling checkpoints when backend is local, but deepspeed checkpoints are in (it was handled with LoRA loading but not real loading...) 2024-07-31 03:15:56 +00:00
added export option to convert Llama to MixtralMoE for another dumb experiment 2024-08-05 01:25:06 +00:00			`del state_dict['module']['classifier.weight']`
added a flag to convert to a HF compatible model on export by stitching things 2024-06-04 03:34:47 +00:00			`del state_dict['module']['classifier.bias']`

actually make deepspeed work with LoRAs 2024-06-17 18:55:37 +00:00			`return state_dict`

added export option to convert Llama to MixtralMoE for another dumb experiment 2024-08-05 01:25:06 +00:00			`# yanks a LoRA from the training checkpoint`
mechanism to store the model config inside the weights and load them, some other things to allow LoRA training on the RetNet (gradient checkpointing will gripe about inputs not having require_grad and nothing seems to remedy it) 2024-07-16 23:23:13 +00:00			`def extract_lora( state_dict, config = None, save_path = None, dtype = None ):`
			`if dtype is None:`
			`dtype = cfg.inference.dtype`

tweaks and fixes for lora stuffs 2024-09-08 23:05:21 +00:00			`format = save_path.suffix[1:]`
added safetensors support (with metadata) and feed whatever torch.load/torch.save into it 2024-08-04 04:15:20 +00:00
actually make deepspeed work with LoRAs 2024-06-17 18:55:37 +00:00			`lora = state_dict["lora"] if "lora" in state_dict else None`
			`# should always be included, but just in case`
			`if lora is None and "module" in state_dict:`
			`lora, module = lora_get_state_dict( state_dict["module"], split = True )`
			`state_dict["module"] = module`
added safetensors support (with metadata) and feed whatever torch.load/torch.save into it 2024-08-04 04:15:20 +00:00
			`if "lora" in state_dict:`
			`state_dict["lora"] = None`
actually make deepspeed work with LoRAs 2024-06-17 18:55:37 +00:00
			`# should raise an exception since there's nothing to extract, or at least a warning`
			`if not lora:`
			`return state_dict`
added a flag to convert to a HF compatible model on export by stitching things 2024-06-04 03:34:47 +00:00
actually make deepspeed work with LoRAs 2024-06-17 18:55:37 +00:00			`# save lora specifically`
			`# should probably export other attributes, similar to what SD LoRAs do`
added safetensors support (with metadata) and feed whatever torch.load/torch.save into it 2024-08-04 04:15:20 +00:00			`save_path = save_path.parent / f"lora.{format}"`
			`torch_save( {`
mechanism to store the model config inside the weights and load them, some other things to allow LoRA training on the RetNet (gradient checkpointing will gripe about inputs not having require_grad and nothing seems to remedy it) 2024-07-16 23:23:13 +00:00			`"module": lora,`
			`"config": cfg.lora.__dict__ if cfg.lora is not None else None,`
			`}, save_path )`
added a flag to convert to a HF compatible model on export by stitching things 2024-06-04 03:34:47 +00:00
			`return state_dict`

added export option to convert Llama to MixtralMoE for another dumb experiment 2024-08-05 01:25:06 +00:00			`# copies a single classifier head into multiple classifier heads per RVQ level`
added rudimentary demo page creator (currently just embeds base64 wavs into the page, need to test not doing that) 2024-07-20 01:49:40 +00:00			`def split_classifier_heads( state_dict, config = cfg.model, save_path = None, dtype = None):`
			`levels = config.max_levels`

			`if "classifier.weight" not in state_dict['module']:`
			`return state_dict`
			`# copy to new AudioClassifier`
			`for i in range(levels):`
			`tokens = 1025 if i == 0 else 1024`

			`# trim per RVQ level (since level 0 has a stop token)`
fix issue with sft and shared tensors... 2024-08-05 00:56:21 +00:00			`state_dict['module'][f'classifiers.proj.{i}.weight'] = state_dict['module']['classifier.weight'][:tokens, :].clone()`
			`state_dict['module'][f'classifiers.proj.{i}.bias'] = state_dict['module']['classifier.bias'][:tokens].clone()`
added rudimentary demo page creator (currently just embeds base64 wavs into the page, need to test not doing that) 2024-07-20 01:49:40 +00:00
			`# delete old weights`
			`del state_dict['module']['classifier.weight']`
			`del state_dict['module']['classifier.bias']`

			`return state_dict`
actually make deepspeed work with LoRAs 2024-06-17 18:55:37 +00:00
added export option to convert Llama to MixtralMoE for another dumb experiment 2024-08-05 01:25:06 +00:00			`# converts a normal LLaMA model to a MoE model, as best as I can`
			`def moe_ify( state_dict, config = cfg.model, save_path = None, dtype = None ):`
			`# to-do: find a good way to pass in requested experts`
			`experts = 8`
			`for layer in range( config.layers ):`
			`#state_dict[f'model.layers.{layer}.block_sparse_moe.gate.weight'] = torch.randn((config.dim, experts))`
			`for expert in range( experts ):`
			`state_dict['module'][f'model.layers.{layer}.block_sparse_moe.experts.{expert}.w1.weight'] = state_dict['module'][f'model.layers.{layer}.mlp.up_proj.weight'].clone()`
			`state_dict['module'][f'model.layers.{layer}.block_sparse_moe.experts.{expert}.w2.weight'] = state_dict['module'][f'model.layers.{layer}.mlp.down_proj.weight'].clone()`
			`state_dict['module'][f'model.layers.{layer}.block_sparse_moe.experts.{expert}.w3.weight'] = state_dict['module'][f'model.layers.{layer}.mlp.gate_proj.weight'].clone()`

			`del state_dict['module'][f'model.layers.{layer}.mlp.up_proj.weight']`
			`del state_dict['module'][f'model.layers.{layer}.mlp.down_proj.weight']`
			`del state_dict['module'][f'model.layers.{layer}.mlp.gate_proj.weight']`

			`return state_dict`

Rewrite init 2023-08-02 21:53:35 +00:00			`def main():`
			`parser = argparse.ArgumentParser("Save trained model to path.")`
ops 2023-08-20 18:42:18 +00:00			`parser.add_argument("--module-only", action='store_true')`
added a flag to convert to a HF compatible model on export by stitching things 2024-06-04 03:34:47 +00:00			`parser.add_argument("--hf", action='store_true', default=None) # convert to HF-style`
actually make deepspeed work with LoRAs 2024-06-17 18:55:37 +00:00			`parser.add_argument("--lora", action='store_true', default=None) # exports LoRA`
added rudimentary demo page creator (currently just embeds base64 wavs into the page, need to test not doing that) 2024-07-20 01:49:40 +00:00			`parser.add_argument("--split-classifiers", action='store_true', default=None) # splits classifier heads`
added export option to convert Llama to MixtralMoE for another dumb experiment 2024-08-05 01:25:06 +00:00			`parser.add_argument("--moe-ify", action='store_true', default=None) # splits classifier heads`
			`parser.add_argument("--experts", type=int, default=8) # set target dtype to export to`
mechanism to store the model config inside the weights and load them, some other things to allow LoRA training on the RetNet (gradient checkpointing will gripe about inputs not having require_grad and nothing seems to remedy it) 2024-07-16 23:23:13 +00:00			`parser.add_argument("--dtype", type=str, default="auto") # set target dtype to export to`
tweaks and fixes for lora stuffs 2024-09-08 23:05:21 +00:00			`parser.add_argument("--format", type=str, default=cfg.weights_format) # set target format to export weights under`
ugh 2024-06-09 16:39:43 +00:00			`args, unknown = parser.parse_known_args()`
Rewrite init 2023-08-02 21:53:35 +00:00
added safetensors support (with metadata) and feed whatever torch.load/torch.save into it 2024-08-04 04:15:20 +00:00			`if args.format.lower() not in ["sft", "safetensors", "pt", "pth"]:`
			`raise Exception(f"Unknown requested format: {args.format}")`

ops 2023-08-20 18:42:18 +00:00			`if args.module_only:`
			`cfg.trainer.load_module_only = True`

actually make deepspeed work with LoRAs 2024-06-17 18:55:37 +00:00
			`if args.hf and args.lora:`
			`raise Exception("Requesting more than one callback")`
added a flag to convert to a HF compatible model on export by stitching things 2024-06-04 03:34:47 +00:00
mechanism to store the model config inside the weights and load them, some other things to allow LoRA training on the RetNet (gradient checkpointing will gripe about inputs not having require_grad and nothing seems to remedy it) 2024-07-16 23:23:13 +00:00			`if args.dtype != "auto":`
			`cfg.trainer.weight_dtype = args.dtype`

might help 2024-07-23 01:57:01 +00:00			`# necessary to ensure we are actually exporting the weights right`
			`cfg.inference.backend = cfg.trainer.backend`

fix weird regression in handling checkpoints when backend is local, but deepspeed checkpoints are in (it was handled with LoRA loading but not real loading...) 2024-07-31 03:15:56 +00:00			`engines = load_engines(training=False) # to ignore loading optimizer state`
added export option to convert Llama to MixtralMoE for another dumb experiment 2024-08-05 01:25:06 +00:00
			`callback = None`
			`if args.hf:`
			`callback = convert_to_hf`
			`elif args.lora:`
			`callback = extract_lora`
			`elif args.split_classifiers:`
			`callback = split_classifier_heads`
			`elif args.moe_ify:`
			`callback = moe_ify`
			`# set it here after the model loads to not influence which model loads`
			`cfg.model.experts = args.experts`
			`for name, engine in engines.items():`
			`engine.module.config.experts = args.experts`
			`engine.hyper_config.experts = args.experts`

added safetensors support (with metadata) and feed whatever torch.load/torch.save into it 2024-08-04 04:15:20 +00:00			`engines.export(userdata={"symmap": get_phone_symmap()}, callback=callback, format=args.format)`
Rewrite init 2023-08-02 21:53:35 +00:00
			`if __name__ == "__main__":`
			`main()`