vall-e/vall_e/models/__init__.py


def get_model(config, training=True):
	"""Build the model described by `config`, print a one-line summary, and return it.

	When `config.experimental` is truthy the `Model` class from `.experimental`
	is instantiated; otherwise `AR_NAR` from `.ar_nar` is used. The class is
	imported lazily, so only the selected implementation's module is loaded.

	Args:
		config: model configuration object. This function reads `name`,
			`experimental`, `text_tokens`, `audio_tokens`, `dim`, `heads`,
			`layers` and `dropout` from it, plus `experts` and
			`input_alignment` on the `AR_NAR` path. The object itself is also
			forwarded to the constructor as `config`.
		training: forwarded to `AR_NAR` as its `training` argument. It is not
			passed to the experimental model.

	Returns:
		The freshly constructed model instance.
	"""
	name = config.name

	if config.experimental:
		from .experimental import Model as Experimental

		# the experimental model takes no experts / padding / training arguments
		model_kwargs = dict(
			n_text_tokens=config.text_tokens,
			n_audio_tokens=config.audio_tokens,
			d_model=config.dim,
			n_layers=config.layers,
			n_heads=config.heads,
			p_dropout=config.dropout,
			config=config,
		)
		model = Experimental(**model_kwargs)
	else:
		from .ar_nar import AR_NAR

		model_kwargs = dict(
			n_text_tokens=config.text_tokens,
			n_audio_tokens=config.audio_tokens,
			d_model=config.dim,
			n_heads=config.heads,
			n_layers=config.layers,
			n_experts=config.experts,
			p_dropout=config.dropout,
			l_padding=config.input_alignment,
			training=training,
			config=config,
		)
		model = AR_NAR(**model_kwargs)

	# summary line: model name, dtype of the first parameter, and the number of
	# elements across parameters that have requires_grad set
	print(f"{name} ({next(model.parameters()).dtype}): {sum(p.numel() for p in model.parameters() if p.requires_grad)} parameters")

	return model

def get_models(models, training=True):
	"""Build every model in `models` and return them keyed by `full_name`.

	Args:
		models: iterable of model configuration objects. Each one is passed to
			`get_model` and must expose a `full_name` attribute.
		training: forwarded unchanged to `get_model` for every entry.

	Returns:
		A dict mapping each config's `full_name` to the model built from it.
		If two configs share a `full_name`, the later one replaces the earlier.
	"""
	built = {}
	for cfg in models:
		# read the key before constructing, as the dict comprehension did
		key = cfg.full_name
		built[key] = get_model(cfg, training=training)
	return built
Rewrite init 2023-08-02 21:53:35 +00:00
re-added loading multiple models because I'm now entertaining having split AR/NAR models again (and need a way to load both at once) 2024-06-06 14:48:43 +00:00			`def get_model(config, training=True):`
			`name = config.name`
big cleanup 2023-08-04 01:26:36 +00:00
re-added loading multiple models because I'm now entertaining having split AR/NAR models again (and need a way to load both at once) 2024-06-06 14:48:43 +00:00			`if not config.experimental:`
fixes 2024-06-04 05:07:00 +00:00			`from .ar_nar import AR_NAR`
feverish cleanup 2024-06-04 02:28:49 +00:00			`model = AR_NAR(`
re-added loading multiple models because I'm now entertaining having split AR/NAR models again (and need a way to load both at once) 2024-06-06 14:48:43 +00:00			`n_text_tokens=config.text_tokens,`
			`n_audio_tokens=config.audio_tokens,`
			`d_model=config.dim,`
			`n_heads=config.heads,`
			`n_layers=config.layers,`
			`n_experts=config.experts,`
feverish cleanup 2024-06-04 02:28:49 +00:00
re-added loading multiple models because I'm now entertaining having split AR/NAR models again (and need a way to load both at once) 2024-06-06 14:48:43 +00:00			`p_dropout=config.dropout,`
feverish cleanup 2024-06-04 02:28:49 +00:00
re-added loading multiple models because I'm now entertaining having split AR/NAR models again (and need a way to load both at once) 2024-06-06 14:48:43 +00:00			`l_padding = config.input_alignment,`
feverish cleanup 2024-06-04 02:28:49 +00:00
			`training = training,`
re-added loading multiple models because I'm now entertaining having split AR/NAR models again (and need a way to load both at once) 2024-06-06 14:48:43 +00:00			`config = config,`
feverish cleanup 2024-06-04 02:28:49 +00:00			`)`
			`else:`
fixes 2024-06-04 05:07:00 +00:00			`from .experimental import Model as Experimental`
feverish cleanup 2024-06-04 02:28:49 +00:00			`model = Experimental(`
re-added loading multiple models because I'm now entertaining having split AR/NAR models again (and need a way to load both at once) 2024-06-06 14:48:43 +00:00			`n_text_tokens=config.text_tokens,`
			`n_audio_tokens=config.audio_tokens,`

			`d_model=config.dim,`
			`n_layers=config.layers,`
			`n_heads=config.heads,`
			`p_dropout=config.dropout,`
feverish cleanup 2024-06-04 02:28:49 +00:00
re-added loading multiple models because I'm now entertaining having split AR/NAR models again (and need a way to load both at once) 2024-06-06 14:48:43 +00:00			`config = config,`
feverish cleanup 2024-06-04 02:28:49 +00:00			`)`
Rewrite init 2023-08-02 21:53:35 +00:00
tweaks to try and get deepspeed quantized inferencing, validating bitsandbytes and deepspeed quantization, nothing seems to work 2023-10-13 03:21:43 +00:00			`print(f"{name} ({next(model.parameters()).dtype}): {sum(p.numel() for p in model.parameters() if p.requires_grad)} parameters")`
Rewrite init 2023-08-02 21:53:35 +00:00
			`return model`

added Mistral (non-Mixtral) backend, useless optimization when not training, proper adjustment of the LR for Prodigyopt through d_coeff (maybe), recurrent sampling for LLaMA/Mistral/Mixtral backends (again, doesn't actually work) 2024-02-01 03:48:36 +00:00			`def get_models(models, training=True):`
			`return { model.full_name: get_model(model, training=training) for model in models }`