2023-08-02 21:53:35 +00:00
|
|
|
|
2024-02-01 03:48:36 +00:00
|
|
|
def get_model(cfg, training=True):
    """Instantiate the model described by ``cfg`` and print its size.

    When ``cfg.experimental`` is truthy the ``Experimental`` model from
    ``.experimental`` is built; otherwise the ``AR_NAR`` model from
    ``.ar_nar`` is built and ``cfg`` is additionally stored on it as
    ``_cfg``. The model modules are imported lazily, so only the module
    for the selected branch is loaded.

    Args:
        cfg: Model configuration object. Reads ``name``, ``experimental``,
            ``dim``, ``heads``, ``layers`` and ``dropout``; the ``AR_NAR``
            branch also reads ``tokens``, ``experts`` and
            ``input_alignment``.
        training: Forwarded to ``AR_NAR`` only; the experimental model
            does not receive it.

    Returns:
        The constructed model instance.
    """
    name = cfg.name

    if cfg.experimental:
        from .experimental import Model as Experimental

        model = Experimental(
            d_model=cfg.dim,
            n_layers=cfg.layers,
            n_heads=cfg.heads,
            p_dropout=cfg.dropout,
            config=cfg,
        )
    else:
        from .ar_nar import AR_NAR

        model = AR_NAR(
            n_tokens=cfg.tokens,
            d_model=cfg.dim,
            n_heads=cfg.heads,
            n_layers=cfg.layers,
            n_experts=cfg.experts,
            p_dropout=cfg.dropout,
            l_padding=cfg.input_alignment,
            training=training,
            config=cfg,
        )
        model._cfg = cfg

    # Report the dtype of the first parameter and count only the
    # parameters that require gradients.
    first_dtype = next(model.parameters()).dtype
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"{name} ({first_dtype}): {trainable} parameters")

    return model
|
|
|
|
|
2024-02-01 03:48:36 +00:00
|
|
|
def get_models(models, training=True):
    """Build one model per configuration, keyed by its ``full_name``.

    Args:
        models: Iterable of model configuration objects; each must expose
            ``full_name`` and whatever ``get_model`` reads from it.
        training: Passed through unchanged to ``get_model``.

    Returns:
        A dict mapping each configuration's ``full_name`` to the model
        built for it. A later entry with the same ``full_name`` replaces
        an earlier one.
    """
    built = {}
    for model_cfg in models:
        # Read the key before building the model, matching the evaluation
        # order of a dict comprehension.
        key = model_cfg.full_name
        built[key] = get_model(model_cfg, training=training)
    return built
|