This commit is contained in:
mrq 2025-02-28 18:06:41 -06:00
parent a174c33db6
commit 4e7d885542

View File

@@ -449,9 +449,9 @@ class Base_V2(nn.Module):
hidden_act="gelu", hidden_act="gelu",
is_encoder_decoder=False, is_encoder_decoder=False,
is_decoder=True, is_decoder=True,
output_norm=not per_level_normalization, # moves the LN out to the decoder
#gradient_checkpointing=self.gradient_checkpointing, #gradient_checkpointing=self.gradient_checkpointing,
) )
self.model_config.output_norm = not per_level_normalization # moves the LN out to the decoder
self.model_config.attn_mode = attention_backend self.model_config.attn_mode = attention_backend
self.model = LlamaModel(self.model_config) self.model = LlamaModel(self.model_config)