From d1fefe9c22bad07535f56c4c461b94588dd8cc84 Mon Sep 17 00:00:00 2001
From: Li Dong
Date: Wed, 27 Sep 2023 20:40:36 +0800
Subject: [PATCH] rollback LN epsilon in retention

rollback https://github.com/microsoft/torchscale/commit/2c29de0fb3e5e559181f0fb4854330c5b35961cd
---
 torchscale/component/multiscale_retention.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchscale/component/multiscale_retention.py b/torchscale/component/multiscale_retention.py
index 94a15f0..f88e962 100644
--- a/torchscale/component/multiscale_retention.py
+++ b/torchscale/component/multiscale_retention.py
@@ -67,7 +67,7 @@ class MultiScaleRetention(nn.Module):
 
         self.out_proj = MultiwayWrapper(args, nn.Linear(embed_dim * self.factor, embed_dim, bias=True))
 
-        self.group_norm = MultiwayWrapper(args, LayerNorm(self.head_dim, eps=1e-6, elementwise_affine=False))
+        self.group_norm = MultiwayWrapper(args, LayerNorm(self.head_dim, eps=args.layernorm_eps, elementwise_affine=False))
         self.reset_parameters()
 
     def reset_parameters(self):
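
For context, a minimal sketch (not part of the patch and not the torchscale implementation) of what the changed line does: the per-head group normalization in retention now reads its epsilon from the shared config value instead of a hard-coded 1e-6, rolling back to the behavior before the referenced commit. `RetNetArgs` and `build_group_norm` below are hypothetical stand-ins for the real torchscale `args` object and module wiring, and plain `nn.LayerNorm` stands in for torchscale's `LayerNorm`/`MultiwayWrapper`.

```python
# Illustrative sketch only: shows the effect of reading eps from the config
# rather than hard-coding it. Names marked "hypothetical" are not torchscale APIs.
from dataclasses import dataclass

import torch
import torch.nn as nn


@dataclass
class RetNetArgs:                 # hypothetical stand-in for the torchscale args object
    layernorm_eps: float = 1e-6   # single source of truth for LN epsilon across the model


def build_group_norm(args: RetNetArgs, head_dim: int) -> nn.LayerNorm:
    # After the patch: eps comes from args.layernorm_eps, not a literal 1e-6,
    # so retention's group norm stays consistent with the rest of the model config.
    return nn.LayerNorm(head_dim, eps=args.layernorm_eps, elementwise_affine=False)


if __name__ == "__main__":
    args = RetNetArgs(layernorm_eps=1e-6)
    group_norm = build_group_norm(args, head_dim=64)
    x = torch.randn(2, 8, 64)      # (batch, heads, head_dim)
    print(group_norm(x).shape)     # torch.Size([2, 8, 64])
```

With this change, anyone overriding `layernorm_eps` in the config also changes the epsilon used by retention's group norm, rather than silently keeping the 1e-6 literal.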