From 7b29d32f030861eab1e8ba522e2de64ae7c13eeb Mon Sep 17 00:00:00 2001 From: shumingma Date: Tue, 29 Nov 2022 21:36:03 -0800 Subject: [PATCH] Remove unused parameters --- torchscale/architecture/config.py | 4 ++-- torchscale/architecture/decoder.py | 5 ----- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/torchscale/architecture/config.py b/torchscale/architecture/config.py index 4861c41..347ee23 100644 --- a/torchscale/architecture/config.py +++ b/torchscale/architecture/config.py @@ -112,10 +112,10 @@ class DecoderConfig(object): self.ddp_rank = kwargs.pop("ddp_rank", 0) if self.deepnorm: - self.encoder_normalize_before = False + self.decoder_normalize_before = False self.subln = False if self.subln: - self.encoder_normalize_before = True + self.decoder_normalize_before = True self.deepnorm = False if self.use_xmoe: self.moe_normalize_gate_prob_before_dropping = True diff --git a/torchscale/architecture/decoder.py b/torchscale/architecture/decoder.py index 704e438..b4a313f 100644 --- a/torchscale/architecture/decoder.py +++ b/torchscale/architecture/decoder.py @@ -92,11 +92,6 @@ class DecoderLayer(nn.Module): else: self.alpha = 1.0 - if args.subln: - self.ffn_layernorm = LayerNorm(self.ffn_dim) - else: - self.ffn_layernorm = None - def build_ffn(self, embed_dim, args): return FeedForwardNetwork( embed_dim,