diff --git a/torchscale/architecture/decoder.py b/torchscale/architecture/decoder.py index 0463800..704e438 100644 --- a/torchscale/architecture/decoder.py +++ b/torchscale/architecture/decoder.py @@ -400,7 +400,7 @@ class Decoder(nn.Module): ) x = x.transpose(0, 1) - # relative postion + # relative position self_attn_rel_pos_bias = None slen = prev_output_tokens.size(1) if self.self_attn_relative_position is not None: