Merge pull request #4 from kashif/kashif-patch-1

remove lambda
Shuming Ma 2022-11-29 12:21:53 +08:00 committed by GitHub
commit 559b5fdf56
3 changed files with 5 additions and 5 deletions
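
The change is mechanical: get_activation_fn("gelu") used to return a lambda that cast its input to fp32 and cast the result back, and now returns F.gelu directly, with the cast moved to each call site. A minimal standalone sketch of the equivalence (illustrative only, not code from this repository):

import torch
import torch.nn.functional as F

# Before: the helper returned an anonymous function that did the cast itself.
old_gelu = lambda x: F.gelu(x.float()).type_as(x)

# After: the helper returns F.gelu and each call site casts explicitly.
new_gelu = F.gelu

x = torch.randn(2, 4, dtype=torch.float16)
y_old = old_gelu(x)
y_new = new_gelu(x.float()).type_as(x)
assert y_old.dtype == y_new.dtype == torch.float16
assert torch.allclose(y_old.float(), y_new.float())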

@@ -391,7 +391,7 @@ class ClassificationHead(nn.Module):
         x = features[:, 0, :]  # take <s> token (equiv. to [CLS])
         x = self.dropout(x)
         x = self.dense(x)
-        x = self.activation_fn(x)
+        x = self.activation_fn(x.float()).type_as(x)
         x = self.dropout(x)
         x = self.out_proj(x)
         return x
@@ -418,7 +418,7 @@ class LMHead(nn.Module):
             features = features[masked_tokens, :]

         x = self.dense(features)
-        x = self.activation_fn(x)
+        x = self.activation_fn(x.float()).type_as(x)
         x = self.layer_norm(x)
         # project back to size of vocabulary with bias
         x = F.linear(x, self.weight) + self.bias
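
The pattern introduced at both call sites above runs the activation in fp32 and casts the result back to the working dtype, which keeps the non-linearity numerically stable under fp16/bf16 training. A hedged sketch with a hypothetical TinyHead module (not code from this repository):

import torch
import torch.nn as nn
import torch.nn.functional as F

class TinyHead(nn.Module):
    def __init__(self, dim=8, num_classes=3, activation_fn=F.gelu):
        super().__init__()
        self.dense = nn.Linear(dim, dim)
        self.activation_fn = activation_fn
        self.out_proj = nn.Linear(dim, num_classes)

    def forward(self, x):
        x = self.dense(x)
        # compute the activation in fp32, then cast back to the input dtype
        x = self.activation_fn(x.float()).type_as(x)
        return self.out_proj(x)

head = TinyHead().bfloat16()
out = head(torch.randn(2, 8, dtype=torch.bfloat16))
assert out.dtype == torch.bfloat16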

@@ -400,7 +400,7 @@ class Decoder(nn.Module):
         )
         x = x.transpose(0, 1)

-        # relative postion
+        # relative position
         self_attn_rel_pos_bias = None
         slen = prev_output_tokens.size(1)
         if self.self_attn_relative_position is not None:

@@ -85,7 +85,7 @@ def get_activation_fn(activation):
     if activation == "relu":
         return F.relu
     elif activation == "gelu":
-        return lambda x: F.gelu(x.float()).type_as(x)
+        return F.gelu
     else:
         raise NotImplementedError
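
One concrete consequence of returning F.gelu instead of the lambda (an assumption about the motivation, not stated in the PR): a module-level function pickles by reference, while a lambda does not, which matters if a module storing activation_fn is ever checkpointed with pickle or sent to worker processes. A self-contained sketch mirroring the post-change helper:

import pickle
import torch.nn.functional as F

def get_activation_fn(activation):
    if activation == "relu":
        return F.relu
    elif activation == "gelu":
        return F.gelu
    else:
        raise NotImplementedError

pickle.dumps(get_activation_fn("relu"))  # a named function pickles by reference

try:
    pickle.dumps(lambda x: F.gelu(x.float()).type_as(x))
except Exception as e:  # pickling a lambda raises: it has no importable name
    print("lambda is not picklable:", e)
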
@@ -121,7 +121,7 @@ class FeedForwardNetwork(nn.Module):
         x_shape = x.shape
         x = x.reshape(-1, x.size(-1))
         x = self.fc1(x)
-        x = self.activation_fn(x)
+        x = self.activation_fn(x.float()).type_as(x)
         x = self.activation_dropout_module(x)
         if self.ffn_layernorm is not None:
             x = self.ffn_layernorm(x)
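
For context on the surrounding lines: the FFN flattens (batch, seq, dim) input to 2-D before the linear layers and applies the same fp32-activation pattern. A hedged sketch with a hypothetical MiniFFN (not the repository's FeedForwardNetwork; dropout and the optional ffn_layernorm are omitted):

import torch
import torch.nn as nn
import torch.nn.functional as F

class MiniFFN(nn.Module):
    def __init__(self, dim=16, hidden=32, activation_fn=F.gelu):
        super().__init__()
        self.fc1 = nn.Linear(dim, hidden)
        self.fc2 = nn.Linear(hidden, dim)
        self.activation_fn = activation_fn

    def forward(self, x):
        x_shape = x.shape                      # (batch, seq, dim)
        x = x.reshape(-1, x.size(-1))          # (batch * seq, dim)
        x = self.fc1(x)
        x = self.activation_fn(x.float()).type_as(x)  # fp32 activation, cast back
        x = self.fc2(x)
        return x.view(x_shape)                 # restore (batch, seq, dim)

ffn = MiniFFN()
out = ffn(torch.randn(2, 5, 16))
assert out.shape == (2, 5, 16)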