commit 559b5fdf56
@@ -391,7 +391,7 @@ class ClassificationHead(nn.Module):
         x = features[:, 0, :]  # take <s> token (equiv. to [CLS])
         x = self.dropout(x)
         x = self.dense(x)
-        x = self.activation_fn(x)
+        x = self.activation_fn(x.float()).type_as(x)
         x = self.dropout(x)
         x = self.out_proj(x)
         return x
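The pattern added at each call site runs the activation in float32 and casts the result back to the input dtype, the usual guard against fp16/bf16 precision loss in GELU. A minimal sketch of the idea, assuming PyTorch (the shapes below are illustrative, not taken from the diff):

import torch
import torch.nn.functional as F

# Illustrative half-precision features, e.g. the <s> token row in ClassificationHead.
x = torch.randn(4, 768, dtype=torch.float16)

# Run the activation in float32, then cast the result back to the input dtype.
y = F.gelu(x.float()).type_as(x)

assert y.dtype == torch.float16  # dtype is preserved across the round trip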
@@ -418,7 +418,7 @@ class LMHead(nn.Module):
             features = features[masked_tokens, :]

         x = self.dense(features)
-        x = self.activation_fn(x)
+        x = self.activation_fn(x.float()).type_as(x)
         x = self.layer_norm(x)
         # project back to size of vocabulary with bias
         x = F.linear(x, self.weight) + self.bias
@@ -400,7 +400,7 @@ class Decoder(nn.Module):
         )
         x = x.transpose(0, 1)

-        # relative postion
+        # relative position
         self_attn_rel_pos_bias = None
         slen = prev_output_tokens.size(1)
         if self.self_attn_relative_position is not None:
@@ -85,7 +85,7 @@ def get_activation_fn(activation):
     if activation == "relu":
         return F.relu
     elif activation == "gelu":
-        return lambda x: F.gelu(x.float()).type_as(x)
+        return F.gelu
     else:
         raise NotImplementedError

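After this change, get_activation_fn("gelu") returns plain F.gelu, so the float32 round trip becomes the caller's responsibility, as in the call sites patched above and below. A short usage sketch under that assumption:

import torch
import torch.nn.functional as F

def get_activation_fn(activation):
    # Mirrors the post-commit version shown above.
    if activation == "relu":
        return F.relu
    elif activation == "gelu":
        return F.gelu
    else:
        raise NotImplementedError

act = get_activation_fn("gelu")        # plain F.gelu, no dtype handling inside
h = torch.randn(2, 16, dtype=torch.float16)
out = act(h.float()).type_as(h)        # caller applies the float32 round trip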
@@ -121,7 +121,7 @@ class FeedForwardNetwork(nn.Module):
         x_shape = x.shape
         x = x.reshape(-1, x.size(-1))
         x = self.fc1(x)
-        x = self.activation_fn(x)
+        x = self.activation_fn(x.float()).type_as(x)
         x = self.activation_dropout_module(x)
         if self.ffn_layernorm is not None:
             x = self.ffn_layernorm(x)