forked from mrq/DL-Art-School
Partially implement performers in transformer_builders
This commit is contained in:
parent
ec456b6733
commit
f503d8d96b
|
@ -12,7 +12,9 @@ Every function contains the following arguments:
|
|||
checkpointing: Whether or not the underlying implementation should support gradient checkpointing.
|
||||
"""
|
||||
import functools
|
||||
from time import time
|
||||
import torch
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
def null_position_embeddings(range, dim):
|
||||
|
@ -36,6 +38,8 @@ def build_hf_gpt_transformer(layers, model_dim, heads, num_tokens, max_seq_len,
|
|||
# Override the built in positional embeddings
|
||||
del gpt.wpe
|
||||
gpt.wpe = functools.partial(null_position_embeddings, dim=model_dim)
|
||||
# Built-in token embeddings are unused.
|
||||
del gpt.wte
|
||||
return gpt
|
||||
|
||||
|
||||
|
@ -43,7 +47,10 @@ def build_lr_performer(layers, model_dim, heads, num_tokens, max_seq_len, checkp
|
|||
"""
|
||||
lucidrains Performer implementation, https://github.com/lucidrains/performer-pytorch
|
||||
"""
|
||||
pass
|
||||
from models.lucidrains.performer.performer_pytorch import PerformerLM
|
||||
model = PerformerLM(dim=model_dim, depth=layers, heads=heads, dim_head=model_dim, causal=True,
|
||||
num_tokens=num_tokens, max_seq_len=max_seq_len)
|
||||
return model
|
||||
|
||||
|
||||
def build_lr_reformer(layers, model_dim, heads, num_tokens, max_seq_len, checkpointing):
|
||||
|
@ -61,10 +68,19 @@ def build_lr_xformer(layers, model_dim, heads, num_tokens, max_seq_len, checkpoi
|
|||
|
||||
|
||||
def test_all_performance(**kwargs):
|
||||
transformer_builders = [build_hf_gpt_transformer, build_lr_performer, build_lr_reformer, build_lr_xformer]
|
||||
transformer_builders = [#build_hf_gpt_transformer,
|
||||
build_lr_performer,]
|
||||
# build_lr_reformer,
|
||||
# build_lr_xformer]
|
||||
for builder in transformer_builders:
|
||||
model = builder(**kwargs)
|
||||
start = time()
|
||||
args = torch.randint(0, 8192, (16,450))
|
||||
for k in tqdm(range(10)):
|
||||
model(args)
|
||||
stop = time()
|
||||
print(f"Model: {str(builder)}; Elapsed: {stop-start}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_all_performance(12, 512, 8, 8192, 1000, False)
|
||||
test_all_performance(layers=12, model_dim=512, heads=8, num_tokens=8192, max_seq_len=1000, checkpointing=False)
|
Loading…
Reference in New Issue
Block a user