From f503d8d96b6b52291b7c365b84b21ed282b4d5fb Mon Sep 17 00:00:00 2001 From: James Betker Date: Sun, 9 Jan 2022 22:35:03 -0700 Subject: [PATCH] Partially implement performers in transformer_builders --- .../models/gpt_voice/transformer_builders.py | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/codes/models/gpt_voice/transformer_builders.py b/codes/models/gpt_voice/transformer_builders.py index 28d1b355..5092fb42 100644 --- a/codes/models/gpt_voice/transformer_builders.py +++ b/codes/models/gpt_voice/transformer_builders.py @@ -12,7 +12,9 @@ Every function contains the following arguments: checkpointing: Whether or not the underlying implementation should support gradient checkpointing. """ import functools +from time import time import torch +from tqdm import tqdm def null_position_embeddings(range, dim): @@ -36,6 +38,8 @@ def build_hf_gpt_transformer(layers, model_dim, heads, num_tokens, max_seq_len, # Override the built in positional embeddings del gpt.wpe gpt.wpe = functools.partial(null_position_embeddings, dim=model_dim) + # Built-in token embeddings are unused. + del gpt.wte return gpt @@ -43,7 +47,10 @@ def build_lr_performer(layers, model_dim, heads, num_tokens, max_seq_len, checkp """ lucidrains Performer implementation, https://github.com/lucidrains/performer-pytorch """ - pass + from models.lucidrains.performer.performer_pytorch import PerformerLM + model = PerformerLM(dim=model_dim, depth=layers, heads=heads, dim_head=model_dim, causal=True, + num_tokens=num_tokens, max_seq_len=max_seq_len) + return model def build_lr_reformer(layers, model_dim, heads, num_tokens, max_seq_len, checkpointing): @@ -61,10 +68,19 @@ def build_lr_xformer(layers, model_dim, heads, num_tokens, max_seq_len, checkpoi def test_all_performance(**kwargs): - transformer_builders = [build_hf_gpt_transformer, build_lr_performer, build_lr_reformer, build_lr_xformer] + transformer_builders = [#build_hf_gpt_transformer, + build_lr_performer,] + # build_lr_reformer, + # build_lr_xformer] for builder in transformer_builders: model = builder(**kwargs) + start = time() + args = torch.randint(0, 8192, (16,450)) + for k in tqdm(range(10)): + model(args) + stop = time() + print(f"Model: {str(builder)}; Elapsed: {stop-start}") if __name__ == '__main__': - test_all_performance(12, 512, 8, 8192, 1000, False) \ No newline at end of file + test_all_performance(layers=12, model_dim=512, heads=8, num_tokens=8192, max_seq_len=1000, checkpointing=False) \ No newline at end of file