From 89cccd8196b885de777cc6f627bd05c96c700300 Mon Sep 17 00:00:00 2001 From: Tim Dettmers Date: Tue, 2 May 2023 09:40:31 -0700 Subject: [PATCH] A tile multi-tiling. --- csrc/kernels.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/csrc/kernels.cu b/csrc/kernels.cu index d09f78a..a528d16 100644 --- a/csrc/kernels.cu +++ b/csrc/kernels.cu @@ -3061,10 +3061,10 @@ template __global__ void gemm_device(int M, T local_A[1]; T local_B[32]; - const int a_tile_offset = (8*16 + 16); + const int a_tile_offset = (16 + 16); const int b_tile_offset = (16*32 + 16); - __shared__ T smem_A[2*batch_size_warps*8*16 + (2*16*(batch_size_warps-1))]; + __shared__ T smem_A[8*16 + (4*16*(batch_size_warps-1))]; __shared__ T smem_B[2*batch_size_warps*16*32 + (2*16*(batch_size_warps-1))]; __shared__ T smem_C[8*32];