diff --git a/csrc/kernels.cu b/csrc/kernels.cu index 8ce881c..d09f78a 100644 --- a/csrc/kernels.cu +++ b/csrc/kernels.cu @@ -3145,7 +3145,6 @@ template __global__ void gemm_device(int M, } ticktock = ticktock == 0 ? 1 : 0; - __syncthreads(); if(warp_id == (WARPS-1)) for(int k = 0; k < batch_size_warps; k++) {