diff --git a/csrc/pythonInterface.c b/csrc/pythonInterface.c index 0aa82fe..865e4b6 100644 --- a/csrc/pythonInterface.c +++ b/csrc/pythonInterface.c @@ -385,6 +385,11 @@ extern "C" void cprefetch(void *ptr, size_t bytes, int device) { + + int hasPrefetch = 0; + CUDA_CHECK_RETURN(cudaDeviceGetAttribute(&hasPrefetch, cudaDevAttrConcurrentManagedAccess, device)); // 40ns overhead + if (hasPrefetch == 0) return; + CUDA_CHECK_RETURN(cudaMemPrefetchAsync(ptr, bytes, device, 0)); CUDA_CHECK_RETURN(cudaPeekAtLastError()); }