# Absolute path of this Makefile. $(lastword $(MAKEFILE_LIST)) is the makefile
# currently being parsed, so this must stay above any `include` directive.
MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
# Directory that contains this Makefile; patsubst strips the trailing slash
# that $(dir ...) leaves behind.
ROOT_DIR := $(patsubst %/,%,$(dir $(MKFILE_PATH)))

# Host C++ compiler used by the link steps and by the CPU-only build.
GPP := /usr/bin/g++
#GPP := /sw/gcc/11.2.0/bin/g++
# Locate the CUDA toolkit when CUDA_HOME is not provided by the caller:
# take the nvcc found on PATH and drop the last two path components
# (<prefix>/bin/nvcc -> <prefix>). stderr is discarded so that machines
# without nvcc do not print a `which` error on every invocation.
ifeq ($(CUDA_HOME),)
  CUDA_HOME := $(shell which nvcc 2>/dev/null | rev | cut -d'/' -f3- | rev)
endif

# Locate ROCm the same way from hipcc, dropping the last three path components.
# NOTE(review): -f4- matches a <prefix>/hip/bin/hipcc layout; for
# <prefix>/bin/hipcc it would yield the parent of <prefix> -- confirm.
ifeq ($(ROCM_HOME),)
  ROCM_HOME := $(shell which hipcc 2>/dev/null | rev | cut -d'/' -f4- | rev)
endif
# Literal '#'. GNU Make older than 4.3 treats a bare '#' inside a function call
# as the start of a comment, which would cut the $(error ...) text below short.
HASH := \#

# Sanity-check the build selectors at parse time.
#  - CUDA toolkit found: CUDA_VERSION is only used to name the output library,
#    so a missing value produces a warning and an empty default.
#  - otherwise, ROCm found: ROCM_TARGET is passed to --offload-arch, so a
#    missing value aborts the run.
ifneq ($(CUDA_HOME),)
ifndef CUDA_VERSION
$(warning WARNING: CUDA_VERSION not set. Call make with CUDA string, for example: make cuda11x CUDA_VERSION=115 or make cpuonly CUDA_VERSION=CPU)
CUDA_VERSION :=
endif
else ifneq ($(ROCM_HOME),)
ifndef ROCM_TARGET
$(error ERROR: ROCM_TARGET not set. Call make with ROCM string (see https://www.llvm.org/docs/AMDGPUUsage.html$(HASH)processors), for example: make hip ROCM_TARGET=gfx1030)
# Not reached: $(error ...) above stops make.
ROCM_TARGET :=
endif
endif
# Device compilers, resolved relative to the detected toolkit roots.
NVCC := $(CUDA_HOME)/bin/nvcc
HIPCC := $(ROCM_HOME)/bin/hipcc
###########################################

# Source tree and output directory for object files.
CSRC := $(ROOT_DIR)/csrc
BUILD_DIR := $(ROOT_DIR)/build

# Device sources (compiled by NVCC / HIPCC) and host sources (compiled by GPP).
FILES_CUDA := $(CSRC)/ops.cu $(CSRC)/kernels.cu
FILES_CPP := $(CSRC)/common.cpp $(CSRC)/cpu_ops.cpp $(CSRC)/pythonInterface.c

# Header and library search paths for the CUDA builds. CONDA_PREFIX comes from
# the environment; if it is unset these entries collapse to /include and /lib.
INCLUDE := -I $(CUDA_HOME)/include -I $(ROOT_DIR)/csrc -I $(CONDA_PREFIX)/include -I $(ROOT_DIR)/include
LIB := -L $(CUDA_HOME)/lib64 -lcudart -lcublas -lcublasLt -lcusparse -L $(CONDA_PREFIX)/lib
# NVIDIA NVCC compilation flags
# Architecture comments sit on their own lines so that no trailing whitespace
# ends up inside the variable values.

# Baseline targets for the *_nomatmul builds.
# Maxwell
COMPUTE_CAPABILITY += -gencode arch=compute_50,code=sm_50
COMPUTE_CAPABILITY += -gencode arch=compute_52,code=sm_52
# Pascal
COMPUTE_CAPABILITY += -gencode arch=compute_60,code=sm_60
COMPUTE_CAPABILITY += -gencode arch=compute_61,code=sm_61
# Volta
COMPUTE_CAPABILITY += -gencode arch=compute_70,code=sm_70

# Kepler (only added by the *_kepler targets)
CC_KEPLER := -gencode arch=compute_35,code=sm_35
CC_KEPLER += -gencode arch=compute_37,code=sm_37

# Later versions of CUDA support the new architectures
# CC_CUDA110 is referenced by cuda110_nomatmul and cuda110_nomatmul_kepler; it
# mirrors CC_cublasLt110 (sm_75 and sm_80, no sm_86).
CC_CUDA110 := -gencode arch=compute_75,code=sm_75
CC_CUDA110 += -gencode arch=compute_80,code=sm_80

CC_CUDA11x := -gencode arch=compute_75,code=sm_75
CC_CUDA11x += -gencode arch=compute_80,code=sm_80
CC_CUDA11x += -gencode arch=compute_86,code=sm_86

# Targets for the builds that keep cuBLASLt enabled.
CC_cublasLt110 := -gencode arch=compute_75,code=sm_75
CC_cublasLt110 += -gencode arch=compute_80,code=sm_80

CC_cublasLt111 := -gencode arch=compute_75,code=sm_75
CC_cublasLt111 += -gencode arch=compute_80,code=sm_80
CC_cublasLt111 += -gencode arch=compute_86,code=sm_86

# sm_89 / sm_90, added by the cuda118* and cuda12x* targets.
CC_ADA_HOPPER := -gencode arch=compute_89,code=sm_89
CC_ADA_HOPPER += -gencode arch=compute_90,code=sm_90
# all: cuBLASLt-enabled CUDA build for the CC_cublasLt111 architectures.
# Steps: compile the device sources to relocatable objects (-dc), device-link
# them (-dlink), then link everything with the host sources into
# bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION).so.
.PHONY: all
all: $(BUILD_DIR) env
	$(NVCC) $(CC_cublasLt111) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE) $(LIB) --output-directory $(BUILD_DIR)
	$(NVCC) $(CC_cublasLt111) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION).so $(LIB)
# cuda110_nomatmul_kepler: build with -D NO_CUBLASLT for the baseline,
# CC_CUDA110 and Kepler architectures; output carries the _nocublaslt suffix.
.PHONY: cuda110_nomatmul_kepler
cuda110_nomatmul_kepler: $(BUILD_DIR) env
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA110) $(CC_KEPLER) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE) $(LIB) --output-directory $(BUILD_DIR) -D NO_CUBLASLT
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA110) $(CC_KEPLER) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so $(LIB)
# cuda11x_nomatmul_kepler: build with -D NO_CUBLASLT for the baseline,
# CC_CUDA11x and Kepler architectures; output carries the _nocublaslt suffix.
.PHONY: cuda11x_nomatmul_kepler
cuda11x_nomatmul_kepler: $(BUILD_DIR) env
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA11x) $(CC_KEPLER) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE) $(LIB) --output-directory $(BUILD_DIR) -D NO_CUBLASLT
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA11x) $(CC_KEPLER) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so $(LIB)
# cuda110_nomatmul: build with -D NO_CUBLASLT for the baseline and CC_CUDA110
# architectures; output carries the _nocublaslt suffix.
.PHONY: cuda110_nomatmul
cuda110_nomatmul: $(BUILD_DIR) env
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA110) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE) $(LIB) --output-directory $(BUILD_DIR) -D NO_CUBLASLT
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA110) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so $(LIB)
# cuda11x_nomatmul: build with -D NO_CUBLASLT for the baseline and CC_CUDA11x
# architectures; output carries the _nocublaslt suffix.
.PHONY: cuda11x_nomatmul
cuda11x_nomatmul: $(BUILD_DIR) env
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA11x) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE) $(LIB) --output-directory $(BUILD_DIR) -D NO_CUBLASLT
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA11x) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so $(LIB)
# cuda118_nomatmul: build with -D NO_CUBLASLT for the baseline, CC_CUDA11x and
# CC_ADA_HOPPER architectures; output carries the _nocublaslt suffix.
.PHONY: cuda118_nomatmul
cuda118_nomatmul: $(BUILD_DIR) env
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA11x) $(CC_ADA_HOPPER) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE) $(LIB) --output-directory $(BUILD_DIR) -D NO_CUBLASLT
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA11x) $(CC_ADA_HOPPER) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so $(LIB)
# cuda12x_nomatmul: same recipe as cuda118_nomatmul (baseline, CC_CUDA11x and
# CC_ADA_HOPPER architectures, -D NO_CUBLASLT, _nocublaslt output suffix).
.PHONY: cuda12x_nomatmul
cuda12x_nomatmul: $(BUILD_DIR) env
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA11x) $(CC_ADA_HOPPER) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE) $(LIB) --output-directory $(BUILD_DIR) -D NO_CUBLASLT
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA11x) $(CC_ADA_HOPPER) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so $(LIB)
# cuda110: cuBLASLt-enabled build for the CC_cublasLt110 architectures;
# produces bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION).so.
.PHONY: cuda110
cuda110: $(BUILD_DIR) env
	$(NVCC) $(CC_cublasLt110) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE) $(LIB) --output-directory $(BUILD_DIR)
	$(NVCC) $(CC_cublasLt110) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION).so $(LIB)
# cuda11x: cuBLASLt-enabled build for the CC_cublasLt111 architectures;
# produces bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION).so.
.PHONY: cuda11x
cuda11x: $(BUILD_DIR) env
	$(NVCC) $(CC_cublasLt111) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE) $(LIB) --output-directory $(BUILD_DIR)
	$(NVCC) $(CC_cublasLt111) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION).so $(LIB)
# cuda118: cuBLASLt-enabled build for the CC_cublasLt111 and CC_ADA_HOPPER
# architectures; produces bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION).so.
.PHONY: cuda118
cuda118: $(BUILD_DIR) env
	$(NVCC) $(CC_cublasLt111) $(CC_ADA_HOPPER) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE) $(LIB) --output-directory $(BUILD_DIR)
	$(NVCC) $(CC_cublasLt111) $(CC_ADA_HOPPER) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION).so $(LIB)
# cuda12x: same recipe as cuda118 (CC_cublasLt111 and CC_ADA_HOPPER
# architectures, cuBLASLt enabled).
.PHONY: cuda12x
cuda12x: $(BUILD_DIR) env
	$(NVCC) $(CC_cublasLt111) $(CC_ADA_HOPPER) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE) $(LIB) --output-directory $(BUILD_DIR)
	$(NVCC) $(CC_cublasLt111) $(CC_ADA_HOPPER) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION).so $(LIB)
# cpuonly: compile only the host sources (no BUILD_CUDA define, no device
# objects) into bitsandbytes/libbitsandbytes_cpu.so.
.PHONY: cpuonly
cpuonly: $(BUILD_DIR) env
	$(GPP) -std=c++14 -shared -fPIC -I $(ROOT_DIR)/csrc -I $(ROOT_DIR)/include $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cpu.so
# Header and library search paths for the ROCm (hip) build.
HIP_INCLUDE := -I $(ROCM_HOME)/include -I $(ROOT_DIR)/csrc -I $(ROOT_DIR)/include
# -lhipblaslt is intentionally left out.
# TODO: check if this is actually only gfx90a
HIP_LIB := -L $(ROCM_HOME)/lib -lhipblas -lhiprand -lhipsparse
# hip: ROCm build. Each device source is compiled separately with HIPCC for
# --offload-arch=$(ROCM_TARGET), then GPP links the objects with the host
# sources into bitsandbytes/libbitsandbytes_hip_nohipblaslt.so.
.PHONY: hip
hip: $(BUILD_DIR)
	$(HIPCC) -std=c++14 -c -fPIC --offload-arch=$(ROCM_TARGET) $(HIP_INCLUDE) -o $(BUILD_DIR)/ops.o -DNO_CUBLASLT -DBITS_AND_BYTES_USE_ROCM $(CSRC)/ops.cu
	$(HIPCC) -std=c++14 -c -fPIC --offload-arch=$(ROCM_TARGET) $(HIP_INCLUDE) -o $(BUILD_DIR)/kernels.o -DNO_CUBLASLT -DBITS_AND_BYTES_USE_ROCM $(CSRC)/kernels.cu
# HCC is deprecated, but used by hipBLASlt header. Since blas isn't even used doesn't matter, this is just so that it even compiles
	$(GPP) -std=c++14 -D__HIP_PLATFORM_HCC__ -D__HIP_PLATFORM_AMD__ -DBUILD_CUDA -DBITS_AND_BYTES_USE_ROCM -shared -fPIC $(HIP_INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(FILES_CPP) $(HIP_LIB) -o ./bitsandbytes/libbitsandbytes_hip_nohipblaslt.so
# env: print the toolchain and environment settings the build will use.
# Declared phony so that a file or directory named `env` cannot shadow it.
.PHONY: env
env:
	@echo "ENVIRONMENT"
	@echo "============================"
	@echo "CUDA_VERSION: $(CUDA_VERSION)"
	@echo "============================"
	@echo "NVCC path: $(NVCC)"
	@echo "GPP path: $(GPP) VERSION: `$(GPP) --version | head -n 1`"
	@echo "CUDA_HOME: $(CUDA_HOME)"
	@echo "CONDA_PREFIX: $(CONDA_PREFIX)"
	@echo "PATH: $(PATH)"
	@echo "LD_LIBRARY_PATH: $(LD_LIBRARY_PATH)"
	@echo "============================"
# Create the object directory (the rule's own target, via $@) and the
# dependencies directory next to this Makefile, so the rule also works when
# make is started from a different working directory.
$(BUILD_DIR):
	mkdir -p $@
	mkdir -p $(ROOT_DIR)/dependencies
# Fetch CUB and pin it to tag 1.11.0.
# `cd ... &&` makes the checkout run only inside the fresh clone; with `;` a
# failed cd would run `git checkout` in whatever repository is the current
# directory.
$(ROOT_DIR)/dependencies/cub:
	git clone https://github.com/NVlabs/cub $@
	cd $@ && git checkout 1.11.0
# clean: remove the contents of build/ (relative to the working directory).
# -f keeps the target from failing when the directory is already empty.
.PHONY: clean
clean:
	rm -f build/*
# cleaneggs: remove every *.egg* entry from the working directory.
.PHONY: cleaneggs
cleaneggs:
	rm -rf *.egg*
# cleanlibs: remove the built shared libraries from ./bitsandbytes.
# -f keeps the target from failing when no library has been built yet.
.PHONY: cleanlibs
cleanlibs:
	rm -f ./bitsandbytes/libbitsandbytes*.so