"""
extract factors the build is dependent on:
[X] compute capability
    [ ] TODO: Q - What if we have multiple GPUs of different makes?
- CUDA version
- Software:
    - CPU-only: only CPU quantization functions (no optimizer, no matrix multiplication)
    - CuBLAS-LT: full-build 8-bit optimizer
    - no CuBLAS-LT: no 8-bit matrix multiplication (`nomatmul`)

evaluation:
    - if paths faulty, return meaningful error
    - else:
        - determine CUDA version
        - determine capabilities
        - based on that set the default path
"""

import ctypes

from .paths import determine_cuda_runtime_lib_path


def check_cuda_result(cuda, result_val):
    # 3. Check for CUDA errors
    if result_val != 0:
        error_str = ctypes.c_char_p()
        cuda.cuGetErrorString(result_val, ctypes.byref(error_str))
        print(f"CUDA exception! Error code: {error_str.value.decode()}")
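
# Usage example (illustrative; mirrors how the helper is called below): wrap a
# CUDA driver API call so that a non-zero CUresult is reported immediately, e.g.
#   check_cuda_result(cuda, cuda.cuInit(0))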


def get_cuda_version(cuda, cudart_path):
    # https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART____VERSION.html#group__CUDART____VERSION
    try:
        cudart = ctypes.CDLL(cudart_path)
    except OSError:
        # TODO: shouldn't we error or at least warn here?
        print(f'ERROR: libcudart.so could not be read from path: {cudart_path}!')
        return None

    version = ctypes.c_int()
    check_cuda_result(cuda, cudart.cudaRuntimeGetVersion(ctypes.byref(version)))
    version = int(version.value)
    major = version // 1000
    minor = (version - (major * 1000)) // 10

    if major < 11:
        print('CUDA SETUP: CUDA versions lower than 11 are currently not supported for LLM.int8(). You will only be able to use 8-bit optimizers and quantization routines!')

    return f'{major}{minor}'
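
# Worked example (illustrative): cudaRuntimeGetVersion() encodes the runtime
# version as 1000 * major + 10 * minor, so a CUDA 11.3 runtime reports 11030:
#   major = 11030 // 1000         -> 11
#   minor = (11030 - 11000) // 10 -> 3
# get_cuda_version() then returns the string "113", which is later used to pick
# a binary such as libbitsandbytes_cuda113.so.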


def get_cuda_lib_handle():
    # 1. find libcuda.so library (GPU driver) (/usr/lib)
    try:
        cuda = ctypes.CDLL("libcuda.so")
    except OSError:
        print('CUDA SETUP: WARNING! libcuda.so not found! Do you have a CUDA driver installed? If you are on a cluster, make sure you are on a CUDA machine!')
        return None
    check_cuda_result(cuda, cuda.cuInit(0))

    return cuda


def get_compute_capabilities(cuda):
    """
    1. find libcuda.so library (GPU driver) (/usr/lib)
       init_device -> init variables -> call function by reference
    2. call extern C function to determine CC
       (https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__DEVICE__DEPRECATED.html)
    3. Check for CUDA errors
       https://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api
    # bits taken from https://gist.github.com/f0k/63a664160d016a491b2cbea15913d549
    """
    nGpus = ctypes.c_int()
    cc_major = ctypes.c_int()
    cc_minor = ctypes.c_int()
    device = ctypes.c_int()

    check_cuda_result(cuda, cuda.cuDeviceGetCount(ctypes.byref(nGpus)))
    ccs = []
    for i in range(nGpus.value):
        check_cuda_result(cuda, cuda.cuDeviceGet(ctypes.byref(device), i))
        ref_major = ctypes.byref(cc_major)
        ref_minor = ctypes.byref(cc_minor)
        # 2. call extern C function to determine CC
        check_cuda_result(
            cuda, cuda.cuDeviceComputeCapability(ref_major, ref_minor, device)
        )
        ccs.append(f"{cc_major.value}.{cc_minor.value}")

    return ccs


# def get_compute_capability()-> Union[List[str, ...], None]: # FIXME: error
def get_compute_capability(cuda):
    """
    Extracts the highest compute capability from all available GPUs, as compute
    capabilities are downwards compatible. If no GPUs are detected, it returns
    None.
    """
    ccs = get_compute_capabilities(cuda)
    if ccs:
        # TODO: handle different compute capabilities; for now, take the max
        return max(ccs, key=lambda v: tuple(map(int, v.split("."))))
    return None
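
# Example (illustrative): on a machine exposing a T4 (compute capability 7.5)
# and an A100 (compute capability 8.0), get_compute_capabilities() returns
# ["7.5", "8.0"] and get_compute_capability() picks "8.0".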


def evaluate_cuda_setup():
    print('')
    print('=' * 35 + 'BUG REPORT' + '=' * 35)
    print('Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues')
    print('For effortless bug reporting copy-paste your error into this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link')
    print('=' * 80)

    binary_name = "libbitsandbytes_cpu.so"
    #if not torch.cuda.is_available():
        #print('No GPU detected. Loading CPU library...')
        #return binary_name

    cudart_path = determine_cuda_runtime_lib_path()
    if cudart_path is None:
        print(
            "WARNING: No libcudart.so found! Install CUDA or the cudatoolkit package (anaconda)!"
        )
        return binary_name

    print(f"CUDA SETUP: CUDA runtime path found: {cudart_path}")

    cuda = get_cuda_lib_handle()
    if cuda is None:
        # no usable CUDA driver; fall back to the CPU-only binary
        return binary_name

    cc = get_compute_capability(cuda)
    print(f"CUDA SETUP: Highest compute capability among GPUs detected: {cc}")
    cuda_version_string = get_cuda_version(cuda, cudart_path)

    if cc is None:
        print(
            "WARNING: No GPU detected! Check your CUDA paths. Proceeding to load CPU-only library..."
        )
        return binary_name

    # 7.5 is the minimum CC for cuBLASLt
    has_cublaslt = cc in ["7.5", "8.0", "8.6"]

    # TODO:
    # (1) CUDA missing cases (no CUDA installed but the CUDA driver is present, i.e. nvidia-smi is accessible)
    # (2) Multiple CUDA versions installed

    # we use ls -l instead of nvcc to determine the cuda version
    # since most installations will have the libcudart.so installed, but not the compiler
    print(f'CUDA SETUP: Detected CUDA version {cuda_version_string}')

    def get_binary_name():
        "if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt.so"
        bin_base_name = "libbitsandbytes_cuda"
        if has_cublaslt:
            return f"{bin_base_name}{cuda_version_string}.so"
        else:
            return f"{bin_base_name}{cuda_version_string}_nocublaslt.so"

    binary_name = get_binary_name()

    return binary_name
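

# Minimal manual smoke test (illustrative sketch, not part of the library API):
# running this module prints the binary name the setup logic resolves to on the
# current machine. Invoke it with `python -m` against the package so that the
# relative import above resolves.
if __name__ == "__main__":
    chosen_binary = evaluate_cuda_setup()
    print(f"CUDA SETUP: would load: {chosen_binary}")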