diff --git a/bitsandbytes_windows/cextension.py b/bitsandbytes_windows/cextension.py
new file mode 100644
index 00000000..d38684a2
--- /dev/null
+++ b/bitsandbytes_windows/cextension.py
@@ -0,0 +1,54 @@
+import ctypes as ct
+from pathlib import Path
+from warnings import warn
+
+from .cuda_setup.main import evaluate_cuda_setup
+
+
+class CUDALibrary_Singleton(object):
+    _instance = None
+
+    def __init__(self):
+        raise RuntimeError("Call get_instance() instead")
+
+    def initialize(self):
+        binary_name = evaluate_cuda_setup()
+        package_dir = Path(__file__).parent
+        binary_path = package_dir / binary_name
+
+        if not binary_path.exists():
+            print(f"CUDA SETUP: TODO: compile library for specific version: {binary_name}")
+            legacy_binary_name = "libbitsandbytes.so"
+            print(f"CUDA SETUP: Defaulting to {legacy_binary_name}...")
+            binary_path = package_dir / legacy_binary_name
+            if not binary_path.exists():
+                print('CUDA SETUP: CUDA detection failed. Either CUDA driver not installed, CUDA not installed, or you have multiple conflicting CUDA libraries!')
+                print('CUDA SETUP: If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION`, for example `make CUDA_VERSION=113`.')
+                raise Exception('CUDA SETUP: Setup Failed!')
+            # self.lib = ct.cdll.LoadLibrary(binary_path)
+            self.lib = ct.cdll.LoadLibrary(str(binary_path))  # $$$ Windows: ctypes wants str, not Path
+        else:
+            print(f"CUDA SETUP: Loading binary {binary_path}...")
+            # self.lib = ct.cdll.LoadLibrary(binary_path)
+            self.lib = ct.cdll.LoadLibrary(str(binary_path))  # $$$ Windows: ctypes wants str, not Path
+
+    @classmethod
+    def get_instance(cls):
+        if cls._instance is None:
+            cls._instance = cls.__new__(cls)
+            cls._instance.initialize()
+        return cls._instance
+
+
+lib = CUDALibrary_Singleton.get_instance().lib
+try:
+    lib.cadam32bit_g32
+    lib.get_context.restype = ct.c_void_p
+    lib.get_cusparse.restype = ct.c_void_p
+    COMPILED_WITH_CUDA = True
+except AttributeError:
+    warn(
+        "The installed version of bitsandbytes was compiled without GPU support. "
+        "8-bit optimizers and GPU quantization are unavailable."
+    )
+    COMPILED_WITH_CUDA = False
diff --git a/bitsandbytes_windows/cuda_setup/main.py b/bitsandbytes_windows/cuda_setup/main.py
new file mode 100644
index 00000000..7e5f9c98
--- /dev/null
+++ b/bitsandbytes_windows/cuda_setup/main.py
@@ -0,0 +1,166 @@
+"""
+extract factors the build is dependent on:
+[X] compute capability
+    [ ] TODO: Q - What if we have multiple GPUs of different makes?
+- CUDA version
+- Software:
+    - CPU-only: only CPU quantization functions (no optimizer, no matrix multiplication)
+    - CuBLAS-LT: full-build 8-bit optimizer
+    - no CuBLAS-LT: no 8-bit matrix multiplication (`nomatmul`)
+
+evaluation:
+    - if paths faulty, return meaningful error
+    - else:
+        - determine CUDA version
+        - determine capabilities
+        - based on that set the default path
+"""
+
+import ctypes
+
+from .paths import determine_cuda_runtime_lib_path
+
+
+def check_cuda_result(cuda, result_val):
+    # 3. Check for CUDA errors
+    if result_val != 0:
+        error_str = ctypes.c_char_p()
+        cuda.cuGetErrorString(result_val, ctypes.byref(error_str))
+        print(f"CUDA exception! Error code: {error_str.value.decode()}")
+
+def get_cuda_version(cuda, cudart_path):
+    # https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART____VERSION.html#group__CUDART____VERSION
+    try:
+        cudart = ctypes.CDLL(cudart_path)
+    except OSError:
+        # TODO: shouldn't we error or at least warn here?
+        print(f'ERROR: libcudart.so could not be read from path: {cudart_path}!')
+        return None
+
+    version = ctypes.c_int()
+    check_cuda_result(cuda, cudart.cudaRuntimeGetVersion(ctypes.byref(version)))
+    version = int(version.value)
+    major = version//1000
+    minor = (version-(major*1000))//10
+
+    if major < 11:
+        print('CUDA SETUP: CUDA versions lower than 11 are currently not supported for LLM.int8(). You will only be able to use 8-bit optimizers and quantization routines!')
+
+    return f'{major}{minor}'
+
+
+def get_cuda_lib_handle():
+    # 1. find libcuda.so library (GPU driver) (/usr/lib)
+    try:
+        cuda = ctypes.CDLL("libcuda.so")
+    except OSError:
+        # TODO: shouldn't we error or at least warn here?
+        print('CUDA SETUP: WARNING! libcuda.so not found! Do you have a CUDA driver installed? If you are on a cluster, make sure you are on a CUDA machine!')
+        return None
+    check_cuda_result(cuda, cuda.cuInit(0))
+
+    return cuda
+
+
+def get_compute_capabilities(cuda):
+    """
+    1. find libcuda.so library (GPU driver) (/usr/lib)
+       init_device -> init variables -> call function by reference
+    2. call extern C function to determine CC
+       (https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__DEVICE__DEPRECATED.html)
+    3. Check for CUDA errors
+       https://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api
+    # bits taken from https://gist.github.com/f0k/63a664160d016a491b2cbea15913d549
+    """
+
+
+    nGpus = ctypes.c_int()
+    cc_major = ctypes.c_int()
+    cc_minor = ctypes.c_int()
+
+    device = ctypes.c_int()
+
+    check_cuda_result(cuda, cuda.cuDeviceGetCount(ctypes.byref(nGpus)))
+    ccs = []
+    for i in range(nGpus.value):
+        check_cuda_result(cuda, cuda.cuDeviceGet(ctypes.byref(device), i))
+        ref_major = ctypes.byref(cc_major)
+        ref_minor = ctypes.byref(cc_minor)
+        # 2. call extern C function to determine CC
+        check_cuda_result(
+            cuda, cuda.cuDeviceComputeCapability(ref_major, ref_minor, device)
+        )
+        ccs.append(f"{cc_major.value}.{cc_minor.value}")
+
+    return ccs
+
+
+# def get_compute_capability()-> Union[List[str, ...], None]: # FIXME: error
+def get_compute_capability(cuda):
+    """
+    Extracts the highest compute capability from all available GPUs, as compute
+    capabilities are downwards compatible. If no GPUs are detected, it returns
+    None.
+    """
+    ccs = get_compute_capabilities(cuda)
+    if ccs:  # an empty list (no GPUs) would make ccs[-1] raise IndexError
+        # TODO: handle different compute capabilities; for now, take the max
+        return ccs[-1]
+    return None
+
+
+def evaluate_cuda_setup():
+    print('')
+    print('='*35 + 'BUG REPORT' + '='*35)
+    print('Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues')
+    print('For effortless bug reporting copy-paste your error into this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link')
+    print('='*80)
+    return "libbitsandbytes_cuda116.dll"  # $$$ Windows: always load the bundled CUDA 11.6 DLL; the detection below is unreachable
+
+    binary_name = "libbitsandbytes_cpu.so"
+    #if not torch.cuda.is_available():
+        #print('No GPU detected. Loading CPU library...')
+        #return binary_name
+
+    cudart_path = determine_cuda_runtime_lib_path()
+    if cudart_path is None:
+        print(
+            "WARNING: No libcudart.so found! Install CUDA or the cudatoolkit package (anaconda)!"
+ ) + return binary_name + + print(f"CUDA SETUP: CUDA runtime path found: {cudart_path}") + cuda = get_cuda_lib_handle() + cc = get_compute_capability(cuda) + print(f"CUDA SETUP: Highest compute capability among GPUs detected: {cc}") + cuda_version_string = get_cuda_version(cuda, cudart_path) + + + if cc == '': + print( + "WARNING: No GPU detected! Check your CUDA paths. Processing to load CPU-only library..." + ) + return binary_name + + # 7.5 is the minimum CC vor cublaslt + has_cublaslt = cc in ["7.5", "8.0", "8.6"] + + # TODO: + # (1) CUDA missing cases (no CUDA installed by CUDA driver (nvidia-smi accessible) + # (2) Multiple CUDA versions installed + + # we use ls -l instead of nvcc to determine the cuda version + # since most installations will have the libcudart.so installed, but not the compiler + print(f'CUDA SETUP: Detected CUDA version {cuda_version_string}') + + def get_binary_name(): + "if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt.so" + bin_base_name = "libbitsandbytes_cuda" + if has_cublaslt: + return f"{bin_base_name}{cuda_version_string}.so" + else: + return f"{bin_base_name}{cuda_version_string}_nocublaslt.so" + + binary_name = get_binary_name() + + return binary_name diff --git a/bitsandbytes_windows/libbitsandbytes_cpu.dll b/bitsandbytes_windows/libbitsandbytes_cpu.dll new file mode 100644 index 00000000..b733af47 Binary files /dev/null and b/bitsandbytes_windows/libbitsandbytes_cpu.dll differ diff --git a/bitsandbytes_windows/libbitsandbytes_cuda116.dll b/bitsandbytes_windows/libbitsandbytes_cuda116.dll new file mode 100644 index 00000000..a999316e Binary files /dev/null and b/bitsandbytes_windows/libbitsandbytes_cuda116.dll differ diff --git a/bitsandbytes_windows/nn/__init__.py b/bitsandbytes_windows/nn/__init__.py new file mode 100644 index 00000000..b1944007 --- /dev/null +++ b/bitsandbytes_windows/nn/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +from .modules import Int8Params, Linear8bit, Linear8bitLt +from .modules import Embedding as StableEmbedding diff --git a/requirements.txt b/requirements.txt index 877d9869..3e3ebd3b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -45,4 +45,7 @@ rotary-embedding-torch axial_positional_embedding g-mlp-pytorch x-clip -x_transformers==1.0.4 \ No newline at end of file +x_transformers==1.0.4 + +# bitsandbytes +bitsandbytes==0.35.0 diff --git a/setup.py b/setup.py index 1073fb00..08c69087 100644 --- a/setup.py +++ b/setup.py @@ -67,6 +67,8 @@ setuptools.setup( "g-mlp-pytorch", "x-clip", "x_transformers==1.0.4", + + "bitsandbytes==0.35.0", ], classifiers=[ "Programming Language :: Python :: 3",