From 0f5c3948709ae70cf733cefbd831aaea8a4e38c9 Mon Sep 17 00:00:00 2001
From: Tim Dettmers
Date: Wed, 1 Feb 2023 20:27:01 -0800
Subject: [PATCH] Added version 0.37.0.

---
 CHANGELOG.md                    | 12 ++++++++++++
 bitsandbytes/cuda_setup/main.py | 13 +++++++------
 setup.py                        |  2 +-
 3 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 77703a0..ac239de 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -189,3 +189,15 @@ Improvements:
  - StableEmbedding layer now has device and dtype parameters to make it 1:1 replaceable with regular Embedding layers (@lostmsu)
  - runtime performance of block-wise quantization slightly improved
  - added error message for the case multiple libcudart.so are installed and bitsandbytes picks the wrong one
+
+
+### 0.37.0
+
+#### Int8 Matmul + backward support for all GPUs
+
+Features:
+ - Int8 MatmulLt now supports backward through inversion of the ColTuring/ColAmpere format. Slow, but memory efficient. Big thanks to @borzunov
+ - Int8 now supported on all GPUs. On devices with compute capability < 7.5, the Int weights are cast to 16/32-bit for the matrix multiplication. Contributed by @borzunov
+
+Improvements:
+ - Improved logging for the CUDA detection mechanism.
diff --git a/bitsandbytes/cuda_setup/main.py b/bitsandbytes/cuda_setup/main.py
index ce44d97..cd9573f 100644
--- a/bitsandbytes/cuda_setup/main.py
+++ b/bitsandbytes/cuda_setup/main.py
@@ -80,9 +80,10 @@ class CUDASetup:
         self.add_log_entry('python setup.py install')
 
     def initialize(self):
-        self.has_printed = False
-        self.lib = None
-        self.initialized = False
+        if not getattr(self, 'initialized', False):
+            self.has_printed = False
+            self.lib = None
+            self.initialized = False
 
     def run_cuda_setup(self):
         self.initialized = True
@@ -103,7 +104,7 @@ class CUDASetup:
                 legacy_binary_name = "libbitsandbytes_cpu.so"
                 self.add_log_entry(f"CUDA SETUP: Defaulting to {legacy_binary_name}...")
                 binary_path = package_dir / legacy_binary_name
-                if not binary_path.exists():
+                if not binary_path.exists() or torch.cuda.is_available():
                     self.add_log_entry('')
                     self.add_log_entry('='*48 + 'ERROR' + '='*37)
                     self.add_log_entry('CUDA SETUP: CUDA detection failed! Possible reasons:')
@@ -112,6 +113,7 @@ class CUDASetup:
                     self.add_log_entry('3. You have multiple conflicting CUDA libraries')
                     self.add_log_entry('4. Required library not pre-compiled for this bitsandbytes release!')
                     self.add_log_entry('CUDA SETUP: If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION` for example, `make CUDA_VERSION=113`.')
+                    self.add_log_entry('CUDA SETUP: The CUDA version for the compile might depend on your conda install. Inspect CUDA version via `conda list | grep cuda`.')
                     self.add_log_entry('='*80)
                     self.add_log_entry('')
                     self.generate_instructions()
@@ -148,7 +150,7 @@ def is_cublasLt_compatible(cc):
     if cc is not None:
         cc_major, cc_minor = cc.split('.')
         if int(cc_major) < 7 or (int(cc_major) == 7 and int(cc_minor) < 5):
-            cuda_setup.add_log_entry("WARNING: Compute capability < 7.5 detected! Proceeding to load CPU-only library...", is_warning=True)
+            cuda_setup.add_log_entry("WARNING: Compute capability < 7.5 detected! Only slow 8-bit matmul is supported for your GPU!", is_warning=True)
         else:
             has_cublaslt = True
     return has_cublaslt
@@ -362,7 +364,6 @@ def evaluate_cuda_setup():
     print('')
     print('='*35 + 'BUG REPORT' + '='*35)
     print('Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues')
-    print('For effortless bug reporting copy-paste your error into this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link')
     print('='*80)
     if not torch.cuda.is_available(): return 'libsbitsandbytes_cpu.so', None, None, None, None
 
diff --git a/setup.py b/setup.py
index 93df40e..e3f453e 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@ def read(fname):
 
 setup(
     name=f"bitsandbytes",
-    version=f"0.36.0-2",
+    version=f"0.37.0",
     author="Tim Dettmers",
     author_email="dettmers@cs.washington.edu",
     description="8-bit optimizers and matrix multiplication routines.",
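
Note (not part of the patch): a minimal usage sketch of the Int8 matmul + backward
feature described in the 0.37.0 CHANGELOG entry above. It assumes the existing
bnb.nn.Linear8bitLt module and a CUDA GPU; on devices with compute capability < 7.5
the int8 weights are cast to 16/32-bit for the matmul, so the same code runs, only slower.

    # Hypothetical example, not taken from the bitsandbytes repository.
    import torch
    import bitsandbytes as bnb

    # 8-bit linear layer; the floating-point weights are quantized to int8
    # when the module is moved to the GPU (has_fp16_weights=False).
    layer = bnb.nn.Linear8bitLt(
        1024, 1024, bias=False, has_fp16_weights=False, threshold=6.0
    ).cuda()

    x = torch.randn(8, 1024, dtype=torch.float16, device='cuda', requires_grad=True)
    out = layer(x)          # forward pass through the Int8 matmul
    out.sum().backward()    # input gradients now work on all GPUs (slow, but memory efficient)
    print(x.grad.shape)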