Added version 0.37.0.
parent de53588934
commit 0f5c394870
CHANGELOG.md (12 changed lines)

@@ -189,3 +189,15 @@ Improvements:
 - StableEmbedding layer now has device and dtype parameters to make it 1:1 replaceable with regular Embedding layers (@lostmsu)
 - runtime performance of block-wise quantization slightly improved
 - added error message for the case multiple libcudart.so are installed and bitsandbytes picks the wrong one
+
+### 0.37.0
+
+#### Int8 Matmul + backward support for all GPUs
+
+Features:
+- Int8 MatmulLt now supports backward through inversion of the ColTuring/ColAmpere format. Slow, but memory efficient. Big thanks to @borzunov
+- Int8 now supported on all GPUs. On devices with compute capability < 7.5, the Int weights are cast to 16/32-bit for the matrix multiplication. Contributed by @borzunov
+
+Improvements:
+- Improved logging for the CUDA detection mechanism.
+
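For context, a minimal sketch of what the new changelog entries mean in user code, not part of this commit: the layer names and keyword arguments shown (`Linear8bitLt`, `memory_efficient_backward`, `threshold`) are assumptions about the bitsandbytes API of this era, and the StableEmbedding `device`/`dtype` arguments come from the changelog line above.

```python
# Minimal sketch (not from this commit): exercising the 0.37.0 features.
# Assumes a CUDA build of torch and bitsandbytes ~0.37.0; kwargs are illustrative.
import torch
import bitsandbytes as bnb

# Int8 linear layer; per the changelog it now also runs on GPUs with
# compute capability < 7.5 (int8 weights are cast to 16/32-bit for the matmul).
layer = bnb.nn.Linear8bitLt(
    512, 512, bias=False,
    has_fp16_weights=False,          # keep weights stored in int8
    memory_efficient_backward=True,  # assumed flag for the new slow-but-lean backward
    threshold=6.0,
).half().cuda()

x = torch.randn(4, 512, device="cuda", dtype=torch.float16, requires_grad=True)
layer(x).sum().backward()            # backward through the ColTuring/ColAmpere format
print(x.grad.shape)

# StableEmbedding now accepts device/dtype, mirroring torch.nn.Embedding (per changelog).
emb = bnb.nn.StableEmbedding(1000, 128, device="cuda", dtype=torch.float32)
```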
@@ -80,9 +80,10 @@ class CUDASetup:
         self.add_log_entry('python setup.py install')
 
     def initialize(self):
-        self.has_printed = False
-        self.lib = None
-        self.initialized = False
+        if not getattr(self, 'initialized', False):
+            self.has_printed = False
+            self.lib = None
+            self.initialized = False
 
     def run_cuda_setup(self):
         self.initialized = True
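The `initialize()` change above makes repeated setup calls no-ops once `run_cuda_setup()` has flagged the object as initialized. A standalone sketch of the same getattr-based guard, in generic Python rather than library code:

```python
# Generic sketch of the re-initialization guard used in the hunk above.
class LazySetup:
    def initialize(self):
        # getattr with a default makes the guard work on the very first call,
        # before the attribute exists.
        if not getattr(self, 'initialized', False):
            self.has_printed = False
            self.lib = None
            self.initialized = False

    def run_setup(self):
        self.initialized = True
        # ... expensive one-time work would happen here ...

s = LazySetup()
s.initialize(); s.run_setup()
s.initialize()               # second call no longer resets the state
assert s.initialized is True
```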
@@ -103,7 +104,7 @@ class CUDASetup:
             legacy_binary_name = "libbitsandbytes_cpu.so"
             self.add_log_entry(f"CUDA SETUP: Defaulting to {legacy_binary_name}...")
             binary_path = package_dir / legacy_binary_name
-            if not binary_path.exists():
+            if not binary_path.exists() or torch.cuda.is_available():
                 self.add_log_entry('')
                 self.add_log_entry('='*48 + 'ERROR' + '='*37)
                 self.add_log_entry('CUDA SETUP: CUDA detection failed! Possible reasons:')
@@ -112,6 +113,7 @@ class CUDASetup:
                 self.add_log_entry('3. You have multiple conflicting CUDA libraries')
                 self.add_log_entry('4. Required library not pre-compiled for this bitsandbytes release!')
                 self.add_log_entry('CUDA SETUP: If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION` for example, `make CUDA_VERSION=113`.')
+                self.add_log_entry('CUDA SETUP: The CUDA version for the compile might depend on your conda install. Inspect CUDA version via `conda list | grep cuda`.')
                 self.add_log_entry('='*80)
                 self.add_log_entry('')
                 self.generate_instructions()
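The extra log line added above points users at the CUDA runtime their conda environment provides. For the same kind of inspection from Python, a few standard PyTorch calls (not added by this commit) usually suffice:

```python
# Quick environment check when CUDA detection fails (standard torch APIs only).
import torch

print(torch.version.cuda)            # CUDA version this torch build was compiled against
print(torch.cuda.is_available())     # whether a usable GPU/driver was found
if torch.cuda.is_available():
    print(torch.cuda.get_device_capability())  # e.g. (7, 5) for a Turing GPU
```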
@@ -148,7 +150,7 @@ def is_cublasLt_compatible(cc):
     if cc is not None:
         cc_major, cc_minor = cc.split('.')
         if int(cc_major) < 7 or (int(cc_major) == 7 and int(cc_minor) < 5):
-            cuda_setup.add_log_entry("WARNING: Compute capability < 7.5 detected! Proceeding to load CPU-only library...", is_warning=True)
+            cuda_setup.add_log_entry("WARNING: Compute capability < 7.5 detected! Only slow 8-bit matmul is supported for your GPU!", is_warning=True)
         else:
             has_cublaslt = True
     return has_cublaslt
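The reworded warning reflects that GPUs below compute capability 7.5 now get the slow int8 matmul path instead of a CPU-only fallback. A small self-contained re-statement of the capability check, run against a few illustrative values (not library code):

```python
# Illustrative re-statement of the compute-capability check from the hunk above.
def supports_cublaslt(cc: str) -> bool:
    major, minor = (int(x) for x in cc.split('.'))
    # cuBLASLt int8 kernels need compute capability >= 7.5 (Turing or newer);
    # older GPUs fall back to the slower cast-to-16/32-bit matmul path.
    return (major, minor) >= (7, 5)

for cc in ["6.1", "7.0", "7.5", "8.0"]:
    print(cc, supports_cublaslt(cc))   # False, False, True, True
```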
@@ -362,7 +364,6 @@ def evaluate_cuda_setup():
     print('')
     print('='*35 + 'BUG REPORT' + '='*35)
     print('Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues')
-    print('For effortless bug reporting copy-paste your error into this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link')
     print('='*80)
     if not torch.cuda.is_available(): return 'libsbitsandbytes_cpu.so', None, None, None, None
 
setup.py (2 changed lines)

@@ -18,7 +18,7 @@ def read(fname):
 
 setup(
     name=f"bitsandbytes",
-    version=f"0.36.0-2",
+    version=f"0.37.0",
     author="Tim Dettmers",
     author_email="dettmers@cs.washington.edu",
     description="8-bit optimizers and matrix multiplication routines.",
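After upgrading, the installed release can be confirmed from Python; `importlib.metadata` is standard library, not something this commit adds:

```python
# Confirm which bitsandbytes version is actually installed (standard library only).
from importlib.metadata import version

print(version("bitsandbytes"))   # expected: 0.37.0 after this release
```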