diff --git a/CHANGELOG.md b/CHANGELOG.md index 2de70d3..eb7ac0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -228,3 +228,14 @@ Deprecated: Features: - Added Int8 SwitchBack layers - Added Fake FP8 layers for research purposes (available under `bnb.research.nn. ...`) + + +### 0.39.0 + + +Features: + - 4-bit matrix multiplication for Float4 and NormalFloat4 data types. + - Added 4-bit quantization routines + - Double quantization routines for 4-bit quantization + - Paged optimizers for Adam and Lion. + - bfloat16 gradient / weight support for Adam and Lion with 8 or 32-bit states. diff --git a/Makefile b/Makefile index c113a3d..1f2b281 100644 --- a/Makefile +++ b/Makefile @@ -25,8 +25,7 @@ FILES_CUDA := $(CSRC)/ops.cu $(CSRC)/kernels.cu FILES_CPP := $(CSRC)/common.cpp $(CSRC)/cpu_ops.cpp $(CSRC)/pythonInterface.c INCLUDE := -I $(CUDA_HOME)/include -I $(ROOT_DIR)/csrc -I $(CONDA_PREFIX)/include -I $(ROOT_DIR)/include -INCLUDE_10x := -I $(CUDA_HOME)/include -I $(ROOT_DIR)/csrc -I $(ROOT_DIR)/dependencies/cub -I $(ROOT_DIR)/include -LIB := -L $(CUDA_HOME)/lib64 -lcudart -lcublas -lcublasLt -lcurand -lcusparse -L $(CONDA_PREFIX)/lib +LIB := -L $(CUDA_HOME)/lib64 -lcudart -lcublas -lcublasLt -lcusparse -L $(CONDA_PREFIX)/lib # NVIDIA NVCC compilation flags COMPUTE_CAPABILITY += -gencode arch=compute_50,code=sm_50 # Maxwell diff --git a/setup.py b/setup.py index 009fd3d..b683bfc 100644 --- a/setup.py +++ b/setup.py @@ -18,10 +18,10 @@ def read(fname): setup( name=f"bitsandbytes", - version=f"0.38.1", + version=f"0.39.0", author="Tim Dettmers", author_email="dettmers@cs.washington.edu", - description="8-bit optimizers and matrix multiplication routines.", + description="k-bit optimizers and matrix multiplication routines.", license="MIT", keywords="gpu optimizers optimization 8-bit quantization compression", url="https://github.com/TimDettmers/bitsandbytes",