diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2de70d3..eb7ac0d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -228,3 +228,14 @@ Deprecated:
 Features:
  - Added Int8 SwitchBack layers
  - Added Fake FP8 layers for research purposes (available under `bnb.research.nn. ...`)
+
+
+### 0.39.0
+
+
+Features:
+ - 4-bit matrix multiplication for Float4 and NormalFloat4 data types.
+ - Added 4-bit quantization routines.
+ - Double quantization routines for 4-bit quantization.
+ - Paged optimizers for Adam and Lion.
+ - bfloat16 gradient / weight support for Adam and Lion with 8 or 32-bit states.
diff --git a/Makefile b/Makefile
index c113a3d..1f2b281 100644
--- a/Makefile
+++ b/Makefile
@@ -25,8 +25,7 @@ FILES_CUDA := $(CSRC)/ops.cu $(CSRC)/kernels.cu
 FILES_CPP := $(CSRC)/common.cpp $(CSRC)/cpu_ops.cpp $(CSRC)/pythonInterface.c
 
 INCLUDE :=  -I $(CUDA_HOME)/include -I $(ROOT_DIR)/csrc -I $(CONDA_PREFIX)/include -I $(ROOT_DIR)/include
-INCLUDE_10x :=  -I $(CUDA_HOME)/include -I $(ROOT_DIR)/csrc -I $(ROOT_DIR)/dependencies/cub -I $(ROOT_DIR)/include
-LIB := -L $(CUDA_HOME)/lib64 -lcudart -lcublas -lcublasLt -lcurand -lcusparse -L $(CONDA_PREFIX)/lib
+LIB := -L $(CUDA_HOME)/lib64 -lcudart -lcublas -lcublasLt -lcusparse -L $(CONDA_PREFIX)/lib
 
 # NVIDIA NVCC compilation flags
 COMPUTE_CAPABILITY += -gencode arch=compute_50,code=sm_50 # Maxwell
diff --git a/setup.py b/setup.py
index 009fd3d..b683bfc 100644
--- a/setup.py
+++ b/setup.py
@@ -18,10 +18,10 @@ def read(fname):
 
 setup(
     name=f"bitsandbytes",
-    version=f"0.38.1",
+    version=f"0.39.0",
     author="Tim Dettmers",
     author_email="dettmers@cs.washington.edu",
-    description="8-bit optimizers and matrix multiplication routines.",
+    description="k-bit optimizers and matrix multiplication routines.",
     license="MIT",
     keywords="gpu optimizers optimization 8-bit quantization compression",
     url="https://github.com/TimDettmers/bitsandbytes",