diff --git a/CHANGELOG.md b/CHANGELOG.md
index 387af03..e35e04d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -255,3 +255,6 @@ Features:
 
 Bug fixes:
  - Added `device` variable for bitsandbytes layers to be compatible with PyTorch layers.
+
+Deprecated:
+ - Binaries for CUDA 11.2, 11.3, 11.6 no longer ship with `pip install bitsandbytes` and need to be compiled from source.
diff --git a/csrc/kernels.cu b/csrc/kernels.cu
index 4b05672..01caf89 100644
--- a/csrc/kernels.cu
+++ b/csrc/kernels.cu
@@ -3312,6 +3312,7 @@ __device__ static float nf4_data[16] = {-1.0, -0.6961928009986877, -0.5250730514
 template <typename T, int THREADS> __global__ void kgemm_4bit_inference(int M, int N, int K, T * __restrict__ const A, unsigned char *B,  float *absmax, T * out,  int lda, int ldb, int ldc, int blocksize)
 {
 
+#if __CUDA_ARCH__ >= 750
 	using namespace nvcuda;
   int col_offset = blockIdx.x *32;
   const int warp_id = threadIdx.x / 32;
@@ -3517,6 +3518,7 @@ template <typename T, int THREADS> __global__ void kgemm_4bit_inference(int M, i
 
   if(col_offset + warp_lane < M)
     out[col_offset + warp_lane] = smem_C[warp_lane];
+#endif
 }
 
 #define num_values_4bit 32
@@ -3544,7 +3546,7 @@ template <typename T, int THREADS, int BITS> __global__ void kgemm_4bit_inferenc
 	T local_absmax = T(0.0f);
 
   for(int i = threadIdx.x; i < 16; i++)
-    quant_map[i] = datatype[i];
+    quant_map[i] = T(datatype[i]);
 
   __syncthreads();
 
@@ -3577,8 +3579,14 @@ template <typename T, int THREADS, int BITS> __global__ void kgemm_4bit_inferenc
     #pragma unroll
     for(int k = 0; k < num_values_4bit; k++)
     {
-      local_B[k*2] = quant_map[local_B_4bit[k] >> 4]*local_absmax;
-      local_B[k*2 + 1] = quant_map[local_B_4bit[k] & 0x0F]*local_absmax;
+      #if __CUDA_ARCH__ >= 800
+        local_B[k*2] = quant_map[local_B_4bit[k] >> 4]*local_absmax;
+        local_B[k*2 + 1] = quant_map[local_B_4bit[k] & 0x0F]*local_absmax;
+      #else
+        // bf16 multipliation not supported
+        local_B[k*2] = T((float)quant_map[local_B_4bit[k] >> 4]*(float)local_absmax);
+        local_B[k*2 + 1] = T((float)quant_map[local_B_4bit[k] & 0x0F]*(float)local_absmax);
+      #endif
     }
 
     if(inner_idx+num_values_4bit)
@@ -3609,7 +3617,14 @@ template <typename T, int THREADS, int BITS> __global__ void kgemm_4bit_inferenc
 
     #pragma unroll
     for(int k = 0; k < num_values_4bit; k++)
-      local_C += (float)(local_A[k]*local_B[k]);
+    {
+      #if __CUDA_ARCH__ >= 800
+        local_C += (float)(local_A[k]*local_B[k]);
+      #else
+        // bf16 multipliation not supported
+        local_C += ((float)local_A[k]*(float)local_B[k]);
+      #endif
+    }
 
   }
 
diff --git a/deploy.sh b/deploy.sh
index a2257a2..bb11fd6 100644
--- a/deploy.sh
+++ b/deploy.sh
@@ -10,103 +10,73 @@ if [[ ! -z "${LD_LIBRARY_PATH}" ]]; then
 fi
 
 
-module unload cuda && echo "no module function available. Probably not on a slurm cluster."
-module unload gcc && echo "no module function available. Probably not on a slurm cluster."
-
-rm -rf dist build
-make cleaneggs
-make cleanlibs
-
-make clean
-export CUDA_HOME=
-export CUDA_VERSION=
-make cpuonly CUDA_VERSION="CPU"
-
-if [ ! -f "./bitsandbytes/libbitsandbytes_cpu.so" ]; then
-  # Control will enter here if $DIRECTORY doesn't exist.
-  echo "Compilation unsuccessul!" 1>&2
-  exit 64
-fi
-
-make clean
-export CUDA_HOME=$BASE_PATH/cuda-11.0
-make cuda110 CUDA_VERSION=110
-
-if [ ! -f "./bitsandbytes/libbitsandbytes_cuda110.so" ]; then
-  # Control will enter here if $DIRECTORY doesn't exist.
-  echo "Compilation unsuccessul!" 1>&2
-  exit 64
-fi
-
-make clean
-export CUDA_HOME=$BASE_PATH/cuda-11.1
-make cuda11x CUDA_VERSION=111
-
-if [ ! -f "./bitsandbytes/libbitsandbytes_cuda111.so" ]; then
-  # Control will enter here if $DIRECTORY doesn't exist.
-  echo "Compilation unsuccessul!" 1>&2
-  exit 64
-fi
-
-make clean
-export CUDA_HOME=$BASE_PATH/cuda-11.2
-make cuda11x CUDA_VERSION=112
-
-if [ ! -f "./bitsandbytes/libbitsandbytes_cuda112.so" ]; then
-  # Control will enter here if $DIRECTORY doesn't exist.
-  echo "Compilation unsuccessul!" 1>&2
-  exit 64
-fi
-
-make clean
-export CUDA_HOME=$BASE_PATH/cuda-11.3
-make cuda11x CUDA_VERSION=113
-
-if [ ! -f "./bitsandbytes/libbitsandbytes_cuda113.so" ]; then
-  # Control will enter here if $DIRECTORY doesn't exist.
-  echo "Compilation unsuccessul!" 1>&2
-  exit 64
-fi
-
-make clean
-export CUDA_HOME=$BASE_PATH/cuda-11.4
-make cuda11x CUDA_VERSION=114
-
-if [ ! -f "./bitsandbytes/libbitsandbytes_cuda114.so" ]; then
-  # Control will enter here if $DIRECTORY doesn't exist.
-  echo "Compilation unsuccessul!" 1>&2
-  exit 64
-fi
-
-make clean
-export CUDA_HOME=$BASE_PATH/cuda-11.5
-make cuda11x CUDA_VERSION=115
-
-if [ ! -f "./bitsandbytes/libbitsandbytes_cuda115.so" ]; then
-  # Control will enter here if $DIRECTORY doesn't exist.
-  echo "Compilation unsuccessul!" 1>&2
-  exit 64
-fi
-
-make clean
-export CUDA_HOME=$BASE_PATH/cuda-11.6
-
-make cuda11x CUDA_VERSION=116
-if [ ! -f "./bitsandbytes/libbitsandbytes_cuda116.so" ]; then
-  # Control will enter here if $DIRECTORY doesn't exist.
-  echo "Compilation unsuccessul!" 1>&2
-  exit 64
-fi
-
-make clean
-export CUDA_HOME=$BASE_PATH/cuda-11.7
-make cuda11x CUDA_VERSION=117
-
-if [ ! -f "./bitsandbytes/libbitsandbytes_cuda117.so" ]; then
-  # Control will enter here if $DIRECTORY doesn't exist.
-  echo "Compilation unsuccessul!" 1>&2
-  exit 64
-fi
+#module unload cuda && echo "no module function available. Probably not on a slurm cluster."
+#module unload gcc && echo "no module function available. Probably not on a slurm cluster."
+#
+#rm -rf dist build
+#make cleaneggs
+#make cleanlibs
+#
+#make clean
+#export CUDA_HOME=
+#export CUDA_VERSION=
+#make cpuonly CUDA_VERSION="CPU"
+#
+#if [ ! -f "./bitsandbytes/libbitsandbytes_cpu.so" ]; then
+#  # Control will enter here if $DIRECTORY doesn't exist.
+#  echo "Compilation unsuccessul!" 1>&2
+#  exit 64
+#fi
+#
+#make clean
+#export CUDA_HOME=$BASE_PATH/cuda-11.0
+#make cuda110 CUDA_VERSION=110
+#
+#if [ ! -f "./bitsandbytes/libbitsandbytes_cuda110.so" ]; then
+#  # Control will enter here if $DIRECTORY doesn't exist.
+#  echo "Compilation unsuccessul!" 1>&2
+#  exit 64
+#fi
+#
+#make clean
+#export CUDA_HOME=$BASE_PATH/cuda-11.1
+#make cuda11x CUDA_VERSION=111
+#
+#if [ ! -f "./bitsandbytes/libbitsandbytes_cuda111.so" ]; then
+#  # Control will enter here if $DIRECTORY doesn't exist.
+#  echo "Compilation unsuccessul!" 1>&2
+#  exit 64
+#fi
+#
+#make clean
+#export CUDA_HOME=$BASE_PATH/cuda-11.4
+#make cuda11x CUDA_VERSION=114
+#
+#if [ ! -f "./bitsandbytes/libbitsandbytes_cuda114.so" ]; then
+#  # Control will enter here if $DIRECTORY doesn't exist.
+#  echo "Compilation unsuccessul!" 1>&2
+#  exit 64
+#fi
+#
+#make clean
+#export CUDA_HOME=$BASE_PATH/cuda-11.5
+#make cuda11x CUDA_VERSION=115
+#
+#if [ ! -f "./bitsandbytes/libbitsandbytes_cuda115.so" ]; then
+#  # Control will enter here if $DIRECTORY doesn't exist.
+#  echo "Compilation unsuccessul!" 1>&2
+#  exit 64
+#fi
+#
+#make clean
+#export CUDA_HOME=$BASE_PATH/cuda-11.7
+#make cuda11x CUDA_VERSION=117
+#
+#if [ ! -f "./bitsandbytes/libbitsandbytes_cuda117.so" ]; then
+#  # Control will enter here if $DIRECTORY doesn't exist.
+#  echo "Compilation unsuccessul!" 1>&2
+#  exit 64
+#fi
 
 make clean
 export CUDA_HOME=$BASE_PATH/cuda-11.8
@@ -138,6 +108,16 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda121.so" ]; then
   exit 64
 fi
 
+make clean
+export CUDA_HOME=$BASE_PATH/cuda-12.2
+make cuda12x CUDA_VERSION=122
+
+if [ ! -f "./bitsandbytes/libbitsandbytes_cuda122.so" ]; then
+  # Control will enter here if $DIRECTORY doesn't exist.
+  echo "Compilation unsuccessul!" 1>&2
+  exit 64
+fi
+
 
 make clean
 export CUDA_HOME=$BASE_PATH/cuda-11.0
@@ -160,26 +140,6 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda111_nocublaslt.so" ]; then
   exit 64
 fi
 
-make clean
-export CUDA_HOME=$BASE_PATH/cuda-11.2
-make cuda11x_nomatmul CUDA_VERSION=112
-
-if [ ! -f "./bitsandbytes/libbitsandbytes_cuda112_nocublaslt.so" ]; then
-  # Control will enter here if $DIRECTORY doesn't exist.
-  echo "Compilation unsuccessul!" 1>&2
-  exit 64
-fi
-
-make clean
-export CUDA_HOME=$BASE_PATH/cuda-11.3
-make cuda11x_nomatmul CUDA_VERSION=113
-
-if [ ! -f "./bitsandbytes/libbitsandbytes_cuda113_nocublaslt.so" ]; then
-  # Control will enter here if $DIRECTORY doesn't exist.
-  echo "Compilation unsuccessul!" 1>&2
-  exit 64
-fi
-
 make clean
 export CUDA_HOME=$BASE_PATH/cuda-11.4
 make cuda11x_nomatmul CUDA_VERSION=114
@@ -200,16 +160,6 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda115_nocublaslt.so" ]; then
   exit 64
 fi
 
-make clean
-export CUDA_HOME=$BASE_PATH/cuda-11.6
-
-make cuda11x_nomatmul CUDA_VERSION=116
-if [ ! -f "./bitsandbytes/libbitsandbytes_cuda116_nocublaslt.so" ]; then
-  # Control will enter here if $DIRECTORY doesn't exist.
-  echo "Compilation unsuccessul!" 1>&2
-  exit 64
-fi
-
 make clean
 export CUDA_HOME=$BASE_PATH/cuda-11.7
 make cuda11x_nomatmul CUDA_VERSION=117
@@ -250,5 +200,15 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda121_nocublaslt.so" ]; then
   exit 64
 fi
 
+make clean
+export CUDA_HOME=$BASE_PATH/cuda-12.2
+make cuda12x_nomatmul CUDA_VERSION=122
+
+if [ ! -f "./bitsandbytes/libbitsandbytes_cuda122_nocublaslt.so" ]; then
+  # Control will enter here if $DIRECTORY doesn't exist.
+  echo "Compilation unsuccessul!" 1>&2
+  exit 64
+fi
+
 python -m build
 python -m twine upload dist/* --verbose