From 89e3b82731db66eb4bb0c0690f1f623c8ef6df65 Mon Sep 17 00:00:00 2001
From: Tim Dettmers <tim.dettmers@gmail.com>
Date: Tue, 11 Apr 2023 13:47:10 -0700
Subject: [PATCH] Added more detailed cuda setup debug and debugging
 instructions.

---
 README.md                       | 50 ++++++++++++++-----
 bitsandbytes/__main__.py        | 87 ++++++++++++++++++++++++++++-----
 bitsandbytes/cuda_setup/main.py |  3 +-
 compile_from_source.md          | 27 +++++++---
 cuda_install.sh                 |  4 +-
 errors_and_solutions.md         |  2 +-
 6 files changed, 139 insertions(+), 34 deletions(-)

diff --git a/README.md b/README.md
index de6b27b..600401c 100644
--- a/README.md
+++ b/README.md
@@ -11,27 +11,40 @@ Resources:
 
 ## TL;DR
 **Requirements**
-Python >=3.8. Linux distribution (Ubuntu, MacOS, etc.) + CUDA > 10.0. LLM.int8() requires Turing or Ampere GPUs.
+Python >=3.8. Linux distribution (Ubuntu, MacOS, etc.) + CUDA > 10.0.
+
+(Deprecated: CUDA 10.0 is deprecated and only CUDA >= 11.0 will be supported with release 0.39.0.)
 
 **Installation**:
 
 ``pip install bitsandbytes``
 
-In some cases it can happen that you need to compile from source. In that case, you can install CUDA with the install script in the repository. No sudo is required for this install.
+In some cases you may need to compile from source. If this happens, please consider submitting a bug report that includes the output of `python -m bitsandbytes`. The short instructions below might work out of the box if `nvcc` is installed. If they do not work, see the "Compile from source" section further below.
 
+Compilation quickstart:
 ```bash
-wget https://raw.githubusercontent.com/TimDettmers/bitsandbytes/main/cuda_install.sh
-# Syntax cuda_install CUDA_VERSION INSTALL_PREFIX EXPORT_TO_BASH
-#   CUDA_VERSION in {110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121}
-#   EXPORT_TO_BASH in {0, 1} with 0=False and 1=True 
+git clone https://github.com/timdettmers/bitsandbytes.git
+cd bitsandbytes
 
-# For example, the following installs CUDA 11.8 to ~/local/cuda-11.8 and exports the path to your .bashrc
-bash cuda install 118 ~/local 1 
+# CUDA_VERSION in {110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121}
+# make argument in {cuda110, cuda11x, cuda12x}
+# if you do not know what CUDA you have, try looking at the output of: python -m bitsandbytes
+CUDA_VERSION=117 make cuda11x
+python setup.py install
 ```
 
-To use a specific CUDA version just for a single compile run, you can set the variable `CUDA_HOME`, for example the following command compiles `libbitsandbytes_cuda117.so` using compiler flags for cuda11x with the cuda version at `~/local/cuda-11.7`:
+**Using Int8 inference with HuggingFace Transformers**
 
-``CUDA_HOME=~/local/cuda-11.7 CUDA_VERSION=117 make cuda11x``
+```python
+from transformers import AutoModelForCausalLM
+model = AutoModelForCausalLM.from_pretrained(
+  'decapoda-research/llama-7b-hf',
+  device_map='auto',
+  load_in_8bit=True,
+  max_memory=f'{int(torch.cuda.mem_get_info()[0]/1024**3)-2}GB')
+```
+
+A more detailed example can be found in [examples/int8_inference_huggingface.py](examples/int8_inference_huggingface.py).
 
 **Using 8-bit optimizer**:
 1. Comment out optimizer: ``#torch.optim.Adam(....)``
@@ -130,8 +143,23 @@ For upcoming features and changes and full history see [Patch Notes](CHANGELOG.m
 2. __fatbinwrap_.. [Solution](errors_and_solutions.md#fatbinwrap_)
 
 ## Compile from source
+To compile from source, you need an installation of CUDA. If `nvcc` is not installed, you can install the CUDA Toolkit with nvcc through the following commands.
 
-To compile from source, please follow the [compile_from_source.md](compile_from_source.md) instructions.
+```bash
+wget https://raw.githubusercontent.com/TimDettmers/bitsandbytes/main/cuda_install.sh
+# Syntax cuda_install CUDA_VERSION INSTALL_PREFIX EXPORT_TO_BASH
+#   CUDA_VERSION in {110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121}
+#   EXPORT_TO_BASH in {0, 1} with 0=False and 1=True 
+
+# For example, the following installs CUDA 11.8 to ~/local/cuda-11.8 and exports the path to your .bashrc
+bash cuda_install.sh 118 ~/local 1
+```
+
+To use a specific CUDA version just for a single compile run, you can set the variable `CUDA_HOME`, for example the following command compiles `libbitsandbytes_cuda117.so` using compiler flags for cuda11x with the cuda version at `~/local/cuda-11.7`:
+
+``CUDA_HOME=~/local/cuda-11.7 CUDA_VERSION=117 make cuda11x``
+
+For more detailed instructions, please see [compile_from_source.md](compile_from_source.md).
 
 ## License
 
diff --git a/bitsandbytes/__main__.py b/bitsandbytes/__main__.py
index f45fc34..a100b29 100644
--- a/bitsandbytes/__main__.py
+++ b/bitsandbytes/__main__.py
@@ -1,11 +1,82 @@
 import os
 import sys
+import shlex
+import subprocess
+
 from warnings import warn
+from typing import Tuple
+from os.path import isdir
 
 import torch
 
 HEADER_WIDTH = 60
 
+def execute_and_return(command_string: str) -> Tuple[str, str]:
+    """Run a command without a shell and return its decoded output.
+
+    Args:
+        command_string: Command line, tokenized with ``shlex.split``.
+
+    Returns:
+        A ``(stdout, stderr)`` tuple, each decoded as UTF-8 and stripped
+        of surrounding whitespace.
+    """
+    completed = subprocess.run(
+        shlex.split(command_string),
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+    std_out = completed.stdout.decode("UTF-8").strip()
+    std_err = completed.stderr.decode("UTF-8").strip()
+    return std_out, std_err
+
+def find_file_recursive(folder, filename):
+    """Return `find` output for `filename` under `folder`; raise RuntimeError on stderr."""
+    # shlex.quote keeps paths with spaces or quotes intact through shlex.split.
+    out, err = execute_and_return(f'find {shlex.quote(folder)} -name {shlex.quote(filename)}')
+    if err:
+        raise RuntimeError('Something went wrong when trying to find file. Maybe you do not have a linux system?')
+    return out
+
+
+def generate_bug_report_information():
+    """Print every `*cuda*so` library found in the usual CUDA locations.
+
+    Searches $CONDA_PREFIX, /usr/local, the working directory and each
+    $LD_LIBRARY_PATH entry; a failed search is reported, not raised.
+    """
+    def report(title, folder, failure):
+        # One titled section per folder; a `find` error must not end the report.
+        print_header(title)
+        try:
+            print(find_file_recursive(folder, '*cuda*so'))
+        except Exception:
+            print(failure)
+
+    print_header("")
+    print_header("BUG REPORT INFORMATION")
+    print_header("")
+    print('')
+
+    sections = [("/usr/local CUDA PATHS", '/usr/local'),
+                ("WORKING DIRECTORY CUDA PATHS", os.getcwd())]
+    if 'CONDA_PREFIX' in os.environ:
+        sections.insert(0, ("ANACONDA CUDA PATHS", os.environ['CONDA_PREFIX']))
+    for title, folder in sections:
+        if isdir(folder):
+            report(title, folder, f'Could not search: {folder}')
+            print('')
+
+    print_header("LD_LIBRARY CUDA PATHS")
+    # LD_LIBRARY_PATH may be unset; isdir('') is False, so empty entries are skipped.
+    for path in sorted(set(os.environ.get('LD_LIBRARY_PATH', '').strip().split(':'))):
+        if isdir(path):
+            report(f"{path} CUDA PATHS", path, f'Could not read LD_LIBRARY_PATH: {path}')
+    print('')
+
+
+
+
 
 def print_header(
     txt: str, width: int = HEADER_WIDTH, filler: str = "+"
@@ -21,25 +92,13 @@ def print_debug_info() -> None:
     )
 
 
-print_header("")
-print_header("DEBUG INFORMATION")
-print_header("")
-print()
+generate_bug_report_information()
 
 
 from . import COMPILED_WITH_CUDA, PACKAGE_GITHUB_URL
 from .cuda_setup.env_vars import to_be_ignored
 from .cuda_setup.main import get_compute_capabilities, get_cuda_lib_handle
 
-print_header("POTENTIALLY LIBRARY-PATH-LIKE ENV VARS")
-for k, v in os.environ.items():
-    if "/" in v and not to_be_ignored(k, v):
-        print(f"'{k}': '{v}'")
-print_header("")
-
-print(
-    "\nWARNING: Please be sure to sanitize sensible info from any such env vars!\n"
-)
 
 print_header("OTHER")
 print(f"COMPILED_WITH_CUDA = {COMPILED_WITH_CUDA}")
@@ -55,6 +114,7 @@ Running a quick check that:
     + CUDA function is callable
 """
 )
+print("\nWARNING: Please be sure to sanitize sensitive info from any such env vars!\n")
 
 try:
     from bitsandbytes.optim import Adam
@@ -91,3 +151,4 @@ except Exception as e:
     print(e)
     print_debug_info()
     sys.exit(1)
+
diff --git a/bitsandbytes/cuda_setup/main.py b/bitsandbytes/cuda_setup/main.py
index 776bee5..2cadbd7 100644
--- a/bitsandbytes/cuda_setup/main.py
+++ b/bitsandbytes/cuda_setup/main.py
@@ -373,7 +373,8 @@ def evaluate_cuda_setup():
     if 'BITSANDBYTES_NOWELCOME' not in os.environ or str(os.environ['BITSANDBYTES_NOWELCOME']) == '0':
         print('')
         print('='*35 + 'BUG REPORT' + '='*35)
-        print('Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues')
+        print(('Welcome to bitsandbytes. For bug reports, please run\n\npython -m bitsandbytes\n\n'),
+              ('and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues'))
         print('='*80)
     if not torch.cuda.is_available(): return 'libbitsandbytes_cpu.so', None, None, None, None
 
diff --git a/compile_from_source.md b/compile_from_source.md
index c126341..7edb33f 100644
--- a/compile_from_source.md
+++ b/compile_from_source.md
@@ -1,20 +1,35 @@
 # Compiling from source
 
 Basic steps.
-1. `make [target]` where `[target]` is among `cuda92, cuda10x, cuda110, cuda11x, cuda12x, cpuonly`
-2. `CUDA_VERSION=XXX python setup.py install`
+1. `CUDA_VERSION=XXX make [target]` where `[target]` is among `cuda92, cuda10x, cuda110, cuda11x, cuda12x, cpuonly`
+2. `python setup.py install`
 
 To run these steps you will need to have the nvcc compiler installed that comes with a CUDA installation. If you use anaconda (recommended) then you can figure out which version of CUDA you are using with PyTorch via the command `conda list | grep cudatoolkit`. Then you can install the nvcc compiler by downloading and installing the same CUDA version from the [CUDA toolkit archive](https://developer.nvidia.com/cuda-toolkit-archive).
 
-For your convenience, there is an installation script in the root directory that installs CUDA 11.1 locally and configures it automatically. After installing you should add the `bin` sub-directory to the `$PATH` variable to make the compiler visible to your system. To do this you can add this to your `.bashrc` by executing these commands:
+You can install CUDA locally without sudo by following these steps:
+
 ```bash
-echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64/" >> ~/.bashrc
-echo "export PATH=$PATH:/usr/local/cuda/bin/" >> ~/.bashrc
-source ~/.bashrc
+wget https://raw.githubusercontent.com/TimDettmers/bitsandbytes/main/cuda_install.sh
+# Syntax cuda_install CUDA_VERSION INSTALL_PREFIX EXPORT_TO_BASH
+#   CUDA_VERSION in {110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121}
+#   EXPORT_TO_BASH in {0, 1} with 0=False and 1=True 
+
+# For example, the following installs CUDA 11.7 to ~/local/cuda-11.7 and exports the path to your .bashrc
+bash cuda_install.sh 117 ~/local 1
 ```
 
 By default, the Makefile will look at your `CUDA_HOME` environmental variable to find your CUDA version for compiling the library. If this path is not set it is inferred from the path of your `nvcc` compiler.
 
 Either `nvcc` needs to be in path for the `CUDA_HOME` variable needs to be set to the CUDA directory root (e.g. `/usr/local/cuda`) in order for compilation to succeed
 
+If you type `nvcc` and it cannot be found, you might need to add it to your path or set the `CUDA_HOME` variable. You can run `python -m bitsandbytes` to find the path to CUDA. For example, if `python -m bitsandbytes` shows you the following:
+```
+++++++++++++++++++ /usr/local CUDA PATHS +++++++++++++++++++
+/usr/local/cuda-11.7/targets/x86_64-linux/lib/libcudart.so
+```
+You can set `CUDA_HOME` to `/usr/local/cuda-11.7`. For example, you might be able to compile like this.
+
+``CUDA_HOME=/usr/local/cuda-11.7 CUDA_VERSION=117 make cuda11x``
+
+
 If you have problems compiling the library with these instructions from source, please open an issue.
diff --git a/cuda_install.sh b/cuda_install.sh
index b333f33..2e6c7d1 100644
--- a/cuda_install.sh
+++ b/cuda_install.sh
@@ -77,8 +77,8 @@ FILE=$(basename $URL)
 if [[ -n "$CUDA_VERSION" ]]; then
   echo $URL
   echo $FILE
-  #wget $URL
-  #bash $FILE --no-drm --no-man-page --override --toolkitpath=$BASE_PATH/$FOLDER/ --toolkit --silent
+  wget $URL
+  bash $FILE --no-drm --no-man-page --override --toolkitpath=$BASE_PATH/$FOLDER/ --toolkit --silent
   if [ "$EXPORT_BASHRC" -eq "1" ]; then
     echo "export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:$BASE_PATH/$FOLDER/lib64" >> ~/.bashrc
     echo "export PATH=\$PATH:$BASE_PATH/$FOLDER/bin" >> ~/.bashrc
diff --git a/errors_and_solutions.md b/errors_and_solutions.md
index 5e8b2d2..5b8cbcd 100644
--- a/errors_and_solutions.md
+++ b/errors_and_solutions.md
@@ -1,6 +1,6 @@
 # No kernel image available
 
-This problem arises with the cuda version loaded by bitsandbytes is not supported by your GPU, or if you pytorch CUDA version mismatches. So solve this problem you need to debug ``$LD_LIBRARY_PATH``, ``$CUDA_HOME``, ``$PATH``. You can print these via ``echo $PATH``. You should look for multiple paths to different CUDA versions. This can include versions in your anaconda path, for example ``$HOME/anaconda3/lib``. You can check those versions via ``ls -l $HOME/anaconda3/lib/*cuda*`` or equivalent paths. Look at the CUDA versions of files in these paths. Does it match with ``nvidia-smi``?
+This problem arises when the CUDA version loaded by bitsandbytes is not supported by your GPU, or if your PyTorch CUDA version does not match. To solve this problem you need to debug ``$LD_LIBRARY_PATH``, ``$CUDA_HOME``, ``$PATH``. You can print these via ``echo $PATH``. You should look for multiple paths to different CUDA versions. This can include versions in your anaconda path, for example ``$HOME/anaconda3/lib``. You can check those versions via ``ls -l $HOME/anaconda3/lib/*cuda*`` or equivalent paths. Look at the CUDA versions of files in these paths. Does it match with ``nvidia-smi``?
 
 If you are feeling lucky, you can also try to compile the library from source. This can be still problematic if your PATH variables have multiple cuda versions. As such, it is recommended to figure out path conflicts before you proceed with compilation.