Fixed CUDA Conda PyTorch 2.0 issues.
This commit is contained in:
parent
2bb5c00ba9
commit
4cd63deff3
17
README.md
17
README.md
|
@ -14,8 +14,25 @@ Resources:
|
|||
Python >=3.8. Linux distribution (Ubuntu, MacOS, etc.) + CUDA > 10.0. LLM.int8() requires Turing or Ampere GPUs.
|
||||
|
||||
**Installation**:
|
||||
|
||||
``pip install bitsandbytes``
|
||||
|
||||
In some cases you may need to compile from source. If so, you can install CUDA with the install script in the repository. No sudo is required for this install.
|
||||
|
||||
```bash
|
||||
wget https://raw.githubusercontent.com/TimDettmers/bitsandbytes/main/cuda_install.sh
|
||||
# Syntax cuda_install CUDA_VERSION INSTALL_PREFIX EXPORT_TO_BASH
|
||||
# CUDA_VERSION in {110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121}
|
||||
# EXPORT_TO_BASH in {0, 1} with 0=False and 1=True
|
||||
|
||||
# For example, the following installs CUDA 11.8 to ~/local/cuda-11.8 and exports the path to your .bashrc
|
||||
bash cuda_install.sh 118 ~/local 1
|
||||
```
|
||||
|
||||
To use a specific CUDA version just for a single compile run, you can set the variable `CUDA_HOME`. For example, the following command compiles `libbitsandbytes_cuda117.so` using compiler flags for cuda11x with the CUDA version at `~/local/cuda-11.7`:
|
||||
|
||||
``CUDA_HOME=~/local/cuda-11.7 CUDA_VERSION=117 make cuda11x``
|
||||
|
||||
**Using 8-bit optimizer**:
|
||||
1. Comment out optimizer: ``#torch.optim.Adam(....)``
|
||||
2. Add 8-bit optimizer of your choice ``bnb.optim.Adam8bit(....)`` (arguments stay the same)
|
||||
|
|
|
@ -11,8 +11,6 @@ from bitsandbytes.cuda_setup.main import CUDASetup
|
|||
setup = CUDASetup.get_instance()
|
||||
if setup.initialized != True:
|
||||
setup.run_cuda_setup()
|
||||
if 'BITSANDBYTES_NOWELCOME' not in os.environ or str(os.environ['BITSANDBYTES_NOWELCOME']) == '0':
|
||||
setup.print_log_stack()
|
||||
|
||||
lib = setup.lib
|
||||
try:
|
||||
|
@ -31,3 +29,7 @@ except AttributeError:
|
|||
warn("The installed version of bitsandbytes was compiled without GPU support. "
|
||||
"8-bit optimizers and GPU quantization are unavailable.")
|
||||
COMPILED_WITH_CUDA = False
|
||||
|
||||
# print the setup details after checking for errors so we do not print twice
|
||||
if 'BITSANDBYTES_NOWELCOME' not in os.environ or str(os.environ['BITSANDBYTES_NOWELCOME']) == '0':
|
||||
setup.print_log_stack()
|
||||
|
|
|
@ -21,12 +21,21 @@ import os
|
|||
import errno
|
||||
import torch
|
||||
from warnings import warn
|
||||
from itertools import product
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Set, Union
|
||||
from .env_vars import get_potentially_lib_path_containing_env_vars
|
||||
|
||||
CUDA_RUNTIME_LIB: str = "libcudart.so"
|
||||
# these are the most common library names
|
||||
# libcudart.so is missing by default for a conda install with PyTorch 2.0 and instead
|
||||
# we have libcudart.so.11.0, which caused a lot of errors before
|
||||
# not sure if libcudart.so.12.0 exists in pytorch installs, but it does not hurt
|
||||
CUDA_RUNTIME_LIBS: list = ["libcudart.so", 'libcudart.so.11.0', 'libcudart.so.12.0']
|
||||
|
||||
# this is an ordered list of backup paths to search for CUDA in if it cannot be found in the main environment paths
|
||||
backup_paths = []
|
||||
backup_paths.append('$CONDA_PREFIX/lib/libcudart.so.11.0')
|
||||
|
||||
class CUDASetup:
|
||||
_instance = None
|
||||
|
@ -98,6 +107,8 @@ class CUDASetup:
|
|||
package_dir = Path(__file__).parent.parent
|
||||
binary_path = package_dir / binary_name
|
||||
|
||||
print('bin', binary_path)
|
||||
|
||||
try:
|
||||
if not binary_path.exists():
|
||||
self.add_log_entry(f"CUDA SETUP: Required library version not found: {binary_name}. Maybe you need to compile it from source?")
|
||||
|
@ -117,7 +128,6 @@ class CUDASetup:
|
|||
self.add_log_entry('='*80)
|
||||
self.add_log_entry('')
|
||||
self.generate_instructions()
|
||||
self.print_log_stack()
|
||||
raise Exception('CUDA SETUP: Setup Failed!')
|
||||
self.lib = ct.cdll.LoadLibrary(binary_path)
|
||||
else:
|
||||
|
@ -125,7 +135,6 @@ class CUDASetup:
|
|||
self.lib = ct.cdll.LoadLibrary(binary_path)
|
||||
except Exception as ex:
|
||||
self.add_log_entry(str(ex))
|
||||
self.print_log_stack()
|
||||
|
||||
def add_log_entry(self, msg, is_warning=False):
|
||||
self.cuda_setup_log.append((msg, is_warning))
|
||||
|
@ -178,11 +187,12 @@ def remove_non_existent_dirs(candidate_paths: Set[Path]) -> Set[Path]:
|
|||
|
||||
|
||||
def get_cuda_runtime_lib_paths(candidate_paths: Set[Path]) -> Set[Path]:
|
||||
return {
|
||||
path / CUDA_RUNTIME_LIB
|
||||
for path in candidate_paths
|
||||
if (path / CUDA_RUNTIME_LIB).is_file()
|
||||
}
|
||||
paths = set()
|
||||
for libname in CUDA_RUNTIME_LIBS:
|
||||
for path in candidate_paths:
|
||||
if (path / libname).is_file():
|
||||
paths.add(path / libname)
|
||||
return paths
|
||||
|
||||
|
||||
def resolve_paths_list(paths_list_candidate: str) -> Set[Path]:
|
||||
|
@ -257,7 +267,7 @@ def determine_cuda_runtime_lib_path() -> Union[Path, None]:
|
|||
cuda_runtime_libs.update(find_cuda_lib_in(value))
|
||||
|
||||
if len(cuda_runtime_libs) == 0:
|
||||
CUDASetup.get_instance().add_log_entry('CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching /usr/local/cuda/lib64...')
|
||||
CUDASetup.get_instance().add_log_entry('CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching in backup paths...')
|
||||
cuda_runtime_libs.update(find_cuda_lib_in('/usr/local/cuda/lib64'))
|
||||
|
||||
warn_in_case_of_duplicates(cuda_runtime_libs)
|
||||
|
|
|
@ -17,6 +17,7 @@ URL121=https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installer
|
|||
|
||||
CUDA_VERSION=$1
|
||||
BASE_PATH=$2
|
||||
EXPORT_BASHRC=$3
|
||||
|
||||
if [[ -n "$CUDA_VERSION" ]]; then
|
||||
if [[ "$CUDA_VERSION" -eq "92" ]]; then
|
||||
|
@ -76,11 +77,13 @@ FILE=$(basename $URL)
|
|||
if [[ -n "$CUDA_VERSION" ]]; then
|
||||
echo $URL
|
||||
echo $FILE
|
||||
wget $URL
|
||||
bash $FILE --no-drm --no-man-page --override --toolkitpath=$BASE_PATH/$FOLDER/ --toolkit --silent
|
||||
echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$BASE_PATH/$FOLDER/lib64/" >> ~/.bashrc
|
||||
echo "export PATH=$PATH:$BASE_PATH/$FOLDER/bin/" >> ~/.bashrc
|
||||
source ~/.bashrc
|
||||
#wget $URL
|
||||
#bash $FILE --no-drm --no-man-page --override --toolkitpath=$BASE_PATH/$FOLDER/ --toolkit --silent
|
||||
if [ "$EXPORT_BASHRC" -eq "1" ]; then
|
||||
echo "export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:$BASE_PATH/$FOLDER/lib64" >> ~/.bashrc
|
||||
echo "export PATH=\$PATH:$BASE_PATH/$FOLDER/bin" >> ~/.bashrc
|
||||
source ~/.bashrc
|
||||
fi
|
||||
else
|
||||
echo ""
|
||||
fi
|
||||
|
|
|
@ -5,95 +5,20 @@ import pytest
|
|||
|
||||
import bitsandbytes as bnb
|
||||
from bitsandbytes.cuda_setup.main import (
|
||||
CUDA_RUNTIME_LIB,
|
||||
determine_cuda_runtime_lib_path,
|
||||
evaluate_cuda_setup,
|
||||
extract_candidate_paths,
|
||||
)
|
||||
|
||||
"""
|
||||
'LD_LIBRARY_PATH': ':/mnt/D/titus/local/cuda-11.1/lib64/'
|
||||
'CONDA_EXE': '/mnt/D/titus/miniconda/bin/conda'
|
||||
'LESSCLOSE': '/usr/bin/lesspipe %s %s'
|
||||
'OLDPWD': '/mnt/D/titus/src'
|
||||
'CONDA_PREFIX': '/mnt/D/titus/miniconda/envs/8-bit'
|
||||
'SSH_AUTH_SOCK': '/mnt/D/titus/.ssh/ssh-agent.tim-uw.sock'
|
||||
'CONDA_PREFIX_1': '/mnt/D/titus/miniconda'
|
||||
'PWD': '/mnt/D/titus/src/8-bit'
|
||||
'HOME': '/mnt/D/titus'
|
||||
'CONDA_PYTHON_EXE': '/mnt/D/titus/miniconda/bin/python'
|
||||
'CUDA_HOME': '/mnt/D/titus/local/cuda-11.1/'
|
||||
'TMUX': '/tmp/tmux-1007/default,59286,1'
|
||||
'XDG_DATA_DIRS': '/usr/local/share:/usr/share:/var/lib/snapd/desktop'
|
||||
'SSH_TTY': '/dev/pts/0'
|
||||
'MAIL': '/var/mail/titus'
|
||||
'SHELL': '/bin/bash'
|
||||
'DBUS_SESSION_BUS_ADDRESS': 'unix:path=/run/user/1007/bus'
|
||||
'XDG_RUNTIME_DIR': '/run/user/1007'
|
||||
'PATH': '/mnt/D/titus/miniconda/envs/8-bit/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/mnt/D/titus/local/cuda-11.1/bin'
|
||||
'LESSOPEN': '| /usr/bin/lesspipe %s'
|
||||
'_': '/mnt/D/titus/miniconda/envs/8-bit/bin/python'
|
||||
# any that include 'CONDA' that are not 'CONDA_PREFIX'
|
||||
|
||||
# we search for
|
||||
'CUDA_HOME': '/mnt/D/titus/local/cuda-11.1/'
|
||||
"""
|
||||
|
||||
|
||||
class InputAndExpectedOutput(NamedTuple):
|
||||
input: str
|
||||
output: str
|
||||
|
||||
|
||||
HAPPY_PATH__LD_LIB_TEST_PATHS: List[InputAndExpectedOutput] = [
|
||||
(
|
||||
f"some/other/dir:dir/with/{CUDA_RUNTIME_LIB}",
|
||||
f"dir/with/{CUDA_RUNTIME_LIB}",
|
||||
),
|
||||
(
|
||||
f":some/other/dir:dir/with/{CUDA_RUNTIME_LIB}",
|
||||
f"dir/with/{CUDA_RUNTIME_LIB}",
|
||||
),
|
||||
(
|
||||
f"some/other/dir:dir/with/{CUDA_RUNTIME_LIB}:",
|
||||
f"dir/with/{CUDA_RUNTIME_LIB}",
|
||||
),
|
||||
(
|
||||
f"some/other/dir::dir/with/{CUDA_RUNTIME_LIB}",
|
||||
f"dir/with/{CUDA_RUNTIME_LIB}",
|
||||
),
|
||||
(
|
||||
f"dir/with/{CUDA_RUNTIME_LIB}:some/other/dir",
|
||||
f"dir/with/{CUDA_RUNTIME_LIB}",
|
||||
),
|
||||
(
|
||||
f"dir/with/{CUDA_RUNTIME_LIB}:other/dir/libcuda.so",
|
||||
f"dir/with/{CUDA_RUNTIME_LIB}",
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@pytest.fixture(params=HAPPY_PATH__LD_LIB_TEST_PATHS)
|
||||
def happy_path_path_string(tmpdir, request):
|
||||
for path in extract_candidate_paths(request.param):
|
||||
test_dir.mkdir()
|
||||
if CUDA_RUNTIME_LIB in path:
|
||||
(test_input / CUDA_RUNTIME_LIB).touch()
|
||||
|
||||
UNHAPPY_PATH__LD_LIB_TEST_PATHS = [
|
||||
f"a/b/c/{CUDA_RUNTIME_LIB}:d/e/f/{CUDA_RUNTIME_LIB}",
|
||||
f"a/b/c/{CUDA_RUNTIME_LIB}:d/e/f/{CUDA_RUNTIME_LIB}:g/h/j/{CUDA_RUNTIME_LIB}",
|
||||
]
|
||||
|
||||
|
||||
def test_full_system():
|
||||
def test_cuda_full_system():
|
||||
## this only tests the cuda version and not compute capability
|
||||
|
||||
# if CONDA_PREFIX exists, it has priority over all other env variables
|
||||
# but it does not contain the library directly, so we need to look at a sub-folder
|
||||
version = ""
|
||||
if "CONDA_PREFIX" in os.environ:
|
||||
ls_output, err = bnb.utils.execute_and_return(f'ls -l {os.environ["CONDA_PREFIX"]}/lib/libcudart.so')
|
||||
ls_output, err = bnb.utils.execute_and_return(f'ls -l {os.environ["CONDA_PREFIX"]}/lib/libcudart.so.11.0')
|
||||
major, minor, revision = (ls_output.split(" ")[-1].replace("libcudart.so.", "").split("."))
|
||||
version = float(f"{major}.{minor}")
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user