Polished CUDA SETUP replacement and added docs.

Tim Dettmers 2023-07-14 12:50:59 -07:00
parent 1ab6758b36
commit 55f4c398a0
2 changed files with 72 additions and 15 deletions


@@ -101,8 +101,16 @@ class CUDASetup:
     def manual_override(self):
         if torch.cuda.is_available():
-            if 'CUDA_HOME' in os.environ and 'CUDA_VERSION' in os.environ:
-                if len(os.environ['CUDA_HOME']) > 0 and len(os.environ['CUDA_VERSION']) > 0:
+            if 'CUDA_VERSION' in os.environ:
+                if len(os.environ['CUDA_VERSION']) > 0:
+                    warn((f'\n\n{"="*80}\n'
+                          'WARNING: Manual override via CUDA_VERSION env variable detected!\n'
+                          'CUDA_VERSION=XXX can be used to load a bitsandbytes version that is different from the PyTorch CUDA version.\n'
+                          'If this was unintended set the CUDA_VERSION variable to an empty string: export CUDA_VERSION=\n'
+                          'If you use the manual override make sure the right libcudart.so is in your LD_LIBRARY_PATH\n'
+                          'For example by adding the following to your .bashrc: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<path_to_cuda_dir>/lib64\n'
+                          f'Loading CUDA version: CUDA_VERSION={os.environ["CUDA_VERSION"]}'
+                          f'\n{"="*80}\n\n'))
                     self.binary_name = self.binary_name[:-6] + f'{os.environ["CUDA_VERSION"]}.so'

     def run_cuda_setup(self):
@@ -194,8 +202,8 @@ def remove_non_existent_dirs(candidate_paths: Set[Path]) -> Set[Path]:
     non_existent_directories: Set[Path] = candidate_paths - existent_directories
     if non_existent_directories:
-        CUDASetup.get_instance().add_log_entry("WARNING: The following directories listed in your path were found to "
-            f"be non-existent: {non_existent_directories}", is_warning=True)
+        CUDASetup.get_instance().add_log_entry("The following directories listed in your path were found to "
+            f"be non-existent: {non_existent_directories}", is_warning=False)

     return existent_directories
@@ -229,11 +237,12 @@ def warn_in_case_of_duplicates(results_paths: Set[Path]) -> None:
         f"Found duplicate {CUDA_RUNTIME_LIBS} files: {results_paths}.. "
         "We select the PyTorch default libcudart.so, which is {torch.version.cuda},"
         "but this might mismatch with the CUDA version that is needed for bitsandbytes."
-        "To override this behavior set the CUDA_HOME environmental variable"
-        "For example, if you want to use the CUDA version wht the path"
-        "/usr/local/cuda-11.2/lib/libcudart.so as the default,"
-        "then add the following to your .bashrc:"
-        "export CUDA_HOME=/usr/local/cuda-11.2")
+        "To override this behavior set the CUDA_VERSION=<version string, e.g. 122> environmental variable"
+        "For example, if you want to use CUDA version 122:"
+        "CUDA_VERSION=122 python ..."
+        "OR set the environmental variable in your .bashrc: export CUDA_VERSION=122"
+        "In the case of a manual override, make sure you set LD_LIBRARY_PATH, e.g."
+        "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2")

     CUDASetup.get_instance().add_log_entry(warning_msg, is_warning=True)
@@ -289,7 +298,8 @@ def determine_cuda_runtime_lib_path() -> Union[Path, None]:
     warn_in_case_of_duplicates(cuda_runtime_libs)

-    print(cuda_runtime_libs, flush=True)
+    cuda_setup = CUDASetup.get_instance()
+    cuda_setup.add_log_entry(f'DEBUG: Possible options found for libcudart.so: {cuda_runtime_libs}')

     return next(iter(cuda_runtime_libs)) if cuda_runtime_libs else None
@@ -313,15 +323,15 @@ def get_compute_capabilities():

 def evaluate_cuda_setup():
+    cuda_setup = CUDASetup.get_instance()
     if 'BITSANDBYTES_NOWELCOME' not in os.environ or str(os.environ['BITSANDBYTES_NOWELCOME']) == '0':
-        print('')
-        print('='*35 + 'BUG REPORT' + '='*35)
-        print(('Welcome to bitsandbytes. For bug reports, please run\n\npython -m bitsandbytes\n\n'),
-              ('and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues'))
-        print('='*80)
+        cuda_setup.add_log_entry('')
+        cuda_setup.add_log_entry('='*35 + 'BUG REPORT' + '='*35)
+        cuda_setup.add_log_entry(('Welcome to bitsandbytes. For bug reports, please run\n\npython -m bitsandbytes\n\n'),
+            ('and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues'))
+        cuda_setup.add_log_entry('='*80)
     if not torch.cuda.is_available(): return 'libbitsandbytes_cpu.so', None, None, None, None

-    cuda_setup = CUDASetup.get_instance()
     cudart_path = determine_cuda_runtime_lib_path()
     ccs = get_compute_capabilities()
     ccs.sort()


@@ -0,0 +1,47 @@
## How to use a CUDA version that is different from PyTorch
Some features of bitsandbytes may need a newer CUDA version than the one regularly supported by the PyTorch binaries from conda/pip. In that case, you can use the following instructions to load a precompiled bitsandbytes binary that works for you.
## Installing or determining the CUDA installation
Determine the path of the CUDA version that you want to use. Common paths are:
```bash
/usr/local/cuda
/usr/local/cuda-XX.X
```
where XX.X is the CUDA version number.
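To find out which CUDA version your current PyTorch binary ships with (and therefore which bitsandbytes binary would be loaded by default), you can query `torch.version.cuda`:
```bash
python -c "import torch; print(torch.version.cuda)"
```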
You can also install the CUDA version that you need locally with a script provided by bitsandbytes:
```bash
wget https://raw.githubusercontent.com/TimDettmers/bitsandbytes/main/cuda_install.sh
# Syntax: cuda_install.sh CUDA_VERSION INSTALL_PREFIX EXPORT_TO_BASH
# CUDA_VERSION in {110, 111, 112, 113, 114, 115, 116, 117, 118, 120, 121, 122}
# EXPORT_TO_BASH in {0, 1} with 0=False and 1=True
# For example, the following installs CUDA 11.7 to ~/local/cuda-11.7 and exports the path to your .bashrc
bash cuda_install.sh 117 ~/local 1
```
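As a quick sanity check after installation, you can verify that the CUDA runtime library ended up where expected; the path below assumes the `~/local` prefix from the example above:
```bash
ls ~/local/cuda-11.7/lib64/libcudart.so*
```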
## Setting the environmental variables CUDA_HOME, CUDA_VERSION, and LD_LIBRARY_PATH
To manually override the CUDA version that PyTorch uses, you need to set the following variables:
```bash
export CUDA_HOME=<PATH>
export CUDA_VERSION=<VERSION>
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<PATH>
```
For example, to use the local install path from above:
```bash
export CUDA_HOME=/home/tim/local/cuda-11.7
export CUDA_VERSION=117
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/tim/local/cuda-11.7
```
It is best to add these lines to the `.bashrc` file to make them permanent.
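You can confirm that the variables are visible in your current shell before launching anything:
```bash
echo $CUDA_HOME $CUDA_VERSION
echo $LD_LIBRARY_PATH | tr ':' '\n' | grep cuda
```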
If you now launch bitsandbytes with these environmental variables, the PyTorch CUDA version will be overridden by the new CUDA version, and a different bitsandbytes library will be loaded (in this case, version 117).
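Alternatively, the override can be applied inline for a single run; `train.py` below is just a placeholder for your own entry script:
```bash
CUDA_VERSION=117 LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/tim/local/cuda-11.7 python train.py
```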