Polished CUDA SETUP replacement and added docs.
commit 55f4c398a0 (parent 1ab6758b36)
```diff
@@ -101,8 +101,16 @@ class CUDASetup:
     def manual_override(self):
         if torch.cuda.is_available():
-            if 'CUDA_HOME' in os.environ and 'CUDA_VERSION' in os.environ:
-                if len(os.environ['CUDA_HOME']) > 0 and len(os.environ['CUDA_VERSION']) > 0:
+            if 'CUDA_VERSION' in os.environ:
+                if len(os.environ['CUDA_VERSION']) > 0:
+                    warn((f'\n\n{"="*80}\n'
+                          'WARNING: Manual override via CUDA_VERSION env variable detected!\n'
+                          'CUDA_VERSION=XXX can be used to load a bitsandbytes version that is different from the PyTorch CUDA version.\n'
+                          'If this was unintended set the CUDA_VERSION variable to an empty string: export CUDA_VERSION=\n'
+                          'If you use the manual override make sure the right libcudart.so is in your LD_LIBRARY_PATH\n'
+                          'For example by adding the following to your .bashrc: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<path_to_cuda_dir>/lib64\n'
+                          f'Loading CUDA version: CUDA_VERSION={os.environ["CUDA_VERSION"]}'
+                          f'\n{"="*80}\n\n'))
                     self.binary_name = self.binary_name[:-6] + f'{os.environ["CUDA_VERSION"]}.so'

     def run_cuda_setup(self):
```
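For reference, the override path above only rewrites the trailing version suffix of the binary name. A minimal standalone sketch of that rewrite, with an assumed starting binary name for illustration:

```python
import os

# Assumed example values, for illustration only.
os.environ["CUDA_VERSION"] = "122"
binary_name = "libbitsandbytes_cuda117.so"

# Slice off the last six characters ("117.so") and splice in the override,
# mirroring the rewrite in manual_override above.
binary_name = binary_name[:-6] + f'{os.environ["CUDA_VERSION"]}.so'
print(binary_name)  # libbitsandbytes_cuda122.so
```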
```diff
@@ -194,8 +202,8 @@ def remove_non_existent_dirs(candidate_paths: Set[Path]) -> Set[Path]:
     non_existent_directories: Set[Path] = candidate_paths - existent_directories
     if non_existent_directories:
-        CUDASetup.get_instance().add_log_entry("WARNING: The following directories listed in your path were found to "
-            f"be non-existent: {non_existent_directories}", is_warning=True)
+        CUDASetup.get_instance().add_log_entry("The following directories listed in your path were found to "
+            f"be non-existent: {non_existent_directories}", is_warning=False)

     return existent_directories
```
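The hunk above is plain set arithmetic over `pathlib.Path` objects: candidates that do not exist on disk are reported and dropped. A self-contained sketch with made-up paths:

```python
from pathlib import Path
from typing import Set

# Hypothetical candidates; in bitsandbytes these come from searched path variables.
candidate_paths: Set[Path] = {Path("/usr/local/cuda/lib64"), Path("/no/such/dir")}

existent_directories: Set[Path] = {path for path in candidate_paths if path.exists()}
non_existent_directories: Set[Path] = candidate_paths - existent_directories
if non_existent_directories:
    # The commit downgrades this message from a warning to an informational entry.
    print("The following directories listed in your path were found to "
          f"be non-existent: {non_existent_directories}")
```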
```diff
@@ -229,11 +237,12 @@ def warn_in_case_of_duplicates(results_paths: Set[Path]) -> None:
         f"Found duplicate {CUDA_RUNTIME_LIBS} files: {results_paths}.. "
         "We select the PyTorch default libcudart.so, which is {torch.version.cuda},"
         "but this might missmatch with the CUDA version that is needed for bitsandbytes."
-        "To override this behavior set the CUDA_HOME environmental variable"
-        "For example, if you want to use the CUDA version wht the path"
-        "/usr/local/cuda-11.2/lib/libcudart.so as the default,"
-        "then add the following to your .bashrc:"
-        "export CUDA_HOME=/usr/local/cuda-11.2")
+        "To override this behavior set the CUDA_VERSION=<version string, e.g. 122> environmental variable"
+        "For example, if you want to use the CUDA version 122"
+        "CUDA_VERSION=122 python ..."
+        "OR set the environmental variable in your .bashrc: export CUDA_VERSION=122"
+        "In the case of a manual override, make sure you set the LD_LIBRARY_PATH, e.g."
+        "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2")
     CUDASetup.get_instance().add_log_entry(warning_msg, is_warning=True)
```
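The message refers to the libcudart.so that ships with PyTorch as the default. To see which CUDA version that is on a given system (the version the duplicate warning above falls back to), a quick check:

```python
import torch

# The CUDA version the installed PyTorch binary was built against, e.g. '11.7'.
print(torch.version.cuda)
```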
```diff
@@ -289,7 +298,8 @@ def determine_cuda_runtime_lib_path() -> Union[Path, None]:
     warn_in_case_of_duplicates(cuda_runtime_libs)

-    print(cuda_runtime_libs, flush=True)
+    cuda_setup = CUDASetup.get_instance()
+    cuda_setup.add_log_entry(f'DEBUG: Possible options found for libcudart.so: {cuda_runtime_libs}')

     return next(iter(cuda_runtime_libs)) if cuda_runtime_libs else None
```
```diff
@@ -313,15 +323,15 @@ def get_compute_capabilities():

 def evaluate_cuda_setup():
+    cuda_setup = CUDASetup.get_instance()
     if 'BITSANDBYTES_NOWELCOME' not in os.environ or str(os.environ['BITSANDBYTES_NOWELCOME']) == '0':
-        print('')
-        print('='*35 + 'BUG REPORT' + '='*35)
-        print(('Welcome to bitsandbytes. For bug reports, please run\n\npython -m bitsandbytes\n\n'),
+        cuda_setup.add_log_entry('')
+        cuda_setup.add_log_entry('='*35 + 'BUG REPORT' + '='*35)
+        cuda_setup.add_log_entry(('Welcome to bitsandbytes. For bug reports, please run\n\npython -m bitsandbytes\n\n'),
             ('and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues'))
-        print('='*80)
+        cuda_setup.add_log_entry('='*80)
     if not torch.cuda.is_available(): return 'libbitsandbytes_cpu.so', None, None, None, None

-    cuda_setup = CUDASetup.get_instance()
     cudart_path = determine_cuda_runtime_lib_path()
     ccs = get_compute_capabilities()
     ccs.sort()
```
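As the condition above shows, the welcome block is emitted only when `BITSANDBYTES_NOWELCOME` is unset or equals `'0'`. A minimal sketch of suppressing it, assuming the variable is set before the import that triggers setup:

```python
import os

# Any value other than '0' skips the banner, per the check in evaluate_cuda_setup.
os.environ["BITSANDBYTES_NOWELCOME"] = "1"

import bitsandbytes  # CUDA setup runs on import, now without the welcome block
```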
how_to_use_nonpytorch_cuda.md (new file, 47 lines):
## How to use a CUDA version that is different from PyTorch

Some features of bitsandbytes may need a newer CUDA version than the one regularly supported by the PyTorch binaries from conda / pip. In that case you can use the following instructions to load a precompiled bitsandbytes binary that works for you.
## Installing or determining the CUDA installation

Determine the path of the CUDA version that you want to use. Common paths are:

```bash
/usr/local/cuda
/usr/local/cuda-XX.X
```

where XX.X is the CUDA version number.
You can also install the CUDA version that you need locally with a script provided by bitsandbytes as follows:

```bash
wget https://raw.githubusercontent.com/TimDettmers/bitsandbytes/main/cuda_install.sh
# Syntax: cuda_install.sh CUDA_VERSION INSTALL_PREFIX EXPORT_TO_BASH
#   CUDA_VERSION in {110, 111, 112, 113, 114, 115, 116, 117, 118, 120, 121, 122}
#   EXPORT_TO_BASH in {0, 1} with 0=False and 1=True

# For example, the following installs CUDA 11.7 to ~/local/cuda-11.7 and exports the path to your .bashrc
bash cuda_install.sh 117 ~/local 1
```
## Setting the environmental variables CUDA_HOME, CUDA_VERSION, and LD_LIBRARY_PATH

To manually override the PyTorch-installed CUDA version, you need to set these variables, like so:

```bash
export CUDA_HOME=<PATH>
export CUDA_VERSION=<VERSION>
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<PATH>
```
For example, to use the local install path from above:

```bash
export CUDA_HOME=/home/tim/local/cuda-11.7
export CUDA_VERSION=117
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/tim/local/cuda-11.7
```
It is best to add these lines to your `.bashrc` file to make them permanent.

If you now launch bitsandbytes with these environmental variables set, the PyTorch CUDA version will be overridden by the new CUDA version, and a different bitsandbytes library is loaded (in this case version 117).
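As a sketch under the same assumptions, the override can also be applied from inside Python, provided the variables are set before the import that triggers CUDA setup (the install path is the example from above):

```python
import os

# Assumed example install location from the instructions above.
os.environ["CUDA_HOME"] = "/home/tim/local/cuda-11.7"
os.environ["CUDA_VERSION"] = "117"

# Note: the dynamic linker reads LD_LIBRARY_PATH at process start, so that
# variable generally must be exported in the shell (e.g. via .bashrc) instead.

import bitsandbytes  # should now pick up libbitsandbytes_cuda117.so
```

Running `python -m bitsandbytes` afterwards prints the setup log, which should let you confirm which binary was actually loaded.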
|
Loading…
Reference in New Issue
Block a user