bitsandbytes-rocm/bitsandbytes/cuda_setup/paths.py

127 lines
4.6 KiB
Python
Raw Normal View History

from pathlib import Path
from typing import Set, Union
from warnings import warn
from ..utils import print_stderr
from .env_vars import get_potentially_lib_path_containing_env_vars
CUDA_RUNTIME_LIB: str = "libcudart.so"
def purge_unwanted_semicolon(tentative_path: Path) -> Path:
"""
Special function to handle the following exception:
__LMOD_REF_COUNT_PATH=/sw/cuda/11.6.2/bin:2;/mmfs1/home/dettmers/git/sched/bin:1;/mmfs1/home/dettmers/data/anaconda3/bin:1;/mmfs1/home/dettmers/data/anaconda3/condabin:1;/mmfs1/home/dettmers/.local/bin:1;/mmfs1/home/dettmers/bin:1;/usr/local/bin:1;/usr/bin:1;/usr/local/sbin:1;/usr/sbin:1;/mmfs1/home/dettmers/.fzf/bin:1;/mmfs1/home/dettmers/data/local/cuda-11.4/bin:1
"""
# if ';' in str(tentative_path):
# path_as_str, _ = str(tentative_path).split(';')
pass
def extract_candidate_paths(paths_list_candidate: str) -> Set[Path]:
return {Path(ld_path) for ld_path in paths_list_candidate.split(":") if ld_path}
def remove_non_existent_dirs(candidate_paths: Set[Path]) -> Set[Path]:
non_existent_directories: Set[Path] = {
path for path in candidate_paths if not path.exists()
}
if non_existent_directories:
print_stderr(
"WARNING: The following directories listed in your path were found to "
f"be non-existent: {non_existent_directories}"
)
return candidate_paths - non_existent_directories
def get_cuda_runtime_lib_paths(candidate_paths: Set[Path]) -> Set[Path]:
return {
path / CUDA_RUNTIME_LIB
for path in candidate_paths
if (path / CUDA_RUNTIME_LIB).is_file()
}
def resolve_paths_list(paths_list_candidate: str) -> Set[Path]:
"""
Searches a given environmental var for the CUDA runtime library,
i.e. `libcudart.so`.
"""
return remove_non_existent_dirs(extract_candidate_paths(paths_list_candidate))
def find_cuda_lib_in(paths_list_candidate: str) -> Set[Path]:
return get_cuda_runtime_lib_paths(
resolve_paths_list(paths_list_candidate)
)
def warn_in_case_of_duplicates(results_paths: Set[Path]) -> None:
if len(results_paths) > 1:
warning_msg = (
f"Found duplicate {CUDA_RUNTIME_LIB} files: {results_paths}.. "
"We'll flip a coin and try one of these, in order to fail forward.\n"
"Either way, this might cause trouble in the future:\n"
"If you get `CUDA error: invalid device function` errors, the above "
"might be the cause and the solution is to make sure only one "
f"{CUDA_RUNTIME_LIB} in the paths that we search based on your env."
)
warn(warning_msg)
def determine_cuda_runtime_lib_path() -> Union[Path, None]:
"""
Searches for a cuda installations, in the following order of priority:
1. active conda env
2. LD_LIBRARY_PATH
3. any other env vars, while ignoring those that
- are known to be unrelated (see `bnb.cuda_setup.env_vars.to_be_ignored`)
- don't contain the path separator `/`
If multiple libraries are found in part 3, we optimistically try one,
while giving a warning message.
"""
candidate_env_vars = get_potentially_lib_path_containing_env_vars()
if "CONDA_PREFIX" in candidate_env_vars:
conda_libs_path = Path(candidate_env_vars["CONDA_PREFIX"]) / "lib"
conda_cuda_libs = find_cuda_lib_in(str(conda_libs_path))
warn_in_case_of_duplicates(conda_cuda_libs)
if conda_cuda_libs:
return next(iter(conda_cuda_libs))
warn(
f'{candidate_env_vars["CONDA_PREFIX"]} did not contain '
f'{CUDA_RUNTIME_LIB} as expected! Searching further paths...'
)
if "LD_LIBRARY_PATH" in candidate_env_vars:
lib_ld_cuda_libs = find_cuda_lib_in(candidate_env_vars["LD_LIBRARY_PATH"])
if lib_ld_cuda_libs:
return next(iter(lib_ld_cuda_libs))
warn_in_case_of_duplicates(lib_ld_cuda_libs)
warn(
f'{candidate_env_vars["LD_LIBRARY_PATH"]} did not contain '
f'{CUDA_RUNTIME_LIB} as expected! Searching further paths...'
)
remaining_candidate_env_vars = {
env_var: value for env_var, value in candidate_env_vars.items()
if env_var not in {"CONDA_PREFIX", "LD_LIBRARY_PATH"}
}
cuda_runtime_libs = set()
for env_var, value in remaining_candidate_env_vars:
cuda_runtime_libs.update(find_cuda_lib_in(value))
warn_in_case_of_duplicates(cuda_runtime_libs)
return next(iter(cuda_runtime_libs)) if cuda_runtime_libs else set()