doing what I do best: sourcing other configs and banging until it works (it doesn't work)

This commit is contained in:
mrq 2023-03-18 15:16:15 +00:00
parent da9b4b5fb5
commit 74510e8623
2 changed files with 41 additions and 0 deletions

View File

41
config/ds_config.json Executable file
View File

@ -0,0 +1,41 @@
{
  "autotuning": {
    "enabled": false,
    "results_dir": "./config/autotune/results",
    "exps_dir": "./config/autotune/exps",
    "overwrite": false,
    "metric": "throughput",
    "start_profile_step": 10,
    "end_profile_step": 20,
    "fast": false,
    "max_train_batch_size": 32,
    "mp_size": 1,
    "num_tuning_micro_batch_sizes": 3,
    "tuner_type": "model_based",
    "tuner_early_stopping": 5,
    "tuner_num_trials": 50,
    "arg_mappings": {
      "train_micro_batch_size_per_gpu": "--per_device_train_batch_size",
      "gradient_accumulation_steps": "--gradient_accumulation_steps"
    }
  },
  "zero_optimization": {
    "stage": 0,
    "offload_param": {
      "device": "nvme",
      "nvme_path": "/tmp/zero/",
      "pin_memory": false,
      "buffer_count": 5,
      "buffer_size": 1000000000,
      "max_in_cpu": 1000000000
    },
    "overlap_comm": true,
    "reduce_bucket_size": "auto",
    "contiguous_gradients": true,
    "sub_group_size": 100000000,
    "stage3_prefetch_bucket_size": "auto",
    "stage3_param_persistence_threshold": "auto",
    "stage3_max_live_parameters": "auto",
    "stage3_max_reuse_distance": "auto"
  }
}