doing what I do best: sourcing other configs and banging until it works (it doesn't work)
This commit is contained in:
parent
da9b4b5fb5
commit
74510e8623
41
config/ds_config.json
Executable file
41
config/ds_config.json
Executable file
|
@ -0,0 +1,41 @@
|
|||
{
|
||||
"autotuning": {
|
||||
"enabled": false,
|
||||
"results_dir": "./config/autotune/results",
|
||||
"exps_dir": "./config/autotune/exps",
|
||||
"overwrite": false,
|
||||
"metric": "throughput",
|
||||
"start_profile_step": 10,
|
||||
"end_profile_step": 20,
|
||||
"fast": false,
|
||||
"max_train_batch_size": 32,
|
||||
"mp_size": 1,
|
||||
"num_tuning_micro_batch_sizes": 3,
|
||||
"tuner_type": "model_based",
|
||||
"tuner_early_stopping": 5,
|
||||
"tuner_num_trials": 50,
|
||||
"arg_mappings": {
|
||||
"train_micro_batch_size_per_gpu": "--per_device_train_batch_size",
|
||||
"gradient_accumulation_steps ": "--gradient_accumulation_steps"
|
||||
}
|
||||
},
|
||||
"zero_optimization": {
|
||||
"stage": 0,
|
||||
"offload_param": {
|
||||
"device": "nvme",
|
||||
"nvme_path": "/tmp/zero/",
|
||||
"pin_memory": false,
|
||||
"buffer_count": 5,
|
||||
"buffer_size": 1e9,
|
||||
"max_in_cpu": 1e9
|
||||
},
|
||||
"overlap_comm": true,
|
||||
"reduce_bucket_size": "auto",
|
||||
"contiguous_gradients": true,
|
||||
"sub_group_size": 1e8,
|
||||
"stage3_prefetch_bucket_size": "auto",
|
||||
"stage3_param_persistence_threshold": "auto",
|
||||
"stage3_max_live_parameters": "auto",
|
||||
"stage3_max_reuse_distance": "auto"
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user