2023-03-18 15:16:15 +00:00
|
|
|
{
|
2023-03-20 01:22:53 +00:00
|
|
|
"optimizer": {
|
|
|
|
"type": "AdamW",
|
|
|
|
"params": {
|
|
|
|
"lr": 2e-05,
|
|
|
|
"betas": [
|
|
|
|
0.9,
|
|
|
|
0.96
|
|
|
|
],
|
|
|
|
"eps": 1e-07,
|
|
|
|
"weight_decay": 0.01
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"scheduler":{
|
|
|
|
"type":"WarmupLR",
|
|
|
|
"params":{
|
|
|
|
"warmup_min_lr":0,
|
|
|
|
"warmup_max_lr":2e-5,
|
|
|
|
"warmup_num_steps":100,
|
|
|
|
"warmup_type":"linear"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"fp16":{
|
|
|
|
"enabled":true,
|
|
|
|
"loss_scale":0,
|
|
|
|
"loss_scale_window":1000,
|
|
|
|
"initial_scale_power":16,
|
|
|
|
"hysteresis":2,
|
|
|
|
"min_loss_scale":1
|
|
|
|
},
|
|
|
|
"autotuning":{
|
|
|
|
"enabled":false,
|
|
|
|
"results_dir":"./config/autotune/results",
|
|
|
|
"exps_dir":"./config/autotune/exps",
|
|
|
|
"overwrite":false,
|
|
|
|
"metric":"throughput",
|
|
|
|
"start_profile_step":10,
|
|
|
|
"end_profile_step":20,
|
|
|
|
"fast":false,
|
|
|
|
"max_train_batch_size":32,
|
|
|
|
"mp_size":1,
|
|
|
|
"num_tuning_micro_batch_sizes":3,
|
|
|
|
"tuner_type":"model_based",
|
|
|
|
"tuner_early_stopping":5,
|
|
|
|
"tuner_num_trials":50,
|
|
|
|
"arg_mappings":{
|
|
|
|
"train_micro_batch_size_per_gpu":"--per_device_train_batch_size",
|
|
|
|
"gradient_accumulation_steps":"--gradient_accumulation_steps"
|
2023-03-18 15:16:15 +00:00
|
|
|
}
|
|
|
|
},
|
2023-03-20 01:22:53 +00:00
|
|
|
"zero_optimization":{
|
|
|
|
"stage":0,
|
|
|
|
"reduce_bucket_size":"auto",
|
|
|
|
"contiguous_gradients":true,
|
|
|
|
"sub_group_size":1e8,
|
|
|
|
"stage3_prefetch_bucket_size":"auto",
|
|
|
|
"stage3_param_persistence_threshold":"auto",
|
|
|
|
"stage3_max_live_parameters":"auto",
|
|
|
|
"stage3_max_reuse_distance":"auto"
|
2023-03-18 15:16:15 +00:00
|
|
|
}
|
|
|
|
}
|