{
  "optimizer": {
    "type": "AdamW",
    "params": {
      "lr": 2e-05,
      "betas": [
        0.9,
        0.96
      ],
      "eps": 1e-07,
      "weight_decay": 0.01
    }
  },
  "scheduler": {
    "type": "WarmupLR",
    "params": {
      "warmup_min_lr": 0,
      "warmup_max_lr": 2e-5,
      "warmup_num_steps": 100,
      "warmup_type": "linear"
    }
  },
  "fp16": {
    "enabled": true,
    "loss_scale": 0,
    "loss_scale_window": 1000,
    "initial_scale_power": 16,
    "hysteresis": 2,
    "min_loss_scale": 1
  },
  "autotuning": {
    "enabled": false,
    "results_dir": "./config/autotune/results",
    "exps_dir": "./config/autotune/exps",
    "overwrite": false,
    "metric": "throughput",
    "start_profile_step": 10,
    "end_profile_step": 20,
    "fast": false,
    "max_train_batch_size": 32,
    "mp_size": 1,
    "num_tuning_micro_batch_sizes": 3,
    "tuner_type": "model_based",
    "tuner_early_stopping": 5,
    "tuner_num_trials": 50,
    "arg_mappings": {
      "train_micro_batch_size_per_gpu": "--per_device_train_batch_size",
      "gradient_accumulation_steps": "--gradient_accumulation_steps"
    }
  },
  "zero_optimization": {
    "stage": 0,
    "reduce_bucket_size": "auto",
    "contiguous_gradients": true,
    "sub_group_size": 100000000,
    "stage3_prefetch_bucket_size": "auto",
    "stage3_param_persistence_threshold": "auto",
    "stage3_max_live_parameters": "auto",
    "stage3_max_reuse_distance": "auto"
  }
}