{
    "optimizer": {
      "type": "AdamW",
      "params": {
        "lr": 2e-05,
        "betas": [
          0.9,
          0.96
        ],
        "eps": 1e-07,
        "weight_decay": 0.01
      }
    },
    "scheduler":{
        "type":"WarmupLR",
        "params":{
            "warmup_min_lr":0,
            "warmup_max_lr":2e-5,
            "warmup_num_steps":100,
            "warmup_type":"linear"
        }
    },
    "fp16":{
        "enabled":true,
        "loss_scale":0,
        "loss_scale_window":1000,
        "initial_scale_power":16,
        "hysteresis":2,
        "min_loss_scale":1
    },
    "autotuning":{
        "enabled":false,
        "results_dir":"./config/autotune/results",
        "exps_dir":"./config/autotune/exps",
        "overwrite":false,
        "metric":"throughput",
        "start_profile_step":10,
        "end_profile_step":20,
        "fast":false,
        "max_train_batch_size":32,
        "mp_size":1,
        "num_tuning_micro_batch_sizes":3,
        "tuner_type":"model_based",
        "tuner_early_stopping":5,
        "tuner_num_trials":50,
        "arg_mappings":{
            "train_micro_batch_size_per_gpu":"--per_device_train_batch_size",
            "gradient_accumulation_steps ":"--gradient_accumulation_steps"
        }
    },
    "zero_optimization":{
        "stage":0,
        "reduce_bucket_size":"auto",
        "contiguous_gradients":true,
        "sub_group_size":1e8,
        "stage3_prefetch_bucket_size":"auto",
        "stage3_param_persistence_threshold":"auto",
        "stage3_max_live_parameters":"auto",
        "stage3_max_reuse_distance":"auto"
    }
}