doing what I do best: sourcing other configs and banging until it works (it doesn't work)

2023-03-18 15:16:15 +00:00 · 2023-03-18 15:16:15 +00:00 · 74510e8623
commit 74510e8623
parent da9b4b5fb5
2 changed files with 41 additions and 0 deletions
--- a/config/.gitkeep
+++ b/config/.gitkeep
--- a/config/ds_config.json
+++ b/config/ds_config.json
@ -0,0 +1,41 @@
 {
    "autotuning": {
        "enabled": false,
        "results_dir": "./config/autotune/results",
        "exps_dir": "./config/autotune/exps",
        "overwrite": false,
        "metric": "throughput",
        "start_profile_step": 10,
        "end_profile_step": 20,
        "fast": false,
        "max_train_batch_size": 32,
        "mp_size": 1,
        "num_tuning_micro_batch_sizes": 3,
        "tuner_type": "model_based",
        "tuner_early_stopping": 5,
        "tuner_num_trials": 50,
        "arg_mappings": {
            "train_micro_batch_size_per_gpu": "--per_device_train_batch_size",
            "gradient_accumulation_steps": "--gradient_accumulation_steps"
        }
    },
    "zero_optimization": {
        "stage": 0,
        "offload_param": {
            "device": "nvme",
            "nvme_path": "/tmp/zero/",
            "pin_memory": false,
            "buffer_count": 5,
            "buffer_size": 1e9,
            "max_in_cpu": 1e9
        },
        "overlap_comm": true,
        "reduce_bucket_size": "auto",
        "contiguous_gradients": true,
        "sub_group_size": 1e8,
        "stage3_prefetch_bucket_size": "auto",
        "stage3_param_persistence_threshold": "auto",
        "stage3_max_live_parameters": "auto",
        "stage3_max_reuse_distance": "auto"
    }
 }