VALL-E config edits

2023-03-20 01:22:53 +00:00 · 2023-03-20 01:22:53 +00:00 · 34ef0467b9
commit 34ef0467b9
parent 2e33bf071a
3 changed files with 62 additions and 39 deletions
--- a/config/ds_config.json
+++ b/config/ds_config.json
@@ -1,41 +1,61 @@
 {
-    "autotuning": {
+    "optimizer": {
-        "enabled": false,
+      "type": "AdamW",
-        "results_dir": "./config/autotune/results",
+      "params": {
-        "exps_dir": "./config/autotune/exps",
+        "lr": 2e-05,
-        "overwrite": false,
+        "betas": [
-        "metric": "throughput",
+          0.9,
-        "start_profile_step": 10,
+          0.96
-        "end_profile_step": 20,
+        ],
-        "fast": false,
+        "eps": 1e-07,
-        "max_train_batch_size": 32,
+        "weight_decay": 0.01
        "mp_size": 1,
        "num_tuning_micro_batch_sizes": 3,
        "tuner_type": "model_based",
        "tuner_early_stopping": 5,
        "tuner_num_trials": 50,
        "arg_mappings": {
            "train_micro_batch_size_per_gpu": "--per_device_train_batch_size",
            "gradient_accumulation_steps ": "--gradient_accumulation_steps"
      }
    },
-    "zero_optimization": {
+    "scheduler":{
-        "stage": 0,
+        "type":"WarmupLR",
-        "offload_param": {
+        "params":{
-            "device": "nvme",
+            "warmup_min_lr":0,
-            "nvme_path": "/tmp/zero/",
+            "warmup_max_lr":2e-5,
-            "pin_memory": false,
+            "warmup_num_steps":100,
-            "buffer_count": 5,
+            "warmup_type":"linear"
-            "buffer_size": 1e9,
+        }
            "max_in_cpu": 1e9
    },
-        "overlap_comm": true,
+    "fp16":{
-        "reduce_bucket_size": "auto",
+        "enabled":true,
-        "contiguous_gradients": true,
+        "loss_scale":0,
-        "sub_group_size": 1e8,
+        "loss_scale_window":1000,
-        "stage3_prefetch_bucket_size": "auto",
+        "initial_scale_power":16,
-        "stage3_param_persistence_threshold": "auto",
+        "hysteresis":2,
-        "stage3_max_live_parameters": "auto",
+        "min_loss_scale":1
-        "stage3_max_reuse_distance": "auto"
+    },
    "autotuning":{
        "enabled":false,
        "results_dir":"./config/autotune/results",
        "exps_dir":"./config/autotune/exps",
        "overwrite":false,
        "metric":"throughput",
        "start_profile_step":10,
        "end_profile_step":20,
        "fast":false,
        "max_train_batch_size":32,
        "mp_size":1,
        "num_tuning_micro_batch_sizes":3,
        "tuner_type":"model_based",
        "tuner_early_stopping":5,
        "tuner_num_trials":50,
        "arg_mappings":{
            "train_micro_batch_size_per_gpu":"--per_device_train_batch_size",
            "gradient_accumulation_steps ":"--gradient_accumulation_steps"
        }
    },
    "zero_optimization":{
        "stage":0,
        "reduce_bucket_size":"auto",
        "contiguous_gradients":true,
        "sub_group_size":1e8,
        "stage3_prefetch_bucket_size":"auto",
        "stage3_param_persistence_threshold":"auto",
        "stage3_max_live_parameters":"auto",
        "stage3_max_reuse_distance":"auto"
    }
 }
--- a/models/.template.valle.yaml
+++ b/models/.template.valle.yaml
@@ -3,14 +3,17 @@ ckpt_root: ./training/${voice}/finetune/ckpt/
 log_root: ./training/${voice}/finetune/logs/
 data_dirs: [./training/${voice}/valle/]
-spkr_name_getter: "lambda p: p.parts[-3]"
+spkr_name_getter: "lambda p: p.parts[-3]" # "lambda p: p.parts[-1].split('-')[0]"
 model: ${model_name}
 batch_size: ${batch_size}
-eval_batch_size: ${validation_batch_size}
+gradient_accumulation_steps: ${gradient_accumulation_size}
 eval_batch_size: ${batch_size}
 max_iter: ${iterations}
 save_ckpt_every: ${save_rate}
 eval_every: ${validation_rate}
 max_phones: 256
 sampling_temperature: 1.0
--- a/src/webui.py
+++ b/src/webui.py
@@ -488,7 +488,7 @@ def setup_gradio():
 							)
 						with gr.Row():
 							TRAINING_SETTINGS["batch_size"] = gr.Number(label="Batch Size", value=128, precision=0)
-							TRAINING_SETTINGS["gradient_accumulation_size"] = gr.Number(label="Gradient Accumulation Size", value=4, precision=0, visible=args.tts_backend=="tortoise")
+							TRAINING_SETTINGS["gradient_accumulation_size"] = gr.Number(label="Gradient Accumulation Size", value=4, precision=0)
 						with gr.Row():
 							TRAINING_SETTINGS["save_rate"] = gr.Number(label="Save Frequency (in epochs)", value=5, precision=0)
 							TRAINING_SETTINGS["validation_rate"] = gr.Number(label="Validation Frequency (in epochs)", value=5, precision=0)