From 34ef0467b901623f1160084b529392df338e3c35 Mon Sep 17 00:00:00 2001
From: mrq <mrq@ecker.tech>
Date: Mon, 20 Mar 2023 01:22:53 +0000
Subject: [PATCH] VALL-E config edits: DeepSpeed optimizer/fp16, gradient accumulation

Add explicit AdamW optimizer, WarmupLR scheduler, and fp16 sections to the
DeepSpeed config, and drop the NVMe offload_param settings from
zero_optimization. In the VALL-E training template, wire up
gradient_accumulation_steps, set max_phones to 256, and reuse ${batch_size}
for evaluation batches. In the web UI, show the Gradient Accumulation Size
field for every TTS backend instead of only tortoise.
---
 config/ds_config.json       | 92 ++++++++++++++++++++++---------------
 models/.template.valle.yaml |  7 ++-
 src/webui.py                |  2 +-
 3 files changed, 62 insertions(+), 39 deletions(-)

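Note (not part of the commit): the two speaker-name getters mentioned in
models/.template.valle.yaml differ only in where they read the speaker from.
A minimal sketch, assuming a made-up dataset path (the real layout depends on
how the voice data was prepared):

    # hypothetical path; both getters resolve to "myvoice" here
    from pathlib import Path
    p = Path("./training/myvoice/valle/myvoice/quant/myvoice-0001.qnt.pt")
    by_directory = (lambda p: p.parts[-3])(p)               # speaker = third-from-last path part
    by_filename = (lambda p: p.parts[-1].split('-')[0])(p)  # speaker = filename prefix before '-'
    print(by_directory, by_filename)                        # myvoice myvoice
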
diff --git a/config/ds_config.json b/config/ds_config.json
index 6cf23e1..47827ca 100755
--- a/config/ds_config.json
+++ b/config/ds_config.json
@@ -1,41 +1,61 @@
 {
-    "autotuning": {
-        "enabled": false,
-        "results_dir": "./config/autotune/results",
-        "exps_dir": "./config/autotune/exps",
-        "overwrite": false,
-        "metric": "throughput",
-        "start_profile_step": 10,
-        "end_profile_step": 20,
-        "fast": false,
-        "max_train_batch_size": 32,
-        "mp_size": 1,
-        "num_tuning_micro_batch_sizes": 3,
-        "tuner_type": "model_based",
-        "tuner_early_stopping": 5,
-        "tuner_num_trials": 50,
-        "arg_mappings": {
-            "train_micro_batch_size_per_gpu": "--per_device_train_batch_size",
-            "gradient_accumulation_steps ": "--gradient_accumulation_steps"
+    "optimizer": {
+      "type": "AdamW",
+      "params": {
+        "lr": 2e-05,
+        "betas": [
+          0.9,
+          0.96
+        ],
+        "eps": 1e-07,
+        "weight_decay": 0.01
+      }
+    },
+    "scheduler":{
+        "type":"WarmupLR",
+        "params":{
+            "warmup_min_lr":0,
+            "warmup_max_lr":2e-5,
+            "warmup_num_steps":100,
+            "warmup_type":"linear"
         }
     },
-    "zero_optimization": {
-        "stage": 0,
-        "offload_param": {
-            "device": "nvme",
-            "nvme_path": "/tmp/zero/",
-            "pin_memory": false,
-            "buffer_count": 5,
-            "buffer_size": 1e9,
-            "max_in_cpu": 1e9
-        },
-        "overlap_comm": true,
-        "reduce_bucket_size": "auto",
-        "contiguous_gradients": true,
-        "sub_group_size": 1e8,
-        "stage3_prefetch_bucket_size": "auto",
-        "stage3_param_persistence_threshold": "auto",
-        "stage3_max_live_parameters": "auto",
-        "stage3_max_reuse_distance": "auto"
+    "fp16":{
+        "enabled":true,
+        "loss_scale":0,
+        "loss_scale_window":1000,
+        "initial_scale_power":16,
+        "hysteresis":2,
+        "min_loss_scale":1
+    },
+    "autotuning":{
+        "enabled":false,
+        "results_dir":"./config/autotune/results",
+        "exps_dir":"./config/autotune/exps",
+        "overwrite":false,
+        "metric":"throughput",
+        "start_profile_step":10,
+        "end_profile_step":20,
+        "fast":false,
+        "max_train_batch_size":32,
+        "mp_size":1,
+        "num_tuning_micro_batch_sizes":3,
+        "tuner_type":"model_based",
+        "tuner_early_stopping":5,
+        "tuner_num_trials":50,
+        "arg_mappings":{
+            "train_micro_batch_size_per_gpu":"--per_device_train_batch_size",
+            "gradient_accumulation_steps ":"--gradient_accumulation_steps"
+        }
+    },
+    "zero_optimization":{
+        "stage":0,
+        "reduce_bucket_size":"auto",
+        "contiguous_gradients":true,
+        "sub_group_size":1e8,
+        "stage3_prefetch_bucket_size":"auto",
+        "stage3_param_persistence_threshold":"auto",
+        "stage3_max_live_parameters":"auto",
+        "stage3_max_reuse_distance":"auto"
     }
 }
\ No newline at end of file
diff --git a/models/.template.valle.yaml b/models/.template.valle.yaml
index c9389ed..d3458f2 100755
--- a/models/.template.valle.yaml
+++ b/models/.template.valle.yaml
@@ -3,14 +3,17 @@ ckpt_root: ./training/${voice}/finetune/ckpt/
 log_root: ./training/${voice}/finetune/logs/
 
 data_dirs: [./training/${voice}/valle/]
-spkr_name_getter: "lambda p: p.parts[-3]"
+spkr_name_getter: "lambda p: p.parts[-3]" # alternative: "lambda p: p.parts[-1].split('-')[0]" (speaker name from the filename prefix before the first '-')
 
 model: ${model_name}
 batch_size: ${batch_size}
-eval_batch_size: ${validation_batch_size}
+gradient_accumulation_steps: ${gradient_accumulation_size}
+eval_batch_size: ${batch_size}
 
 max_iter: ${iterations}
 save_ckpt_every: ${save_rate}
 eval_every: ${validation_rate}
 
+max_phones: 256
+
 sampling_temperature: 1.0
\ No newline at end of file
diff --git a/src/webui.py b/src/webui.py
index 4bffa29..7436739 100755
--- a/src/webui.py
+++ b/src/webui.py
@@ -488,7 +488,7 @@ def setup_gradio():
 							)
 						with gr.Row():
 							TRAINING_SETTINGS["batch_size"] = gr.Number(label="Batch Size", value=128, precision=0)
-							TRAINING_SETTINGS["gradient_accumulation_size"] = gr.Number(label="Gradient Accumulation Size", value=4, precision=0, visible=args.tts_backend=="tortoise")
+							TRAINING_SETTINGS["gradient_accumulation_size"] = gr.Number(label="Gradient Accumulation Size", value=4, precision=0)
 						with gr.Row():
 							TRAINING_SETTINGS["save_rate"] = gr.Number(label="Save Frequency (in epochs)", value=5, precision=0)
 							TRAINING_SETTINGS["validation_rate"] = gr.Number(label="Validation Frequency (in epochs)", value=5, precision=0)