diff --git a/config/ds_config.json b/config/ds_config.json index 6cf23e1..47827ca 100755 --- a/config/ds_config.json +++ b/config/ds_config.json @@ -1,41 +1,61 @@ { - "autotuning": { - "enabled": false, - "results_dir": "./config/autotune/results", - "exps_dir": "./config/autotune/exps", - "overwrite": false, - "metric": "throughput", - "start_profile_step": 10, - "end_profile_step": 20, - "fast": false, - "max_train_batch_size": 32, - "mp_size": 1, - "num_tuning_micro_batch_sizes": 3, - "tuner_type": "model_based", - "tuner_early_stopping": 5, - "tuner_num_trials": 50, - "arg_mappings": { - "train_micro_batch_size_per_gpu": "--per_device_train_batch_size", - "gradient_accumulation_steps ": "--gradient_accumulation_steps" + "optimizer": { + "type": "AdamW", + "params": { + "lr": 2e-05, + "betas": [ + 0.9, + 0.96 + ], + "eps": 1e-07, + "weight_decay": 0.01 + } + }, + "scheduler":{ + "type":"WarmupLR", + "params":{ + "warmup_min_lr":0, + "warmup_max_lr":2e-5, + "warmup_num_steps":100, + "warmup_type":"linear" } }, - "zero_optimization": { - "stage": 0, - "offload_param": { - "device": "nvme", - "nvme_path": "/tmp/zero/", - "pin_memory": false, - "buffer_count": 5, - "buffer_size": 1e9, - "max_in_cpu": 1e9 - }, - "overlap_comm": true, - "reduce_bucket_size": "auto", - "contiguous_gradients": true, - "sub_group_size": 1e8, - "stage3_prefetch_bucket_size": "auto", - "stage3_param_persistence_threshold": "auto", - "stage3_max_live_parameters": "auto", - "stage3_max_reuse_distance": "auto" + "fp16":{ + "enabled":true, + "loss_scale":0, + "loss_scale_window":1000, + "initial_scale_power":16, + "hysteresis":2, + "min_loss_scale":1 + }, + "autotuning":{ + "enabled":false, + "results_dir":"./config/autotune/results", + "exps_dir":"./config/autotune/exps", + "overwrite":false, + "metric":"throughput", + "start_profile_step":10, + "end_profile_step":20, + "fast":false, + "max_train_batch_size":32, + "mp_size":1, + "num_tuning_micro_batch_sizes":3, + "tuner_type":"model_based", + "tuner_early_stopping":5, + "tuner_num_trials":50, + "arg_mappings":{ + "train_micro_batch_size_per_gpu":"--per_device_train_batch_size", + "gradient_accumulation_steps ":"--gradient_accumulation_steps" + } + }, + "zero_optimization":{ + "stage":0, + "reduce_bucket_size":"auto", + "contiguous_gradients":true, + "sub_group_size":1e8, + "stage3_prefetch_bucket_size":"auto", + "stage3_param_persistence_threshold":"auto", + "stage3_max_live_parameters":"auto", + "stage3_max_reuse_distance":"auto" } } \ No newline at end of file diff --git a/models/.template.valle.yaml b/models/.template.valle.yaml index c9389ed..d3458f2 100755 --- a/models/.template.valle.yaml +++ b/models/.template.valle.yaml @@ -3,14 +3,17 @@ ckpt_root: ./training/${voice}/finetune/ckpt/ log_root: ./training/${voice}/finetune/logs/ data_dirs: [./training/${voice}/valle/] -spkr_name_getter: "lambda p: p.parts[-3]" +spkr_name_getter: "lambda p: p.parts[-3]" # "lambda p: p.parts[-1].split('-')[0]" model: ${model_name} batch_size: ${batch_size} -eval_batch_size: ${validation_batch_size} +gradient_accumulation_steps: ${gradient_accumulation_size} +eval_batch_size: ${batch_size} max_iter: ${iterations} save_ckpt_every: ${save_rate} eval_every: ${validation_rate} +max_phones: 256 + sampling_temperature: 1.0 \ No newline at end of file diff --git a/src/webui.py b/src/webui.py index 4bffa29..7436739 100755 --- a/src/webui.py +++ b/src/webui.py @@ -488,7 +488,7 @@ def setup_gradio(): ) with gr.Row(): TRAINING_SETTINGS["batch_size"] = gr.Number(label="Batch Size", value=128, precision=0) - TRAINING_SETTINGS["gradient_accumulation_size"] = gr.Number(label="Gradient Accumulation Size", value=4, precision=0, visible=args.tts_backend=="tortoise") + TRAINING_SETTINGS["gradient_accumulation_size"] = gr.Number(label="Gradient Accumulation Size", value=4, precision=0) with gr.Row(): TRAINING_SETTINGS["save_rate"] = gr.Number(label="Save Frequency (in epochs)", value=5, precision=0) TRAINING_SETTINGS["validation_rate"] = gr.Number(label="Validation Frequency (in epochs)", value=5, precision=0)