From a657623cbc4de4b3077b94212eb89573ece9fad3 Mon Sep 17 00:00:00 2001 From: mrq Date: Thu, 24 Aug 2023 21:45:50 +0000 Subject: [PATCH] updated vall-e training template to use path-based speakers because it would just have a batch/epoch size of 1 otherwise; revert hardcoded 'spit processed dataset to this path' from my training rig to spit it out in a sane spot --- models/.template.valle.yaml | 2 +- src/utils.py | 12 ++++++------ src/webui.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/models/.template.valle.yaml b/models/.template.valle.yaml index 7c2fb39..956612b 100755 --- a/models/.template.valle.yaml +++ b/models/.template.valle.yaml @@ -23,7 +23,7 @@ dataset: max_prompts: 3 prompt_duration: 3.0 - sample_type: speaker + sample_type: path tasks_list: ["tts"] # ["tts", "ns", "sr", "tse", "cse", "nse", "tts"] diff --git a/src/utils.py b/src/utils.py index df6bf11..94e7769 100755 --- a/src/utils.py +++ b/src/utils.py @@ -2662,11 +2662,11 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p if culled or args.tts_backend != "vall-e": continue - # os.makedirs(f'{indir}/valle/', exist_ok=True) - os.makedirs(f'./training/valle/data/{voice}/', exist_ok=True) + os.makedirs(f'{indir}/valle/', exist_ok=True) + #os.makedirs(f'./training/valle/data/{voice}/', exist_ok=True) - #phn_file = f'{indir}/valle/{file.replace(f".{extension}",".phn.txt")}' - phn_file = f'./training/valle/data/{voice}/{file.replace(f".{extension}",".phn.txt")}' + phn_file = f'{indir}/valle/{file.replace(f".{extension}",".phn.txt")}' + #phn_file = f'./training/valle/data/{voice}/{file.replace(f".{extension}",".phn.txt")}' if not os.path.exists(phn_file): jobs['phonemize'][0].append(phn_file) jobs['phonemize'][1].append(normalized) @@ -2676,8 +2676,8 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p print("Phonemized:", file, normalized, text) """ - #qnt_file = f'{indir}/valle/{file.replace(f".{extension}",".qnt.pt")}' - qnt_file = f'./training/valle/data/{voice}/{file.replace(f".{extension}",".qnt.pt")}' + qnt_file = f'{indir}/valle/{file.replace(f".{extension}",".qnt.pt")}' + #qnt_file = f'./training/valle/data/{voice}/{file.replace(f".{extension}",".qnt.pt")}' if 'error' not in result: if not quantize_in_memory and not os.path.exists(path): message = f"Missing segment, skipping... {file}" diff --git a/src/webui.py b/src/webui.py index 3ab5012..402bb2b 100755 --- a/src/webui.py +++ b/src/webui.py @@ -411,7 +411,7 @@ def setup_gradio(): GENERATE_SETTINGS["num_autoregressive_samples"] = gr.Slider(value=16, minimum=2, maximum=2048 if args.tts_backend=="vall-e" else 512, step=1, label="Samples", visible=args.tts_backend!="bark") GENERATE_SETTINGS["diffusion_iterations"] = gr.Slider(value=30, minimum=0, maximum=512, step=1, label="Iterations", visible=args.tts_backend=="tortoise") - GENERATE_SETTINGS["temperature"] = gr.Slider(value=0.95 if args.tts_backend=="vall-e" else 0.2, minimum=0, maximum=1, step=0.1, label="Temperature") + GENERATE_SETTINGS["temperature"] = gr.Slider(value=0.95 if args.tts_backend=="vall-e" else 0.2, minimum=0, maximum=1, step=0.05, label="Temperature") show_experimental_settings = gr.Checkbox(label="Show Experimental Settings", visible=args.tts_backend=="tortoise") reset_generate_settings_button = gr.Button(value="Reset to Default")