diff --git a/src/webui.py b/src/webui.py
index 560453d..3d14b50 100755
--- a/src/webui.py
+++ b/src/webui.py
@@ -92,7 +92,7 @@ def generate_proxy(
 		unload_tts()
 		raise e
-
+
 	return (
 		outputs[0],
 		gr.update(value=sample, visible=sample is not None),
@@ -131,7 +131,7 @@ def history_view_results( voice ):
 		metadata, _ = read_generate_settings(f"{outdir}/{file}", read_latents=False)
 		if metadata is None: continue
-
+
 		values = []
 		for k in HISTORY_HEADERS:
 			v = file
@@ -185,7 +185,7 @@ def read_generate_settings_proxy(file, saveAs='.temp'):
 		os.makedirs(outdir, exist_ok=True)
 		with open(f'{outdir}/cond_latents.pth', 'wb') as f:
 			f.write(latents)
-
+
 		latents = f'{outdir}/cond_latents.pth'
 	return (
@@ -229,7 +229,7 @@ def prepare_all_datasets( language, validation_text_length, validation_audio_len
 			print("Processing:", voice)
 			message = slice_dataset( voice, trim_silence=trim_silence, start_offset=slice_start_offset, end_offset=slice_end_offset, results=None, progress=progress )
 			messages.append(message)
-
+
 	for voice in voices:
 		print("Processing:", voice)
 		message = prepare_dataset( voice, use_segments=slice_audio, text_length=validation_text_length, audio_length=validation_audio_length, progress=progress )
@@ -239,7 +239,7 @@
 def prepare_dataset_proxy( voice, language, validation_text_length, validation_audio_length, skip_existings, slice_audio, trim_silence, slice_start_offset, slice_end_offset, progress=gr.Progress(track_tqdm=True) ):
 	messages = []
-
+
 	message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress )
 	messages.append(message)
@@ -355,7 +355,7 @@ def setup_gradio():
 	voice_list_with_defaults = get_voice_list(append_defaults=True)
 	voice_list = get_voice_list()
 	result_voices = get_voice_list(args.results_folder)
-
+
 	valle_models = get_valle_models()
 	autoregressive_models = get_autoregressive_models()
@@ -371,7 +371,9 @@ def setup_gradio():
 		arg = GENERATE_SETTINGS_ARGS[i]
 		GENERATE_SETTINGS[arg] = None
-	with gr.Blocks() as ui:
+	with gr.Blocks(theme="freddyaboulton/dracula_revamped", css="footer { display: none!important}", title="Voice Cloning WebUI") as ui:
+		gr.Markdown("## 🤗🎙️ Voice Cloning")
+		gr.Markdown("AI voice cloning based on Tortoise")
 		with gr.Tab("Generate"):
 			with gr.Row():
 				with gr.Column():
@@ -402,7 +404,7 @@ def setup_gradio():
 						outputs=GENERATE_SETTINGS["mic_audio"],
 					)
 				with gr.Column():
-					preset = None
+					preset = None
 					GENERATE_SETTINGS["candidates"] = gr.Slider(value=1, minimum=1, maximum=6, step=1, label="Candidates", visible=args.tts_backend=="tortoise")
 					GENERATE_SETTINGS["seed"] = gr.Number(value=0, precision=0, label="Seed", visible=args.tts_backend=="tortoise")
@@ -412,7 +414,7 @@ def setup_gradio():
 					GENERATE_SETTINGS["diffusion_iterations"] = gr.Slider(value=30, minimum=0, maximum=512, step=1, label="Iterations", visible=args.tts_backend=="tortoise")
 					GENERATE_SETTINGS["temperature"] = gr.Slider(value=0.95 if args.tts_backend=="vall-e" else 0.2, minimum=0, maximum=1, step=0.05, label="Temperature")
-
+
 					show_experimental_settings = gr.Checkbox(label="Show Experimental Settings", visible=args.tts_backend=="tortoise")
 					reset_generate_settings_button = gr.Button(value="Reset to Default")
 				with gr.Column(visible=False) as col:
@@ -514,7 +516,7 @@ def setup_gradio():
 						transcribe_button = gr.Button(value="Transcribe and Process")
 						transcribe_all_button = gr.Button(value="Transcribe All")
 						diarize_button = gr.Button(value="Diarize", visible=False)
-
+
 					with gr.Row():
 						slice_dataset_button = gr.Button(value="(Re)Slice Audio")
 						prepare_dataset_button = gr.Button(value="(Re)Create Dataset")
@@ -534,7 +536,7 @@ def setup_gradio():
 						TRAINING_SETTINGS["learning_rate"] = gr.Slider(label="Learning Rate", value=1e-5, minimum=0, maximum=1e-4, step=1e-6)
 						TRAINING_SETTINGS["mel_lr_weight"] = gr.Slider(label="Mel LR Ratio", value=1.00, minimum=0, maximum=1)
 						TRAINING_SETTINGS["text_lr_weight"] = gr.Slider(label="Text LR Ratio", value=0.01, minimum=0, maximum=1)
-
+
 					with gr.Row(visible=args.tts_backend=="tortoise"):
 						lr_schemes = list(LEARNING_RATE_SCHEMES.keys())
 						TRAINING_SETTINGS["learning_rate_scheme"] = gr.Radio(lr_schemes, label="Learning Rate Scheme", value=lr_schemes[0], type="value")
@@ -567,7 +569,7 @@ def setup_gradio():
 					TRAINING_SETTINGS["source_model"] = gr.Dropdown( choices=autoregressive_models, label="Source Model", type="value", value=autoregressive_models[0], visible=args.tts_backend=="tortoise" )
 					TRAINING_SETTINGS["resume_state"] = gr.Textbox(label="Resume State Path", placeholder="./training/${voice}/finetune/training_state/${last_state}.state", visible=args.tts_backend=="tortoise")
-
+
 					TRAINING_SETTINGS["voice"] = gr.Dropdown( choices=dataset_list, label="Dataset", type="value", value=dataset_list[0] if len(dataset_list) else "" )
 					with gr.Row():
@@ -585,9 +587,9 @@ def setup_gradio():
 					refresh_configs = gr.Button(value="Refresh Configurations")
 					training_output = gr.TextArea(label="Console Output", interactive=False, max_lines=8)
 					verbose_training = gr.Checkbox(label="Verbose Console Output", value=True)
-
+
 					keep_x_past_checkpoints = gr.Slider(label="Keep X Previous States", minimum=0, maximum=8, value=0, step=1)
-
+
 					with gr.Row():
 						training_graph_x_min = gr.Number(label="X Min", precision=0, value=0)
 						training_graph_x_max = gr.Number(label="X Max", precision=0, value=0)
@@ -598,8 +600,8 @@ def setup_gradio():
 						start_training_button = gr.Button(value="Train")
 						stop_training_button = gr.Button(value="Stop")
 						reconnect_training_button = gr.Button(value="Reconnect")
-
-
+
+
 				with gr.Column():
 					training_loss_graph = gr.LinePlot(label="Training Metrics",
 						x="it", # x="epoch",
@@ -658,7 +660,7 @@ def setup_gradio():
 					EXEC_SETTINGS['results_folder'] = gr.Textbox(label="Results Folder", value=args.results_folder)
 					# EXEC_SETTINGS['tts_backend'] = gr.Dropdown(TTSES, label="TTS Backend", value=args.tts_backend if args.tts_backend else TTSES[0])
-
+
 				if args.tts_backend=="vall-e":
 					with gr.Column():
 						EXEC_SETTINGS['valle_model'] = gr.Dropdown(choices=valle_models, label="VALL-E Model Config", value=args.valle_model if args.valle_model else valle_models[0])
@@ -668,7 +670,7 @@ def setup_gradio():
 						EXEC_SETTINGS['diffusion_model'] = gr.Dropdown(choices=diffusion_models, label="Diffusion Model", value=args.diffusion_model if args.diffusion_model else diffusion_models[0])
 						EXEC_SETTINGS['vocoder_model'] = gr.Dropdown(VOCODERS, label="Vocoder", value=args.vocoder_model if args.vocoder_model else VOCODERS[-1])
 						EXEC_SETTINGS['tokenizer_json'] = gr.Dropdown(tokenizer_jsons, label="Tokenizer JSON Path", value=args.tokenizer_json if args.tokenizer_json else tokenizer_jsons[0])
-
+
 				EXEC_SETTINGS['training_default_halfp'] = TRAINING_SETTINGS['half_p']
 				EXEC_SETTINGS['training_default_bnb'] = TRAINING_SETTINGS['bitsandbytes']
@@ -718,7 +720,7 @@ def setup_gradio():
 		exec_inputs = list(EXEC_SETTINGS.values())
 		for k in EXEC_SETTINGS:
 			EXEC_SETTINGS[k].change( fn=update_args_proxy, inputs=exec_inputs )
-
+
 		EXEC_SETTINGS['autoregressive_model'].change(
 			fn=update_autoregressive_model,
 			inputs=EXEC_SETTINGS['autoregressive_model'],
@@ -790,7 +792,7 @@ def setup_gradio():
 			],
 			outputs=GENERATE_SETTINGS['voice'],
 		)
-
+
 		GENERATE_SETTINGS['emotion'].change(
 			fn=lambda value: gr.update(visible=value == "Custom"),
 			inputs=GENERATE_SETTINGS['emotion'],
@@ -944,7 +946,7 @@ def setup_gradio():
 			],
 			outputs=prepare_dataset_output
 		)
-
+
 		training_refresh_dataset.click(
 			lambda: gr.update(choices=get_dataset_list()),
 			inputs=None,
@@ -966,7 +968,7 @@ def setup_gradio():
 		if os.path.isfile('./config/generate.json'):
 			ui.load(import_generate_settings_proxy, inputs=None, outputs=generate_settings)
-
+
 		if args.check_for_updates:
 			ui.load(check_for_updates)
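
Apart from trailing-whitespace cleanup, the functional change in this diff is the themed `gr.Blocks(...)` constructor plus the two `gr.Markdown` header lines. The snippet below is a minimal standalone sketch of that pattern, assuming a Gradio release recent enough to resolve Hugging Face Hub theme strings (roughly 3.23+); the `build_ui` helper and the placeholder tab body are illustrative only and not part of webui.py.

```python
# Minimal sketch of the themed Blocks wrapper introduced in this diff.
# Assumes gradio >= 3.23 (Hub theme strings); the tab body is a placeholder.
import gradio as gr


def build_ui() -> gr.Blocks:
    with gr.Blocks(
        theme="freddyaboulton/dracula_revamped",    # community theme fetched from the Hugging Face Hub
        css="footer { display: none !important }",  # hide the default Gradio footer
        title="Voice Cloning WebUI",                # browser tab title
    ) as ui:
        gr.Markdown("## 🤗🎙️ Voice Cloning")
        gr.Markdown("AI voice cloning based on Tortoise")
        with gr.Tab("Generate"):
            gr.Markdown("Placeholder for the real Generate tab contents.")
    return ui


if __name__ == "__main__":
    build_ui().launch()
```

The `css` rule only hides Gradio's own page footer; the rest of the look and feel comes from the Hub theme.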