From c75d0bc5dab24379b27d796d305cb771b6713810 Mon Sep 17 00:00:00 2001 From: mrq Date: Fri, 17 Feb 2023 20:43:12 +0000 Subject: [PATCH] pulls DLAS for any updates since I might be actually updating it, added option to not load TTS on initialization to save VRAM when training --- notebook.ipynb | 2 +- src/main.py | 6 ++++-- src/utils.py | 8 ++++++-- src/webui.py | 3 ++- update.bat | 4 ++++ update.sh | 6 +++++- 6 files changed, 22 insertions(+), 7 deletions(-) diff --git a/notebook.ipynb b/notebook.ipynb index 2ca8b41..9257f23 100755 --- a/notebook.ipynb +++ b/notebook.ipynb @@ -141,7 +141,7 @@ "cell_type":"code", "source":[ "%cd /content/ai-voice-cloning\n", - "!python ./src/train.py -opt ./training/finetune.yml" + "!python ./src/train.py -opt ./training/finetune.yaml" ], "metadata":{ "id":"-KayB8klA5tY" diff --git a/src/main.py b/src/main.py index e5641c7..582d50a 100755 --- a/src/main.py +++ b/src/main.py @@ -17,8 +17,9 @@ if __name__ == "__main__": uvicorn.run("main:app", host=args.listen_host, port=args.listen_port if not None else 8000) else: webui = setup_gradio() - tts = setup_tortoise() webui.launch(share=args.share, prevent_thread_lock=True, show_error=True, server_name=args.listen_host, server_port=args.listen_port) + if not args.defer_tts_load: + tts = setup_tortoise() webui.block_thread() elif __name__ == "main": @@ -33,4 +34,5 @@ elif __name__ == "main": webui = setup_gradio() app = gr.mount_gradio_app(app, webui, path=args.listen_path) - tts = setup_tortoise() \ No newline at end of file + if not args.defer_tts_load: + tts = setup_tortoise() \ No newline at end of file diff --git a/src/utils.py b/src/utils.py index 5af1777..5015a10 100755 --- a/src/utils.py +++ b/src/utils.py @@ -58,6 +58,7 @@ def setup_args(): 'voice-fixer': False, # getting tired of long initialization times in a Colab for downloading a large dataset for it 'voice-fixer-use-cuda': True, 'force-cpu-for-conditioning-latents': False, + 'defer-tts-load': False, 'device-override': None, 'whisper-model': "base", 'concurrency-count': 2, @@ -82,6 +83,7 @@ def setup_args(): parser.add_argument("--voice-fixer", action='store_true', default=default_arguments['voice-fixer'], help="Uses python module 'voicefixer' to improve audio quality, if available.") parser.add_argument("--voice-fixer-use-cuda", action='store_true', default=default_arguments['voice-fixer-use-cuda'], help="Hints to voicefixer to use CUDA, if available.") parser.add_argument("--force-cpu-for-conditioning-latents", default=default_arguments['force-cpu-for-conditioning-latents'], action='store_true', help="Forces computing conditional latents to be done on the CPU (if you constantyl OOM on low chunk counts)") + parser.add_argument("--defer-tts-load", default=default_arguments['defer-tts-load'], action='store_true', help="Defers loading TTS model") parser.add_argument("--device-override", default=default_arguments['device-override'], help="A device string to override pass through Torch") parser.add_argument("--whisper-model", default=default_arguments['whisper-model'], help="Specifies which whisper model to use for transcription.") parser.add_argument("--sample-batch-size", default=default_arguments['sample-batch-size'], type=int, help="Sets how many batches to use during the autoregressive samples pass") @@ -434,7 +436,7 @@ def run_training(config_path): cmd = ["python", "./src/train.py", "-opt", config_path] print("Spawning process: ", " ".join(cmd)) - subprocess.run(cmd, env=os.environ.copy(), shell=True, stdout=subprocess.STDOUT, stderr=subprocess.STDOUT) + subprocess.run(cmd, env=os.environ.copy(), shell=True) """ from train import train train(config) @@ -681,7 +683,7 @@ def get_voice_list(dir=get_voice_dir()): os.makedirs(dir, exist_ok=True) return sorted([d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d)) and len(os.listdir(os.path.join(dir, d))) > 0 ]) + ["microphone", "random"] -def export_exec_settings( listen, share, check_for_updates, models_from_local_only, low_vram, embed_output_metadata, latents_lean_and_mean, voice_fixer, voice_fixer_use_cuda, force_cpu_for_conditioning_latents, device_override, whisper_model, sample_batch_size, concurrency_count, output_sample_rate, output_volume ): +def export_exec_settings( listen, share, check_for_updates, models_from_local_only, low_vram, embed_output_metadata, latents_lean_and_mean, voice_fixer, voice_fixer_use_cuda, force_cpu_for_conditioning_latents, defer_tts_load, device_override, whisper_model, sample_batch_size, concurrency_count, output_sample_rate, output_volume ): global args args.listen = listen @@ -690,6 +692,7 @@ def export_exec_settings( listen, share, check_for_updates, models_from_local_on args.models_from_local_only = models_from_local_only args.low_vram = low_vram args.force_cpu_for_conditioning_latents = force_cpu_for_conditioning_latents + args.defer_tts_load = defer_tts_load args.device_override = device_override args.whisper_model = whisper_model args.sample_batch_size = sample_batch_size @@ -708,6 +711,7 @@ def export_exec_settings( listen, share, check_for_updates, models_from_local_on 'check-for-updates':args.check_for_updates, 'models-from-local-only':args.models_from_local_only, 'force-cpu-for-conditioning-latents': args.force_cpu_for_conditioning_latents, + 'defer-tts-load': args.defer_tts_load, 'device-override': args.device_override, 'whisper-model': args.whisper_model, 'sample-batch-size': args.sample_batch_size, diff --git a/src/webui.py b/src/webui.py index 9fa6738..2101f85 100755 --- a/src/webui.py +++ b/src/webui.py @@ -182,7 +182,7 @@ def read_generate_settings_proxy(file, saveAs='.temp'): def prepare_dataset_proxy( voice, language ): return prepare_dataset( get_voices(load_latents=False)[voice], outdir=f"./training/{voice}/", language=language ) - + def update_voices(): return ( gr.Dropdown.update(choices=get_voice_list()), @@ -463,6 +463,7 @@ def setup_gradio(): gr.Checkbox(label="Voice Fixer", value=args.voice_fixer), gr.Checkbox(label="Use CUDA for Voice Fixer", value=args.voice_fixer_use_cuda), gr.Checkbox(label="Force CPU for Conditioning Latents", value=args.force_cpu_for_conditioning_latents), + gr.Checkbox(label="Defer TTS Load", value=args.defer_tts_load), gr.Textbox(label="Device Override", value=args.device_override), gr.Dropdown(label="Whisper Model", value=args.whisper_model, choices=["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large"]), ] diff --git a/update.bat b/update.bat index a856d8b..d69c975 100755 --- a/update.bat +++ b/update.bat @@ -5,4 +5,8 @@ python -m pip install --upgrade pip python -m pip install -r ./requirements.txt python -m pip install -r ./dlas/requirements.txt deactivate + +cd dlas +git pull +cd .. pause \ No newline at end of file diff --git a/update.sh b/update.sh index f861154..470ccab 100755 --- a/update.sh +++ b/update.sh @@ -4,4 +4,8 @@ source ./venv/bin/activate python -m pip install --upgrade pip python -m pip install -r ./requirements.txt python -m pip install -r ./dlas/requirements.txt -deactivate \ No newline at end of file +deactivate + +cd dlas +git pull +cd .. \ No newline at end of file