From 409dec98d5531a3376030a11a99a6f66f33fbeec Mon Sep 17 00:00:00 2001 From: mrq Date: Sun, 12 Feb 2023 20:05:59 +0000 Subject: [PATCH] fixed voicefixing not working as intended, load TTS before Gradio in the webui due to how long it takes to initialize tortoise (instead of just having a block to preload it) --- README.md | 1 + tortoise_tts.ipynb | 5 ++--- webui.py | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 3f602f0..636b5bf 100755 --- a/README.md +++ b/README.md @@ -132,6 +132,7 @@ I'll try and make a list of "common" (or what I feel may be common that I experi * `torch.cuda.OutOfMemoryError: CUDA out of memory.`: You most likely have a GPU with low VRAM (~4GiB), and the small optimizations with keeping data on the GPU is enough to OOM. Please open the `start.bat` file and add `--low-vram` to the command (for example: `py app.py --low-vram`) to disable those small optimizations. * `WavFileWarning: Chunk (non-data) not understood, skipping it.`: something about your WAVs are funny, and its best to remux your audio files with FFMPEG (included batch file in `.\convert\`). - Honestly, I don't know if this does impact output quality, as I feel it's placebo when I do try and correct this. +* `Unable to find a valid cuDNN algorithm to run convolution`: a rather weird error message that occurs in the colab notebook. The vanilla auto-batch size calculation is a bit flawed, so try and reduce it to a fixed number in `Settings`, like eight or so. #### Non-"""Issues""" diff --git a/tortoise_tts.ipynb b/tortoise_tts.ipynb index 85420c6..a6d91a5 100755 --- a/tortoise_tts.ipynb +++ b/tortoise_tts.ipynb @@ -42,8 +42,7 @@ "%cd tortoise-tts\n", "!python -m pip install --upgrade pip\n", "!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116\n", - "!python -m pip install -r ./requirements.txt\n", - "!python setup.py install" + "!python -m pip install -r ./requirements.txt\n" ] }, { @@ -78,9 +77,9 @@ "sys.argv = [\"\"]\n", "\n", "mrq.args = mrq.setup_args()\n", + "mrq.tts = mrq.setup_tortoise()\n", "mrq.webui = mrq.setup_gradio()\n", "mrq.webui.launch(share=True, prevent_thread_lock=True, height=1000)\n", - "mrq.tts = mrq.setup_tortoise()\n", "mrq.webui.block_thread()" ], "metadata":{ diff --git a/webui.py b/webui.py index dfcc2fb..d8cf897 100755 --- a/webui.py +++ b/webui.py @@ -190,7 +190,7 @@ def generate( 'time': run_time } # save here in case some error happens mid-batch - torchaudio.save(f'{outdir}/{voice}_{name}.wav', audio, args.output_sample_rate) + torchaudio.save(f'{outdir}/{voice}_{name}.wav', audio, tts.output_sample_rate) for k in audio_cache: audio = audio_cache[k]['audio'] @@ -265,10 +265,10 @@ def generate( if args.voice_fixer and voicefixer: # we could do this on the pieces before they get stiched up anyways to save some compute # but the stitching would need to read back from disk, defeating the point of caching the waveform - for path in progress.tqdm(audio_cache, desc="Running voicefix..."): + for path in progress.tqdm(output_voices, desc="Running voicefix..."): voicefixer.restore( - input=f'{outdir}/{voice}_{k}.wav', - output=f'{outdir}/{voice}_{k}.wav', + input=path, + output=path, #cuda=False, #mode=mode, )