forked from mrq/tortoise-tts
fixed voicefixing not working as intended, load TTS before Gradio in the webui due to how long it takes to initialize tortoise (instead of just having a block to preload it)
This commit is contained in:
parent
b85c9921d7
commit
409dec98d5
|
@ -132,6 +132,7 @@ I'll try and make a list of "common" (or what I feel may be common that I experi
|
||||||
* `torch.cuda.OutOfMemoryError: CUDA out of memory.`: You most likely have a GPU with low VRAM (~4GiB), and the small optimizations with keeping data on the GPU are enough to OOM. Please open the `start.bat` file and add `--low-vram` to the command (for example: `py app.py --low-vram`) to disable those small optimizations.
|
* `torch.cuda.OutOfMemoryError: CUDA out of memory.`: You most likely have a GPU with low VRAM (~4GiB), and the small optimizations with keeping data on the GPU are enough to OOM. Please open the `start.bat` file and add `--low-vram` to the command (for example: `py app.py --low-vram`) to disable those small optimizations.
|
||||||
* `WavFileWarning: Chunk (non-data) not understood, skipping it.`: something about your WAVs is funny, and it's best to remux your audio files with FFMPEG (included batch file in `.\convert\`).
|
* `WavFileWarning: Chunk (non-data) not understood, skipping it.`: something about your WAVs is funny, and it's best to remux your audio files with FFMPEG (included batch file in `.\convert\`).
|
||||||
- Honestly, I don't know if this does impact output quality, as I feel it's placebo when I do try and correct this.
|
- Honestly, I don't know if this does impact output quality, as I feel it's placebo when I do try and correct this.
|
||||||
|
* `Unable to find a valid cuDNN algorithm to run convolution`: a rather weird error message that occurs in the colab notebook. The vanilla auto-batch size calculation is a bit flawed, so try and reduce it to a fixed number in `Settings`, like eight or so.
|
||||||
|
|
||||||
#### Non-"""Issues"""
|
#### Non-"""Issues"""
|
||||||
|
|
||||||
|
|
|
@ -42,8 +42,7 @@
|
||||||
"%cd tortoise-tts\n",
|
"%cd tortoise-tts\n",
|
||||||
"!python -m pip install --upgrade pip\n",
|
"!python -m pip install --upgrade pip\n",
|
||||||
"!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116\n",
|
"!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116\n",
|
||||||
"!python -m pip install -r ./requirements.txt\n",
|
"!python -m pip install -r ./requirements.txt\n"
|
||||||
"!python setup.py install"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -78,9 +77,9 @@
|
||||||
"sys.argv = [\"\"]\n",
|
"sys.argv = [\"\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"mrq.args = mrq.setup_args()\n",
|
"mrq.args = mrq.setup_args()\n",
|
||||||
|
"mrq.tts = mrq.setup_tortoise()\n",
|
||||||
"mrq.webui = mrq.setup_gradio()\n",
|
"mrq.webui = mrq.setup_gradio()\n",
|
||||||
"mrq.webui.launch(share=True, prevent_thread_lock=True, height=1000)\n",
|
"mrq.webui.launch(share=True, prevent_thread_lock=True, height=1000)\n",
|
||||||
"mrq.tts = mrq.setup_tortoise()\n",
|
|
||||||
"mrq.webui.block_thread()"
|
"mrq.webui.block_thread()"
|
||||||
],
|
],
|
||||||
"metadata":{
|
"metadata":{
|
||||||
|
|
8
webui.py
8
webui.py
|
@ -190,7 +190,7 @@ def generate(
|
||||||
'time': run_time
|
'time': run_time
|
||||||
}
|
}
|
||||||
# save here in case some error happens mid-batch
|
# save here in case some error happens mid-batch
|
||||||
torchaudio.save(f'{outdir}/{voice}_{name}.wav', audio, args.output_sample_rate)
|
torchaudio.save(f'{outdir}/{voice}_{name}.wav', audio, tts.output_sample_rate)
|
||||||
|
|
||||||
for k in audio_cache:
|
for k in audio_cache:
|
||||||
audio = audio_cache[k]['audio']
|
audio = audio_cache[k]['audio']
|
||||||
|
@ -265,10 +265,10 @@ def generate(
|
||||||
if args.voice_fixer and voicefixer:
|
if args.voice_fixer and voicefixer:
|
||||||
# we could do this on the pieces before they get stitched up anyway to save some compute
|
# we could do this on the pieces before they get stitched up anyway to save some compute
|
||||||
# but the stitching would need to read back from disk, defeating the point of caching the waveform
|
# but the stitching would need to read back from disk, defeating the point of caching the waveform
|
||||||
for path in progress.tqdm(audio_cache, desc="Running voicefix..."):
|
for path in progress.tqdm(output_voices, desc="Running voicefix..."):
|
||||||
voicefixer.restore(
|
voicefixer.restore(
|
||||||
input=f'{outdir}/{voice}_{k}.wav',
|
input=path,
|
||||||
output=f'{outdir}/{voice}_{k}.wav',
|
output=path,
|
||||||
#cuda=False,
|
#cuda=False,
|
||||||
#mode=mode,
|
#mode=mode,
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user