forked from mrq/tortoise-tts
updates
This commit is contained in:
parent
e962c9bad7
commit
ad2419f31d
|
@ -77,8 +77,8 @@ These reference clips are recordings of a speaker that you provide to guide spee
|
||||||
This repo comes with several pre-packaged voices. You will be familiar with many of them. :)
|
This repo comes with several pre-packaged voices. You will be familiar with many of them. :)
|
||||||
|
|
||||||
Most of the provided voices were not found in the training set. Experimentally, it seems that voices from the training set
|
Most of the provided voices were not found in the training set. Experimentally, it seems that voices from the training set
|
||||||
produce more realistic outputs than those outside of the training set. The following voices come from the training set:
|
produce more realistic outputs than those outside of the training set. Any voice prefixed with "train" came from the
|
||||||
atkins, dotrice, grace, harris, kennard, lescault, mol, otto.
|
training set.
|
||||||
|
|
||||||
### Adding a new voice
|
### Adding a new voice
|
||||||
|
|
||||||
|
|
|
@ -52,6 +52,8 @@
|
||||||
"import torch.nn as nn\n",
|
"import torch.nn as nn\n",
|
||||||
"import torch.nn.functional as F\n",
|
"import torch.nn.functional as F\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"import IPython\n",
|
||||||
|
"\n",
|
||||||
"from api import TextToSpeech\n",
|
"from api import TextToSpeech\n",
|
||||||
"from utils.audio import load_audio, get_voices\n",
|
"from utils.audio import load_audio, get_voices\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
@ -93,7 +95,7 @@
|
||||||
"Had worn them really about the same,\"\"\"\n",
|
"Had worn them really about the same,\"\"\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Pick one of the voices from above\n",
|
"# Pick one of the voices from above\n",
|
||||||
"voice = 'dotrice'\n",
|
"voice = 'train_dotrice'\n",
|
||||||
"# Pick a \"preset mode\" to determine quality. Options: {\"ultra_fast\", \"fast\" (default), \"standard\", \"high_quality\"}. See docs in api.py\n",
|
"# Pick a \"preset mode\" to determine quality. Options: {\"ultra_fast\", \"fast\" (default), \"standard\", \"high_quality\"}. See docs in api.py\n",
|
||||||
"preset = \"fast\""
|
"preset = \"fast\""
|
||||||
],
|
],
|
||||||
|
@ -115,7 +117,8 @@
|
||||||
" conds.append(c)\n",
|
" conds.append(c)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"gen = tts.tts_with_preset(text, conds, preset)\n",
|
"gen = tts.tts_with_preset(text, conds, preset)\n",
|
||||||
"torchaudio.save('generated.wav', gen.squeeze(0).cpu(), 24000)"
|
"torchaudio.save('generated.wav', gen.squeeze(0).cpu(), 24000)\n",
|
||||||
|
"IPython.display.Audio('generated.wav')"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "KEXOKjIvn6NW"
|
"id": "KEXOKjIvn6NW"
|
||||||
|
@ -139,7 +142,8 @@
|
||||||
" conds.append(c)\n",
|
" conds.append(c)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"gen = tts.tts_with_preset(\"They used to say that if man was meant to fly, he’d have wings. But he did fly. He discovered he had to.\", conds, preset)\n",
|
"gen = tts.tts_with_preset(\"They used to say that if man was meant to fly, he’d have wings. But he did fly. He discovered he had to.\", conds, preset)\n",
|
||||||
"torchaudio.save('captain_kirkard.wav', gen.squeeze(0).cpu(), 24000)"
|
"torchaudio.save('captain_kirkard.wav', gen.squeeze(0).cpu(), 24000)\n",
|
||||||
|
"IPython.display.Audio('captain_kirkard.wav')"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "fYTk8KUezUr5"
|
"id": "fYTk8KUezUr5"
|
||||||
|
|
Loading…
Reference in New Issue
Block a user