This commit is contained in:
James Betker 2022-04-25 21:28:18 -06:00
parent e962c9bad7
commit ad2419f31d
2 changed files with 9 additions and 5 deletions

View File

@ -77,8 +77,8 @@ These reference clips are recordings of a speaker that you provide to guide spee
This repo comes with several pre-packaged voices. You will be familiar with many of them. :) This repo comes with several pre-packaged voices. You will be familiar with many of them. :)
Most of the provided voices were not found in the training set. Experimentally, it seems that voices from the training set Most of the provided voices were not found in the training set. Experimentally, it seems that voices from the training set
produce more realistic outputs than those outside of the training set. The following voices come from the training set: produce more realistic outputs than those outside of the training set. Any voice prepended with "train" came from the
atkins, dotrice, grace, harris, kennard, lescault, mol, otto. training set.
### Adding a new voice ### Adding a new voice

View File

@ -52,6 +52,8 @@
"import torch.nn as nn\n", "import torch.nn as nn\n",
"import torch.nn.functional as F\n", "import torch.nn.functional as F\n",
"\n", "\n",
"import IPython\n",
"\n",
"from api import TextToSpeech\n", "from api import TextToSpeech\n",
"from utils.audio import load_audio, get_voices\n", "from utils.audio import load_audio, get_voices\n",
"\n", "\n",
@ -93,7 +95,7 @@
"Had worn them really about the same,\"\"\"\n", "Had worn them really about the same,\"\"\"\n",
"\n", "\n",
"# Pick one of the voices from above\n", "# Pick one of the voices from above\n",
"voice = 'dotrice'\n", "voice = 'train_dotrice'\n",
"# Pick a \"preset mode\" to determine quality. Options: {\"ultra_fast\", \"fast\" (default), \"standard\", \"high_quality\"}. See docs in api.py\n", "# Pick a \"preset mode\" to determine quality. Options: {\"ultra_fast\", \"fast\" (default), \"standard\", \"high_quality\"}. See docs in api.py\n",
"preset = \"fast\"" "preset = \"fast\""
], ],
@ -115,7 +117,8 @@
" conds.append(c)\n", " conds.append(c)\n",
"\n", "\n",
"gen = tts.tts_with_preset(text, conds, preset)\n", "gen = tts.tts_with_preset(text, conds, preset)\n",
"torchaudio.save('generated.wav', gen.squeeze(0).cpu(), 24000)" "torchaudio.save('generated.wav', gen.squeeze(0).cpu(), 24000)\n",
"IPython.display.Audio('generated.wav')"
], ],
"metadata": { "metadata": {
"id": "KEXOKjIvn6NW" "id": "KEXOKjIvn6NW"
@ -139,7 +142,8 @@
" conds.append(c)\n", " conds.append(c)\n",
"\n", "\n",
"gen = tts.tts_with_preset(\"They used to say that if man was meant to fly, hed have wings. But he did fly. He discovered he had to.\", conds, preset)\n", "gen = tts.tts_with_preset(\"They used to say that if man was meant to fly, hed have wings. But he did fly. He discovered he had to.\", conds, preset)\n",
"torchaudio.save('captain_kirkard.wav', gen.squeeze(0).cpu(), 24000)" "torchaudio.save('captain_kirkard.wav', gen.squeeze(0).cpu(), 24000)\n",
"IPython.display.Audio('captain_kirkard.wav')"
], ],
"metadata": { "metadata": {
"id": "fYTk8KUezUr5" "id": "fYTk8KUezUr5"