From 26611b158eadf7b273183f575a157b2a6a821fa1 Mon Sep 17 00:00:00 2001 From: James Betker Date: Mon, 25 Apr 2022 21:28:18 -0600 Subject: [PATCH] updates --- README.md | 4 ++-- tortoise_tts.ipynb | 10 +++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 928d926..0d3a7d4 100644 --- a/README.md +++ b/README.md @@ -77,8 +77,8 @@ These reference clips are recordings of a speaker that you provide to guide spee This repo comes with several pre-packaged voices. You will be familiar with many of them. :) Most of the provided voices were not found in the training set. Experimentally, it seems that voices from the training set -produce more realistic outputs then those outside of the training set. The following voices come from the training set: - atkins, dotrice, grace, harris, kennard, lescault, mol, otto. +produce more realistic outputs than those outside of the training set. Any voice prefixed with "train_" came from the +training set. ### Adding a new voice diff --git a/tortoise_tts.ipynb b/tortoise_tts.ipynb index d8b274f..8c38bdb 100644 --- a/tortoise_tts.ipynb +++ b/tortoise_tts.ipynb @@ -52,6 +52,8 @@ "import torch.nn as nn\n", "import torch.nn.functional as F\n", "\n", + "import IPython\n", + "\n", "from api import TextToSpeech\n", "from utils.audio import load_audio, get_voices\n", "\n", @@ -93,7 +95,7 @@ "Had worn them really about the same,\"\"\"\n", "\n", "# Pick one of the voices from above\n", - "voice = 'dotrice'\n", + "voice = 'train_dotrice'\n", "# Pick a \"preset mode\" to determine quality. Options: {\"ultra_fast\", \"fast\" (default), \"standard\", \"high_quality\"}. 
See docs in api.py\n", "preset = \"fast\"" ], @@ -115,7 +117,8 @@ " conds.append(c)\n", "\n", "gen = tts.tts_with_preset(text, conds, preset)\n", - "torchaudio.save('generated.wav', gen.squeeze(0).cpu(), 24000)" + "torchaudio.save('generated.wav', gen.squeeze(0).cpu(), 24000)\n", + "IPython.display.Audio('generated.wav')" ], "metadata": { "id": "KEXOKjIvn6NW" @@ -139,7 +142,8 @@ " conds.append(c)\n", "\n", "gen = tts.tts_with_preset(\"They used to say that if man was meant to fly, he’d have wings. But he did fly. He discovered he had to.\", conds, preset)\n", - "torchaudio.save('captain_kirkard.wav', gen.squeeze(0).cpu(), 24000)" + "torchaudio.save('captain_kirkard.wav', gen.squeeze(0).cpu(), 24000)\n", + "IPython.display.Audio('captain_kirkard.wav')" ], "metadata": { "id": "fYTk8KUezUr5"