From 89d15c9e74a5e03366832c52f7963c2472d5d22b Mon Sep 17 00:00:00 2001
From: James Betker
Date: Thu, 5 Aug 2021 22:15:13 -0600
Subject: [PATCH] Move gpt-tts back to lucidrains implementation

Much better performance.
---
 codes/models/gpt_voice/gpt_tts.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/codes/models/gpt_voice/gpt_tts.py b/codes/models/gpt_voice/gpt_tts.py
index 74fefed2..70096c08 100644
--- a/codes/models/gpt_voice/gpt_tts.py
+++ b/codes/models/gpt_voice/gpt_tts.py
@@ -28,8 +28,8 @@ class GptTts(nn.Module):
         self.mel_embedding = nn.Embedding(self.MEL_DICTIONARY_SIZE, model_dim)
         self.text_pos_embedding = nn.Embedding(self.MAX_SYMBOLS_PER_PHRASE, model_dim)
         self.mel_pos_embedding = nn.Embedding(max_mel_frames, model_dim)
-        self.gpt = GPT(GPTConfig(1+self.MAX_SYMBOLS_PER_PHRASE+max_mel_frames, n_layer=8, n_embd=model_dim, n_head=8), do_pos_emb=False)
-        #self.gpt = Transformer(dim=model_dim, depth=8, seq_len=1+self.MAX_SYMBOLS_PER_PHRASE+max_mel_frames, heads=8)
+        #self.gpt = GPT(GPTConfig(1+self.MAX_SYMBOLS_PER_PHRASE+max_mel_frames, n_layer=8, n_embd=model_dim, n_head=8), do_pos_emb=False)
+        self.gpt = Transformer(dim=model_dim, depth=8, seq_len=1+self.MAX_SYMBOLS_PER_PHRASE+max_mel_frames, heads=8)
         self.final_norm = nn.LayerNorm(model_dim)
         self.text_head = nn.Linear(model_dim, self.NUMBER_TEXT_TOKENS)
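
Note: for context on what this constructor wires together, below is a minimal sketch of the surrounding GptTts pattern the patch edits: text and mel token embeddings plus learned positional embeddings (kept outside the transformer, which is why the minGPT path was built with do_pos_emb=False), a causal transformer over the concatenated sequence, then per-stream output heads. The lucidrains Transformer's internals are not shown in the patch, so nn.TransformerEncoder stands in for it here; the constant values, the mel_head, and the forward pass are assumptions, not the repository's actual code.

import torch
import torch.nn as nn

class GptTtsSketch(nn.Module):
    # Hypothetical values; the real constants live elsewhere in gpt_tts.py.
    NUMBER_TEXT_TOKENS = 148
    MAX_SYMBOLS_PER_PHRASE = 200
    MEL_DICTIONARY_SIZE = 515

    def __init__(self, model_dim=512, max_mel_frames=900):
        super().__init__()
        seq_len = 1 + self.MAX_SYMBOLS_PER_PHRASE + max_mel_frames
        self.text_embedding = nn.Embedding(self.NUMBER_TEXT_TOKENS, model_dim)
        self.mel_embedding = nn.Embedding(self.MEL_DICTIONARY_SIZE, model_dim)
        # Learned positional embeddings, applied before the transformer.
        self.text_pos_embedding = nn.Embedding(self.MAX_SYMBOLS_PER_PHRASE, model_dim)
        self.mel_pos_embedding = nn.Embedding(max_mel_frames, model_dim)
        # Stand-in for the lucidrains Transformer(dim=model_dim, depth=8,
        # seq_len=seq_len, heads=8) that the patch switches to.
        layer = nn.TransformerEncoderLayer(d_model=model_dim, nhead=8, batch_first=True)
        self.gpt = nn.TransformerEncoder(layer, num_layers=8)
        self.final_norm = nn.LayerNorm(model_dim)
        self.text_head = nn.Linear(model_dim, self.NUMBER_TEXT_TOKENS)
        # Assumed mel output head, mirroring text_head; not shown in the hunk.
        self.mel_head = nn.Linear(model_dim, self.MEL_DICTIONARY_SIZE)

    def forward(self, text_tokens, mel_tokens):
        # Embed both token streams and add their positional embeddings.
        t = self.text_embedding(text_tokens) + self.text_pos_embedding(
            torch.arange(text_tokens.shape[1], device=text_tokens.device))
        m = self.mel_embedding(mel_tokens) + self.mel_pos_embedding(
            torch.arange(mel_tokens.shape[1], device=mel_tokens.device))
        x = torch.cat([t, m], dim=1)
        # Causal mask so each position only attends to earlier positions.
        seq = x.shape[1]
        mask = torch.triu(torch.full((seq, seq), float('-inf'), device=x.device), diagonal=1)
        h = self.final_norm(self.gpt(x, mask=mask))
        return self.text_head(h[:, :t.shape[1]]), self.mel_head(h[:, t.shape[1]:])

Because the positional embeddings are owned by GptTts itself rather than the transformer, the patch can swap the minGPT backbone for the lucidrains one by changing only the single self.gpt assignment, leaving the embedding and head layers untouched.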