From 6700f8851d10d73f1338bdb9a18c17efb69322c0 Mon Sep 17 00:00:00 2001 From: James Betker Date: Sat, 25 Dec 2021 23:23:21 -0700 Subject: [PATCH] moar verbosity --- codes/models/gpt_voice/unified_voice.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/codes/models/gpt_voice/unified_voice.py b/codes/models/gpt_voice/unified_voice.py index c892faba..fd72aef1 100644 --- a/codes/models/gpt_voice/unified_voice.py +++ b/codes/models/gpt_voice/unified_voice.py @@ -143,9 +143,9 @@ class UnifiedGptVoice(nn.Module): mel_inputs: long tensor, (b,m) wav_lengths: long tensor, (b,) """ - assert self.max_symbols_per_phrase >= mel_inputs.shape[1] - assert self.max_symbols_per_phrase >= text_inputs.shape[1] - assert self.max_total_tokens >= mel_inputs.shape[1] + text_inputs.shape[1] + assert self.max_symbols_per_phrase >= mel_inputs.shape[1], f'{mel_inputs.shape[1]}' + assert self.max_symbols_per_phrase >= text_inputs.shape[1], f'{text_inputs.shape[1]}' + assert self.max_total_tokens >= mel_inputs.shape[1] + text_inputs.shape[1], f'{mel_inputs.shape[1]}, {text_inputs.shape[1]}' mel_inputs = self.set_mel_padding(mel_inputs, wav_lengths) speech_conditioning_input = self.randomly_permute_conditioning_input(speech_conditioning_input) @@ -172,7 +172,7 @@ class UnifiedGptVoice(nn.Module): Performs autoregressive modeling on only text. Still requires a speech_conditioning_input due to the way the model inputs are formatted. Just provide any audio clip (arguably, zeros could be provided). """ - assert self.max_symbols_per_phrase >= text_inputs.shape[1] + assert self.max_symbols_per_phrase >= text_inputs.shape[1], f'{text_inputs.shape[1]}' speech_conditioning_input = self.randomly_permute_conditioning_input(speech_conditioning_input) speech_conditioning_input = self.conditioning_encoder(speech_conditioning_input).unsqueeze(1) @@ -187,7 +187,7 @@ class UnifiedGptVoice(nn.Module): """ Performs autoregressive modeling on only speech data. """ - assert self.max_symbols_per_phrase >= mel_inputs.shape[1] + assert self.max_symbols_per_phrase >= mel_inputs.shape[1], f'{mel_inputs.shape[1]}' mel_inputs = self.set_mel_padding(mel_inputs, wav_lengths) speech_conditioning_input = self.randomly_permute_conditioning_input(speech_conditioning_input)