More verbosity: include the offending sequence lengths in assertion messages

This commit is contained in:
James Betker 2021-12-25 23:23:21 -07:00
parent 8acf3b3097
commit 6700f8851d

View File

@@ -143,9 +143,9 @@ class UnifiedGptVoice(nn.Module):
mel_inputs: long tensor, (b,m)
wav_lengths: long tensor, (b,)
"""
assert self.max_symbols_per_phrase >= mel_inputs.shape[1]
assert self.max_symbols_per_phrase >= text_inputs.shape[1]
assert self.max_total_tokens >= mel_inputs.shape[1] + text_inputs.shape[1]
assert self.max_symbols_per_phrase >= mel_inputs.shape[1], f'{mel_inputs.shape[1]}'
assert self.max_symbols_per_phrase >= text_inputs.shape[1], f'{text_inputs.shape[1]}'
assert self.max_total_tokens >= mel_inputs.shape[1] + text_inputs.shape[1], f'{mel_inputs.shape[1]}, {text_inputs.shape[1]}'
mel_inputs = self.set_mel_padding(mel_inputs, wav_lengths)
speech_conditioning_input = self.randomly_permute_conditioning_input(speech_conditioning_input)
@@ -172,7 +172,7 @@ class UnifiedGptVoice(nn.Module):
Performs autoregressive modeling on only text. Still requires a speech_conditioning_input due to the way the
model inputs are formatted. Just provide any audio clip (arguably, zeros could be provided).
"""
assert self.max_symbols_per_phrase >= text_inputs.shape[1]
assert self.max_symbols_per_phrase >= text_inputs.shape[1], f'{text_inputs.shape[1]}'
speech_conditioning_input = self.randomly_permute_conditioning_input(speech_conditioning_input)
speech_conditioning_input = self.conditioning_encoder(speech_conditioning_input).unsqueeze(1)
@@ -187,7 +187,7 @@ class UnifiedGptVoice(nn.Module):
"""
Performs autoregressive modeling on only speech data.
"""
assert self.max_symbols_per_phrase >= mel_inputs.shape[1]
assert self.max_symbols_per_phrase >= mel_inputs.shape[1], f'{mel_inputs.shape[1]}'
mel_inputs = self.set_mel_padding(mel_inputs, wav_lengths)
speech_conditioning_input = self.randomly_permute_conditioning_input(speech_conditioning_input)