Better error messages when inputs are out of bounds.

This commit is contained in:
James Betker 2022-05-01 17:39:36 -06:00
parent 66805da4bd
commit f823e31e49

View File

@ -107,7 +107,9 @@ def fix_autoregressive_output(codes, stop_token, complain=True):
stop_token_indices = (codes == stop_token).nonzero() stop_token_indices = (codes == stop_token).nonzero()
if len(stop_token_indices) == 0: if len(stop_token_indices) == 0:
if complain: if complain:
print("No stop tokens found, enjoy that output of yours!") print("No stop tokens found. This typically means the spoken audio is too long. In some cases, the output "
"will still be good, though. Listen to it and if it is missing words, try breaking up your input "
"text.")
return codes return codes
else: else:
codes[stop_token_indices] = 83 codes[stop_token_indices] = 83
@ -310,6 +312,7 @@ class TextToSpeech:
""" """
text = torch.IntTensor(self.tokenizer.encode(text)).unsqueeze(0).cuda() text = torch.IntTensor(self.tokenizer.encode(text)).unsqueeze(0).cuda()
text = F.pad(text, (0, 1)) # This may not be necessary. text = F.pad(text, (0, 1)) # This may not be necessary.
assert text.shape[-1] < 400, 'Too much text provided. Break the text up into separate segments and re-try inference.'
if voice_samples is not None: if voice_samples is not None:
auto_conditioning, diffusion_conditioning = self.get_conditioning_latents(voice_samples) auto_conditioning, diffusion_conditioning = self.get_conditioning_latents(voice_samples)