Decrease resolution requirements to 2048

This commit is contained in:
James Betker 2022-01-20 11:27:49 -07:00
parent 4af8525dc3
commit 8e2439f50d
3 changed files with 4 additions and 4 deletions

View File

@@ -297,7 +297,7 @@ class DiffusionTts(nn.Module):
:return: an [N x C x ...] Tensor of outputs. :return: an [N x C x ...] Tensor of outputs.
""" """
orig_x_shape = x.shape[-1] orig_x_shape = x.shape[-1]
cm = ceil_multiple(x.shape[-1], 4096) cm = ceil_multiple(x.shape[-1], 2048)
if cm != 0: if cm != 0:
pc = (cm-x.shape[-1])/x.shape[-1] pc = (cm-x.shape[-1])/x.shape[-1]
x = F.pad(x, (0,cm-x.shape[-1])) x = F.pad(x, (0,cm-x.shape[-1]))

View File

@@ -310,7 +310,7 @@ class DiffusionVocoderWithRef(nn.Module):
:param y: an [N] Tensor of labels, if class-conditional. :param y: an [N] Tensor of labels, if class-conditional.
:return: an [N x C x ...] Tensor of outputs. :return: an [N x C x ...] Tensor of outputs.
""" """
assert x.shape[-1] % 4096 == 0 # This model operates at base//4096 at it's bottom levels, thus this requirement. assert x.shape[-1] % 2048 == 0 # This model operates at base//2048 at it's bottom levels, thus this requirement.
if self.conditioning_enabled: if self.conditioning_enabled:
assert conditioning_input is not None assert conditioning_input is not None

View File

@@ -65,9 +65,9 @@ def do_spectrogram_diffusion(diffusion_model, dvae_model, diffuser, mel_codes, c
if plt_spec: if plt_spec:
plot_spectrogram(mel[0].cpu()) plot_spectrogram(mel[0].cpu())
# Pad MEL to multiples of 4096//spectrogram_compression_factor # Pad MEL to multiples of 2048//spectrogram_compression_factor
msl = mel.shape[-1] msl = mel.shape[-1]
dsl = 4096 // spectrogram_compression_factor dsl = 2048 // spectrogram_compression_factor
gap = dsl - (msl % dsl) gap = dsl - (msl % dsl)
if gap > 0: if gap > 0:
mel = torch.nn.functional.pad(mel, (0, gap)) mel = torch.nn.functional.pad(mel, (0, gap))