a few fixes

This commit is contained in:
James Betker 2022-01-16 15:17:17 -07:00
parent 35db5ebf41
commit 37e4e737b5
2 changed files with 3 additions and 5 deletions

View File

@@ -106,7 +106,7 @@ class TextWavLoader(torch.utils.data.Dataset):
 random.shuffle(self.audiopaths_and_text)
 self.max_wav_len = opt_get(hparams, ['max_wav_length'], None)
 if self.max_wav_len is not None:
-self.max_aligned_codes = self.max_wav_len / self.aligned_codes_to_audio_ratio
+self.max_aligned_codes = self.max_wav_len // self.aligned_codes_to_audio_ratio
 self.max_text_len = opt_get(hparams, ['max_text_length'], None)
 assert self.max_wav_len is not None and self.max_text_len is not None
 self.use_bpe_tokenizer = opt_get(hparams, ['use_bpe_tokenizer'], True)
@@ -239,7 +239,7 @@ if __name__ == '__main__':
 'num_conditioning_candidates': 2,
 'conditioning_length': 44000,
 'use_bpe_tokenizer': True,
-'load_aligned_codes': False,
+'load_aligned_codes': True,
 }
 from data import create_dataset, create_dataloader

View File

@@ -46,8 +46,7 @@ class DiffusionTts(nn.Module):
 model_channels,
 in_channels=1,
 num_tokens=30,
-out_channels=2, # mean and variance
-discrete_codes=512,
+out_channels=2, # mean and variancexs
 dropout=0,
 # res 1, 2, 4, 8,16,32,64,128,256,512, 1K, 2K
 channel_mult= (1,1.5,2, 3, 4, 6, 8, 12, 16, 24, 32, 48),
@@ -68,7 +67,6 @@ class DiffusionTts(nn.Module):
 kernel_size=3,
 scale_factor=2,
 conditioning_inputs_provided=True,
-conditioning_input_dim=80,
 time_embed_dim_multiplier=4,
 only_train_dvae_connection_layers=False,
 ):