forked from mrq/DL-Art-School
a few fixes
This commit is contained in:
parent
35db5ebf41
commit
37e4e737b5
|
@ -106,7 +106,7 @@ class TextWavLoader(torch.utils.data.Dataset):
|
|||
random.shuffle(self.audiopaths_and_text)
|
||||
self.max_wav_len = opt_get(hparams, ['max_wav_length'], None)
|
||||
if self.max_wav_len is not None:
|
||||
self.max_aligned_codes = self.max_wav_len / self.aligned_codes_to_audio_ratio
|
||||
self.max_aligned_codes = self.max_wav_len // self.aligned_codes_to_audio_ratio
|
||||
self.max_text_len = opt_get(hparams, ['max_text_length'], None)
|
||||
assert self.max_wav_len is not None and self.max_text_len is not None
|
||||
self.use_bpe_tokenizer = opt_get(hparams, ['use_bpe_tokenizer'], True)
|
||||
|
@ -239,7 +239,7 @@ if __name__ == '__main__':
|
|||
'num_conditioning_candidates': 2,
|
||||
'conditioning_length': 44000,
|
||||
'use_bpe_tokenizer': True,
|
||||
'load_aligned_codes': False,
|
||||
'load_aligned_codes': True,
|
||||
}
|
||||
from data import create_dataset, create_dataloader
|
||||
|
||||
|
|
|
@ -46,8 +46,7 @@ class DiffusionTts(nn.Module):
|
|||
model_channels,
|
||||
in_channels=1,
|
||||
num_tokens=30,
|
||||
out_channels=2, # mean and variance
|
||||
discrete_codes=512,
|
||||
out_channels=2, # mean and variance
|
||||
dropout=0,
|
||||
# res 1, 2, 4, 8,16,32,64,128,256,512, 1K, 2K
|
||||
channel_mult= (1,1.5,2, 3, 4, 6, 8, 12, 16, 24, 32, 48),
|
||||
|
@ -68,7 +67,6 @@ class DiffusionTts(nn.Module):
|
|||
kernel_size=3,
|
||||
scale_factor=2,
|
||||
conditioning_inputs_provided=True,
|
||||
conditioning_input_dim=80,
|
||||
time_embed_dim_multiplier=4,
|
||||
only_train_dvae_connection_layers=False,
|
||||
):
|
||||
|
|
Loading…
Reference in New Issue
Block a user