forked from mrq/DL-Art-School
a few fixes
This commit is contained in:
parent
35db5ebf41
commit
37e4e737b5
|
@ -106,7 +106,7 @@ class TextWavLoader(torch.utils.data.Dataset):
|
||||||
random.shuffle(self.audiopaths_and_text)
|
random.shuffle(self.audiopaths_and_text)
|
||||||
self.max_wav_len = opt_get(hparams, ['max_wav_length'], None)
|
self.max_wav_len = opt_get(hparams, ['max_wav_length'], None)
|
||||||
if self.max_wav_len is not None:
|
if self.max_wav_len is not None:
|
||||||
self.max_aligned_codes = self.max_wav_len / self.aligned_codes_to_audio_ratio
|
self.max_aligned_codes = self.max_wav_len // self.aligned_codes_to_audio_ratio
|
||||||
self.max_text_len = opt_get(hparams, ['max_text_length'], None)
|
self.max_text_len = opt_get(hparams, ['max_text_length'], None)
|
||||||
assert self.max_wav_len is not None and self.max_text_len is not None
|
assert self.max_wav_len is not None and self.max_text_len is not None
|
||||||
self.use_bpe_tokenizer = opt_get(hparams, ['use_bpe_tokenizer'], True)
|
self.use_bpe_tokenizer = opt_get(hparams, ['use_bpe_tokenizer'], True)
|
||||||
|
@ -239,7 +239,7 @@ if __name__ == '__main__':
|
||||||
'num_conditioning_candidates': 2,
|
'num_conditioning_candidates': 2,
|
||||||
'conditioning_length': 44000,
|
'conditioning_length': 44000,
|
||||||
'use_bpe_tokenizer': True,
|
'use_bpe_tokenizer': True,
|
||||||
'load_aligned_codes': False,
|
'load_aligned_codes': True,
|
||||||
}
|
}
|
||||||
from data import create_dataset, create_dataloader
|
from data import create_dataset, create_dataloader
|
||||||
|
|
||||||
|
|
|
@ -46,8 +46,7 @@ class DiffusionTts(nn.Module):
|
||||||
model_channels,
|
model_channels,
|
||||||
in_channels=1,
|
in_channels=1,
|
||||||
num_tokens=30,
|
num_tokens=30,
|
||||||
out_channels=2, # mean and variance
|
out_channels=2, # mean and variance
|
||||||
discrete_codes=512,
|
|
||||||
dropout=0,
|
dropout=0,
|
||||||
# res 1, 2, 4, 8,16,32,64,128,256,512, 1K, 2K
|
# res 1, 2, 4, 8,16,32,64,128,256,512, 1K, 2K
|
||||||
channel_mult= (1,1.5,2, 3, 4, 6, 8, 12, 16, 24, 32, 48),
|
channel_mult= (1,1.5,2, 3, 4, 6, 8, 12, 16, 24, 32, 48),
|
||||||
|
@ -68,7 +67,6 @@ class DiffusionTts(nn.Module):
|
||||||
kernel_size=3,
|
kernel_size=3,
|
||||||
scale_factor=2,
|
scale_factor=2,
|
||||||
conditioning_inputs_provided=True,
|
conditioning_inputs_provided=True,
|
||||||
conditioning_input_dim=80,
|
|
||||||
time_embed_dim_multiplier=4,
|
time_embed_dim_multiplier=4,
|
||||||
only_train_dvae_connection_layers=False,
|
only_train_dvae_connection_layers=False,
|
||||||
):
|
):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user