# DL-Art-School/recipes/tacotron2/test_tacotron2_lj.yml
# 2021-07-20 08:37:57 -06:00
#
# 74 lines
# 1.8 KiB
# YAML

#### general settings
# Top-level options for this configuration.
name: test_tacotron2_lj
use_tb_logger: true
gpu_ids:
  - 0
# NOTE(review): -1 reads like a sentinel value; confirm its meaning with the
# code that consumes this file.
start_step: -1
fp16: false
checkpointing_enabled: true
wandb: false
# Dataset definitions, keyed by split name. The per-dataset options are nested
# under `train` so that this section's `name` and `path` do not collide with
# the file's top-level `name` and `path` keys (duplicate keys are invalid YAML
# and most parsers silently keep only the last one).
datasets:
  train:
    name: lj
    n_workers: 0
    batch_size: 1
    mode: nv_tacotron
    # Single-quoted so the Windows backslashes are always taken literally.
    path: 'E:\4k6k\datasets\audio\LJSpeech-1.1\ljs_audio_text_train_filelist.txt'
# Network definitions, keyed by network name. These names are what the
# `generator:` fields of the injectors and the `pretrain_model_<name>` keys
# elsewhere in this file refer to. Each network carries its own `type`,
# `which_model_G` and `args`, so they must be nested per network; left flat
# they would be duplicate keys and only the last definition would survive.
networks:
  mel_gen:
    type: generator
    which_model_G: nv_tacotron2
    args:
      encoder_kernel_size: 5
      encoder_n_convolutions: 3
      encoder_embedding_dim: 512
      decoder_rnn_dim: 1024
      prenet_dim: 256
      max_decoder_steps: 1000
      attention_rnn_dim: 1024
      attention_dim: 128
      attention_location_n_filters: 32
      attention_location_kernel_size: 31
      postnet_embedding_dim: 512
      postnet_kernel_size: 5
      postnet_n_convolutions: 5
  waveglow:
    type: generator
    which_model_G: nv_waveglow
    args:
      n_mel_channels: 80
      n_flows: 12
      n_group: 8
      n_early_every: 4
      n_early_size: 2
      # NOTE(review): WN_config is placed inside `args` as a nested mapping,
      # matching how the upstream WaveGlow config groups these three values;
      # confirm against the model constructor.
      WN_config:
        n_layers: 8
        n_channels: 256
        kernel_size: 3
#### path
# Checkpoint paths. One `pretrain_model_<network>` entry per network named
# under `networks` (mel_gen, waveglow). The entries are nested under `path`
# so the key is a mapping rather than a bare null value.
path:
  pretrain_model_mel_gen: ../experiments/train_tacotron2_lj/models/22000_mel_gen_ema.pth
  pretrain_model_waveglow: ../experiments/waveglow_256channels_universal_v5.pth
  strict_load: true
  # resume_state: ../experiments/train_imgset_unet_diffusion/training_state/54000.state
# Step definitions, keyed by step name. The single step `generator` declares
# two injectors. Each injector has its own `type`, `generator`, `in` and `out`,
# so they must be nested per injector; left flat, the `wave` values would
# silently overwrite the `mel` ones.
steps:
  generator:
    training: mel_gen
    injectors:
      # Feeds the listed inputs to the `mel_gen` network and names its outputs.
      mel:
        type: generator
        generator: mel_gen
        in: [padded_text, input_lengths, padded_mel, output_lengths]
        out: [mel_outputs, mel_outputs_postnet, gate_outputs, alignments]
      # Feeds `mel_outputs` (produced by the `mel` injector above) to the
      # `waveglow` network and names the result `waveform`.
      wave:
        type: generator
        generator: waveglow
        method: infer
        in: mel_outputs
        out: waveform
# Evaluation options. `output_state` names `waveform`, the key written by the
# `wave` injector's `out` field; it is nested under `eval` so that `eval` is a
# mapping rather than a bare null value.
eval:
  output_state: waveform