forked from mrq/DL-Art-School
vqvae docs (unfinished)
parent acf1535b14
commit 197d19714f
@@ -60,7 +60,6 @@ steps:
      training: generator

      optimizer_params:
-       # Optimizer params
        lr: !!float 2e-4
        weight_decay: 0
        beta1: 0.9
recipes/vqvae2/README.md | 22 lines (new file)
@@ -0,0 +1,22 @@
# VQVAE2 in Pytorch

[VQVAE2](https://arxiv.org/pdf/1906.00446.pdf) is a generative autoencoder developed by DeepMind. Its unique innovation is discretizing the latent space into a fixed set of "codebook" vectors. This codebook can then be used in downstream tasks to rebuild images from the training set.

This model is in DLAS thanks to work [@rosinality](https://github.com/rosinality) did [converting the DeepMind model](https://github.com/rosinality/vq-vae-2-pytorch) to Pytorch.
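The codebook mechanism boils down to a nearest-neighbour lookup plus a commitment penalty. A minimal PyTorch sketch of the idea (illustrative only, not the actual DLAS/rosinality code; `codebook_size`/`codebook_dim` mirror the stage-1 config below):

```python
import torch
import torch.nn.functional as F

# Toy quantizer: snap encoder outputs to their nearest codebook entries.
codebook_size, codebook_dim = 512, 64
codebook = torch.randn(codebook_size, codebook_dim)  # learned (an nn.Embedding in practice)

def quantize(z_e):
    # z_e: encoder outputs flattened to (N, codebook_dim)
    distances = torch.cdist(z_e, codebook)   # (N, codebook_size)
    indices = distances.argmin(dim=1)        # discrete codes
    z_q = codebook[indices]                  # quantized latents
    # Commitment term keeps the encoder close to the entries it picked
    # (weighted 0.25 in the training config).
    commitment_loss = F.mse_loss(z_e, z_q.detach())
    # Straight-through estimator so reconstruction gradients reach the encoder.
    z_q = z_e + (z_q - z_e).detach()
    return z_q, indices, commitment_loss
```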
# Training VQVAE2

VQVAE2 is trained in two steps:

## Training the autoencoder

The first step is to train the autoencoder itself. The provided config file `train_imgnet_vqvae_stage1.yml` shows how to do this for ImageNet with the hyperparameters specified by DeepMind. You'll need to bring your own ImageNet folder for this.
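With the config in place, this stage is launched through DLAS's normal training entry point, typically something along the lines of `python train.py -opt recipes/vqvae2/train_imgnet_vqvae_stage1.yml` run from the `codes/` directory. The exact script name and flag may differ in this fork, so treat that command as an assumption and check the repository root.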
## Training the PixelCNN encoder

The second step is to train the PixelCNN model, which learns an autoregressive prior over the "codebook" indices the autoencoder assigns to an input image.
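Concretely, this stage consumes the integer code indices the stage-1 model produces for each training image and fits an autoregressive model over them. A rough, hypothetical sketch of that data-preparation step, reusing `quantize()` from the sketch above (the `encoder` interface here is an assumption, not the actual DLAS API):

```python
import torch

@torch.no_grad()
def extract_code_indices(encoder, images):
    # Hypothetical helper: run images through the trained stage-1 encoder,
    # snap the latents to their nearest codebook entries, and keep only the
    # integer indices. These index maps are what the PixelCNN prior models.
    z_e = encoder(images)                   # assume channels-last latents (B, H, W, D)
    flat = z_e.reshape(-1, z_e.shape[-1])
    _, indices, _ = quantize(flat)          # quantize() from the sketch above
    return indices.reshape(z_e.shape[:-1])  # (B, H, W) grid of discrete codes
```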
recipes/vqvae2/train_imgnet_vqvae_stage1.yml | 108 lines (new file)
@@ -0,0 +1,108 @@
name: train_imgnet_vqvae_stage1
model: extensibletrainer
scale: 1
gpu_ids: [0]
start_step: -1
checkpointing_enabled: true  # <-- Gradient checkpointing. Enable for huge GPU memory savings. Disable for distributed training.
fp16: false
wandb: false  # <-- enable to log to wandb. tensorboard logging is always enabled.

datasets:
  train:
    name: imgnet
    n_workers: 8
    batch_size: 128
    mode: imagefolder
    paths: /content/imagenet  # <-- Put your imagenet path here.
    target_size: 224
    scale: 1
  val:
    name: val
    mode: fullimage
    dataroot_GT: /content/imagenet_val
    min_tile_size: 32
    scale: 1
    force_multiple: 16

networks:
  generator:
    type: generator
    which_model_G: vqvae
    kwargs:
      # Hyperparameters specified from VQVAE2 paper.
      in_channel: 3
      channel: 128
      n_res_block: 2
      n_res_channel: 32
      codebook_dim: 64
      codebook_size: 512

#### path
path:
  #pretrain_model_generator: <insert pretrained model path if desired>
  strict_load: true
  #resume_state: ../experiments/train_imgnet_vqvae_stage1/training_state/0.state # <-- Set this to resume from a previous training state.

steps:
  generator:
    training: generator

    optimizer_params:
      lr: !!float 3e-4
      weight_decay: 0
      beta1: 0.9
      beta2: 0.99

    injectors:
      # Cool hack for more training diversity:
      # Make sure to change below references to `hq` to `cropped`.
      #random_crop:
      #  train: true
      #  type: random_crop
      #  dim_in: 224
      #  dim_out: 192
      #  in: hq
      #  out: cropped
      gen_inj_train:
        train: true
        type: generator
        generator: generator
        in: hq
        out: [gen, codebook_commitment_loss]

    losses:
      pixel_mse_loss:
        type: pix
        criterion: l2
        weight: 1
        fake: gen
        real: hq
      commitment_loss:
        type: direct
        weight: .25
        key: codebook_commitment_loss

train:
  niter: 500000
  warmup_iter: -1
  mega_batch_factor: 1  # <-- Gradient accumulation factor. If you are running OOM, increase this to [2,4,8].
  val_freq: 4000

  # Optimizer/LR schedule was not specified in the paper. Using an arbitrary default one.
  default_lr_scheme: MultiStepLR
  gen_lr_steps: [50000, 100000, 140000, 180000]
  lr_gamma: 0.5

eval:
  output_state: gen
  injectors:
    gen_inj_eval:
      type: generator
      generator: generator
      in: hq
      out: [gen, codebook_commitment_loss]

logger:
  print_freq: 30
  save_checkpoint_freq: 2000
  visuals: [gen, hq, cropped]
  visual_debug_rate: 100
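For reference, the two entries under `steps.generator.losses` above combine into a single objective: an L2 reconstruction loss between `gen` and `hq` (weight 1) plus the commitment term emitted by the generator (weight .25). A hedged PyTorch sketch of what the trainer effectively minimizes each step (names are illustrative, not DLAS internals):

```python
import torch.nn.functional as F

def stage1_objective(gen, hq, codebook_commitment_loss):
    # pixel_mse_loss: criterion l2, weight 1
    reconstruction = F.mse_loss(gen, hq)
    # commitment_loss: type direct, weight .25 (reads codebook_commitment_loss)
    return reconstruction + 0.25 * codebook_commitment_loss
```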