forked from mrq/DL-Art-School
Allow flat dvae
This commit is contained in:
parent
f3db41f125
commit
019acfa4c5
|
@ -78,7 +78,6 @@ class DiscreteVAE(nn.Module):
|
||||||
discretization_loss_averaging_steps = 100,
|
discretization_loss_averaging_steps = 100,
|
||||||
):
|
):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
assert num_layers >= 1, 'number of layers must be greater than or equal to 1'
|
|
||||||
has_resblocks = num_resnet_blocks > 0
|
has_resblocks = num_resnet_blocks > 0
|
||||||
|
|
||||||
self.num_tokens = num_tokens
|
self.num_tokens = num_tokens
|
||||||
|
@ -106,35 +105,43 @@ class DiscreteVAE(nn.Module):
|
||||||
assert NotImplementedError()
|
assert NotImplementedError()
|
||||||
|
|
||||||
|
|
||||||
enc_chans = [hidden_dim * 2 ** i for i in range(num_layers)]
|
|
||||||
dec_chans = list(reversed(enc_chans))
|
|
||||||
|
|
||||||
enc_chans = [channels, *enc_chans]
|
|
||||||
|
|
||||||
dec_init_chan = codebook_dim if not has_resblocks else dec_chans[0]
|
|
||||||
dec_chans = [dec_init_chan, *dec_chans]
|
|
||||||
|
|
||||||
enc_chans_io, dec_chans_io = map(lambda t: list(zip(t[:-1], t[1:])), (enc_chans, dec_chans))
|
|
||||||
|
|
||||||
enc_layers = []
|
enc_layers = []
|
||||||
dec_layers = []
|
dec_layers = []
|
||||||
|
|
||||||
pad = (kernel_size - 1) // 2
|
if num_layers > 0:
|
||||||
for (enc_in, enc_out), (dec_in, dec_out) in zip(enc_chans_io, dec_chans_io):
|
enc_chans = [hidden_dim * 2 ** i for i in range(num_layers)]
|
||||||
enc_layers.append(nn.Sequential(conv(enc_in, enc_out, kernel_size, stride = stride, padding = pad), act()))
|
dec_chans = list(reversed(enc_chans))
|
||||||
if encoder_norm:
|
|
||||||
enc_layers.append(nn.GroupNorm(8, enc_out))
|
enc_chans = [channels, *enc_chans]
|
||||||
dec_layers.append(nn.Sequential(conv_transpose(dec_in, dec_out, kernel_size, stride = stride, padding = pad), act()))
|
|
||||||
|
dec_init_chan = codebook_dim if not has_resblocks else dec_chans[0]
|
||||||
|
dec_chans = [dec_init_chan, *dec_chans]
|
||||||
|
|
||||||
|
enc_chans_io, dec_chans_io = map(lambda t: list(zip(t[:-1], t[1:])), (enc_chans, dec_chans))
|
||||||
|
|
||||||
|
pad = (kernel_size - 1) // 2
|
||||||
|
for (enc_in, enc_out), (dec_in, dec_out) in zip(enc_chans_io, dec_chans_io):
|
||||||
|
enc_layers.append(nn.Sequential(conv(enc_in, enc_out, kernel_size, stride = stride, padding = pad), act()))
|
||||||
|
if encoder_norm:
|
||||||
|
enc_layers.append(nn.GroupNorm(8, enc_out))
|
||||||
|
dec_layers.append(nn.Sequential(conv_transpose(dec_in, dec_out, kernel_size, stride = stride, padding = pad), act()))
|
||||||
|
dec_out_chans = dec_chans[-1]
|
||||||
|
innermost_dim = dec_chans[0]
|
||||||
|
else:
|
||||||
|
enc_layers.append(nn.Sequential(conv(channels, hidden_dim, 1), act()))
|
||||||
|
dec_out_chans = hidden_dim
|
||||||
|
innermost_dim = hidden_dim
|
||||||
|
|
||||||
|
|
||||||
for _ in range(num_resnet_blocks):
|
for _ in range(num_resnet_blocks):
|
||||||
dec_layers.insert(0, ResBlock(dec_chans[1], conv, act))
|
dec_layers.insert(0, ResBlock(innermost_dim, conv, act))
|
||||||
enc_layers.append(ResBlock(enc_chans[-1], conv, act))
|
enc_layers.append(ResBlock(innermost_dim, conv, act))
|
||||||
|
|
||||||
if num_resnet_blocks > 0:
|
if num_resnet_blocks > 0:
|
||||||
dec_layers.insert(0, conv(codebook_dim, dec_chans[1], 1))
|
dec_layers.insert(0, conv(codebook_dim, innermost_dim, 1))
|
||||||
|
|
||||||
enc_layers.append(conv(enc_chans[-1], codebook_dim, 1))
|
enc_layers.append(conv(innermost_dim, codebook_dim, 1))
|
||||||
dec_layers.append(conv(dec_chans[-1], channels, 1))
|
dec_layers.append(conv(dec_out_chans, channels, 1))
|
||||||
|
|
||||||
self.encoder = nn.Sequential(*enc_layers)
|
self.encoder = nn.Sequential(*enc_layers)
|
||||||
self.decoder = nn.Sequential(*dec_layers)
|
self.decoder = nn.Sequential(*dec_layers)
|
||||||
|
@ -258,7 +265,7 @@ if __name__ == '__main__':
|
||||||
#o=v(torch.randn(1,3,256,256))
|
#o=v(torch.randn(1,3,256,256))
|
||||||
#print(o.shape)
|
#print(o.shape)
|
||||||
v = DiscreteVAE(channels=80, normalization=None, positional_dims=1, num_tokens=4096, codebook_dim=4096,
|
v = DiscreteVAE(channels=80, normalization=None, positional_dims=1, num_tokens=4096, codebook_dim=4096,
|
||||||
hidden_dim=256, stride=2, num_resnet_blocks=2, kernel_size=3, num_layers=2, use_transposed_convs=False)
|
hidden_dim=256, stride=2, num_resnet_blocks=2, kernel_size=3, num_layers=0, use_transposed_convs=False)
|
||||||
#v.eval()
|
#v.eval()
|
||||||
o=v(torch.randn(1,80,256))
|
o=v(torch.randn(1,80,256))
|
||||||
print(o[-1].shape)
|
print(o[-1].shape)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user