forked from mrq/DL-Art-School

iq checkin

parent 9a37f3ba42
commit 7b3fc79737
@@ -28,7 +28,6 @@ class SelfClassifyingHead(nn.Module):
         h = self.dec(x)
         o = self.to_output(h[:, -1])
         q, c, _ = self.quantizer(o, used_codes)
-        q = torch.sigmoid(q)
         return q, c
 
     def forward(self, x, target):
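After this hunk, do_ar_step returns the raw quantizer output rather than a sigmoid-squashed one. A reconstruction of the method as it stands after the change (the signature is inferred from the checkpoint call in the next hunk; self.dec, self.to_output and self.quantizer are defined elsewhere in the class, not in this diff):

    def do_ar_step(self, x, used_codes):
        h = self.dec(x)
        o = self.to_output(h[:, -1])
        q, c, _ = self.quantizer(o, used_codes)
        # q is now the pre-sigmoid output, so forward() can both regularize
        # the raw values and apply the nonlinearity itself.
        return q, c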
@@ -37,10 +36,13 @@ class SelfClassifyingHead(nn.Module):
         outputs = []
         results = []
         codes = []
+        q_reg = 0
         for i in range(self.seq_len):
             q, c = checkpoint(functools.partial(self.do_ar_step, used_codes=codes), torch.stack(stack, dim=1))
+            q_reg = q_reg + (q ** 2).mean()
+            s = torch.sigmoid(q)
 
-            outputs.append(q)
+            outputs.append(s)
             output = torch.stack(outputs, dim=1).sum(1)
 
             # If the addition would strictly make the result worse, set it to 0. Sometimes.
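The new q_reg term is a plain L2 activity penalty on the raw (pre-sigmoid) quantizer outputs, accumulated once per autoregressive step. A minimal standalone sketch of the pattern; the tensor shapes and the per_step_logits name are illustrative, not taken from this diff:

    import torch

    # Stand-in for the raw q tensors produced across seq_len AR steps.
    per_step_logits = [torch.randn(4, 8) for _ in range(3)]

    q_reg = 0
    for q in per_step_logits:
        q_reg = q_reg + (q ** 2).mean()   # penalize large raw outputs
    q_reg = q_reg / len(per_step_logits)  # averaged, as in q_reg / self.seq_len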
@@ -49,13 +51,13 @@ class SelfClassifyingHead(nn.Module):
             probabilistic_worsen = torch.rand_like(worsen) * worsen > .5
             output = output * probabilistic_worsen.unsqueeze(-1) # This is non-differentiable, but still deterministic.
             c[probabilistic_worsen] = -1 # Code of -1 means the code was unused.
-            q = q * probabilistic_worsen.unsqueeze(-1)
-            outputs[-1] = q
+            s = s * probabilistic_worsen.unsqueeze(-1)
+            outputs[-1] = s
 
             codes.append(c)
-            stack.append(self.to_decoder(q))
+            stack.append(self.to_decoder(s))
             results.append(output)
-        return results, torch.cat(codes, dim=0)
+        return results, torch.cat(codes, dim=0), q_reg / self.seq_len
 
 
 class VectorResBlock(nn.Module):
@@ -112,13 +114,13 @@ class InstrumentQuantizer(nn.Module):
         for lyr in self.encoder:
             h = lyr(h)
 
-        reconstructions, codes = self.heads(h, f)
+        reconstructions, codes, q_reg = self.heads(h, f)
         reconstruction_losses = torch.stack([F.mse_loss(r.reshape(b, s, c), px) for r in reconstructions])
         r_follow = torch.arange(1, reconstruction_losses.shape[0]+1, device=x.device)
         reconstruction_losses = (reconstruction_losses * r_follow / r_follow.shape[0])
         self.log_codes(codes)
 
-        return reconstruction_losses
+        return reconstruction_losses, q_reg
 
     def log_codes(self, codes):
         if self.internal_step % 5 == 0:
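Since InstrumentQuantizer.forward now returns the regularization term alongside the per-step reconstruction losses, callers have to fold both into their objective. A hypothetical usage sketch; the model/batch names and the 0.01 weight are assumptions, not taken from this repository:

    # reconstruction_losses: one weighted MSE per AR step; q_reg: scalar penalty.
    reconstruction_losses, q_reg = model(batch)
    loss = reconstruction_losses.sum() + 0.01 * q_reg  # weight is illustrative
    loss.backward()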