Attempt to fix nan

2021-06-07 11:43:42 -06:00 · 2021-06-07 11:43:42 -06:00 · 1c574c5bd1
commit 1c574c5bd1
parent eda796985b
1 changed files with 8 additions and 7 deletions
--- a/codes/models/classifiers/cifar_resnet_branched.py
+++ b/codes/models/classifiers/cifar_resnet_branched.py
@@ -158,6 +158,7 @@ class DropoutNorm(SwitchNorm):
        # Compute the dropout probabilities. This module is a no-op before the accumulator is initialized.
        if self.accumulator_filled > 0:
            with torch.no_grad():
                probs = torch.mean(self.accumulator, dim=0) * self.dropout_rate
                bs, br = x.shape[:2]
                drop = torch.rand((bs, br), device=x.device) > probs.unsqueeze(0)
@@ -172,7 +173,7 @@ class DropoutNorm(SwitchNorm):
 class HardRoutingGate(nn.Module):
    def __init__(self, breadth, dropout_rate=.8):
        super().__init__()
-        self.norm = DropoutNorm(breadth, dropout_rate, accumulator_size=2)
+        self.norm = DropoutNorm(breadth, dropout_rate, accumulator_size=128)
    def forward(self, x):
        soft = self.norm(nn.functional.softmax(x, dim=1))