diff --git a/codes/models/classifiers/resnet_with_checkpointing.py b/codes/models/classifiers/resnet_with_checkpointing.py index 95901e74..552d1328 100644 --- a/codes/models/classifiers/resnet_with_checkpointing.py +++ b/codes/models/classifiers/resnet_with_checkpointing.py @@ -3,7 +3,6 @@ import torch import torch.nn as nn from torchvision.models.resnet import BasicBlock, Bottleneck -from torchvision.models.utils import load_state_dict_from_url import torchvision diff --git a/codes/models/classifiers/weighted_conv_resnet.py b/codes/models/classifiers/weighted_conv_resnet.py index 6261e1cc..f69c2cb4 100644 --- a/codes/models/classifiers/weighted_conv_resnet.py +++ b/codes/models/classifiers/weighted_conv_resnet.py @@ -2,7 +2,6 @@ import torch import torchvision from torch import Tensor import torch.nn as nn -from torchvision.models.utils import load_state_dict_from_url from typing import Type, Any, Callable, Union, List, Optional, OrderedDict, Iterator __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', diff --git a/codes/models/segformer/backbone.py b/codes/models/segformer/backbone.py index f9bb04f7..a8c4e55e 100644 --- a/codes/models/segformer/backbone.py +++ b/codes/models/segformer/backbone.py @@ -3,7 +3,6 @@ import torch import torch.nn as nn from torchvision.models.resnet import BasicBlock, Bottleneck -from torchvision.models.utils import load_state_dict_from_url import torchvision diff --git a/codes/trainer/steps.py b/codes/trainer/steps.py index 35214cd5..20ac0a92 100644 --- a/codes/trainer/steps.py +++ b/codes/trainer/steps.py @@ -227,6 +227,8 @@ class ConfigurableStep(Module): new_state.update(lstate) else: l = loss(self.get_network_for_name(self.step_opt['training']), local_state) + if not l.isfinite(): + print(f'!!Detected non-finite loss {loss_name}') total_loss += l * self.weights[loss_name] # Record metrics. if isinstance(l, torch.Tensor):