forked from mrq/DL-Art-School
87 lines
3.0 KiB
Python
87 lines
3.0 KiB
Python
import torch
|
|
|
|
# Utility class that stores detached, named losses in a rotating buffer for smooth metric outputting.
|
|
from torch import distributed
|
|
|
|
|
|
class LossAccumulator:
    """Rotating buffer of detached, named losses used to emit smoothed metrics.

    Each loss name owns a ring buffer with ``buffer_sz`` slots. ``as_dict``
    reports the mean over the slots written so far. Names containing
    ``"_histogram"`` store one flattened tensor per slot and are reported as a
    flat tensor of the raw stored values rather than as a mean.
    """

    def __init__(self, buffer_sz=50):
        """Create an empty accumulator.

        Args:
            buffer_sz: Number of most recent values retained per loss name.
        """
        self.buffer_sz = buffer_sz
        # name -> (next write index, storage tensor, True once the ring wrapped)
        self.buffers = {}
        # name -> number of increment_metric() calls made for that name
        self.counters = {}

    def add_loss(self, name, tensor):
        """Record one value for ``name``, overwriting the oldest slot when full.

        Args:
            name: Metric name. If it contains ``"_histogram"`` the value is
                flattened and stored as a whole row; the flattened length of
                the first value fixes the row width for that name.
            tensor: A ``torch.Tensor`` or a plain Python number. Histogram
                values must be tensors.
        """
        is_histogram = '_histogram' in name
        if name not in self.buffers:
            if is_histogram:
                width = torch.flatten(tensor.detach().cpu()).shape[0]
                storage = torch.zeros((self.buffer_sz, width))
            else:
                storage = torch.zeros(self.buffer_sz)
            self.buffers[name] = (0, storage, False)
        i, buf, filled = self.buffers[name]

        # Can take tensors or just plain python numbers.
        if is_histogram:
            buf[i] = torch.flatten(tensor.detach().cpu())
        elif isinstance(tensor, torch.Tensor):
            value = tensor.detach()
            if distributed.is_available() and distributed.is_initialized():
                # Average the metric over all processes. all_reduce works in
                # place, so operate on a private copy: the caller's tensor
                # (often the live loss) must not be modified by logging.
                value = value.clone()
                distributed.all_reduce(value, op=distributed.ReduceOp.SUM)
                value = value / distributed.get_world_size()
            buf[i] = value.cpu()
        else:
            buf[i] = tensor

        # Once the write index reaches the end, every slot holds real data.
        filled = filled or i + 1 >= self.buffer_sz
        self.buffers[name] = ((i + 1) % self.buffer_sz, buf, filled)

    def increment_metric(self, name):
        """Increase the counter stored under ``name`` by one (starting at 1)."""
        self.counters[name] = self.counters.get(name, 0) + 1

    def as_dict(self):
        """Summarize all recorded losses and counters.

        Returns:
            A dict mapping ``"loss_" + name`` to the mean of the values written
            so far (or, for histogram names, a flat tensor of those values),
            plus every counter under its own unprefixed name.
        """
        result = {}
        for k, (i, buf, filled) in self.buffers.items():
            # Until the ring has wrapped, only the first i slots hold data;
            # the rest are still the zeros the buffer was created with.
            written = buf if filled else buf[:i]
            if '_histogram' in k:
                # Histograms are reported raw; they must not fall through to
                # the mean below, which would collapse them to a scalar.
                result["loss_" + k] = torch.flatten(written)
            else:
                result["loss_" + k] = torch.mean(written)
        result.update(self.counters)
        return result
|
|
|
|
|
|
# Stores every recorded loss in an unbounded list (nothing is ever evicted).
|
|
class InfStorageLossAccumulator:
    """Accumulator that keeps every recorded value in an unbounded list.

    Exposes the same ``add_loss`` / ``increment_metric`` / ``as_dict`` methods
    as ``LossAccumulator`` but never discards old values, so ``as_dict``
    aggregates over everything recorded since construction.
    """

    def __init__(self):
        """Create an empty accumulator."""
        # name -> list of every value recorded for that name, in order
        self.buffers = {}

    def add_loss(self, name, tensor):
        """Append one value to the list stored under ``name``.

        Args:
            name: Metric name. If it contains ``"_histogram"`` the value is
                flattened before being stored.
            tensor: A ``torch.Tensor`` or a plain Python number. Histogram
                values must be tensors.
        """
        # Can take tensors or just plain python numbers.
        if '_histogram' in name:
            value = torch.flatten(tensor.detach().cpu())
        elif isinstance(tensor, torch.Tensor):
            value = tensor.detach().cpu()
        else:
            value = tensor
        # Register the name only after the value was prepared successfully so
        # a rejected input never leaves an empty list behind.
        self.buffers.setdefault(name, []).append(value)

    def increment_metric(self, name):
        """Do nothing; this accumulator does not track counters."""
        pass

    def as_dict(self):
        """Summarize all recorded losses.

        Returns:
            A dict mapping ``"loss_" + name`` to the mean of all recorded
            values, or, for histogram names, to one flat tensor containing
            every recorded value.
        """
        result = {}
        for k, buf in self.buffers.items():
            if '_histogram' in k:
                # buf is a list of already-flattened 1-D tensors, so they are
                # concatenated (torch.flatten does not accept a list).
                result["loss_" + k] = torch.cat(buf)
            else:
                # torch.stack only accepts tensors; plain numbers stored by
                # add_loss are converted here so they can be averaged too.
                values = [
                    v if isinstance(v, torch.Tensor)
                    else torch.as_tensor(v, dtype=torch.float32)
                    for v in buf
                ]
                result["loss_" + k] = torch.mean(torch.stack(values))
        return result