DL-Art-School/codes/utils/loss_accumulator.py

87 lines
3.0 KiB
Python

import torch
# Utility class that stores detached, named losses in a rotating buffer for smooth metric outputting.
from torch import distributed
class LossAccumulator:
    """Keep a rolling window of named losses and histograms for metric output.

    Every name owns a ring buffer with ``buffer_sz`` slots; once all slots have
    been written, the oldest entry is overwritten by the next one. Names that
    contain ``"_histogram"`` store one flattened tensor per slot, all other
    names store one scalar per slot. Simple named counters are tracked
    separately through :meth:`increment_metric`.
    """

    def __init__(self, buffer_sz=50):
        """Create an empty accumulator.

        Args:
            buffer_sz: Number of most recent values retained for each name.
        """
        self.buffer_sz = buffer_sz
        # name -> (next write index, storage tensor, "buffer has wrapped" flag)
        self.buffers = {}
        # name -> number of increment_metric() calls for that name
        self.counters = {}

    def add_loss(self, name, tensor):
        """Record one value for ``name`` in its ring buffer.

        Args:
            name: Metric name. If it contains ``"_histogram"``, ``tensor`` must
                be a ``torch.Tensor``; it is flattened and stored as a row.
                The row width is fixed by the first tensor recorded.
            tensor: A ``torch.Tensor`` or a plain Python number. Tensors are
                detached and moved to the CPU before being stored. When
                ``torch.distributed`` is initialized, non-histogram tensors
                are averaged across all processes first.
        """
        is_histogram = "_histogram" in name
        if is_histogram:
            value = torch.flatten(tensor.detach().cpu())
        elif isinstance(tensor, torch.Tensor):
            value = tensor.detach()
            if distributed.is_available() and distributed.is_initialized():
                # all_reduce works in place, so reduce a private copy instead
                # of silently rewriting the caller's tensor. The out-of-place
                # division also works for integer tensors.
                value = value.clone()
                distributed.all_reduce(value, op=distributed.ReduceOp.SUM)
                value = value / distributed.get_world_size()
            value = value.cpu()
        else:
            # Plain Python number; stored as-is into the float buffer.
            value = tensor

        if name not in self.buffers:
            if is_histogram:
                storage = torch.zeros((self.buffer_sz, value.shape[0]))
            else:
                storage = torch.zeros(self.buffer_sz)
            self.buffers[name] = (0, storage, False)

        i, buf, filled = self.buffers[name]
        buf[i] = value
        # Once the last slot has been written the whole buffer holds real data.
        filled = filled or i + 1 >= self.buffer_sz
        self.buffers[name] = ((i + 1) % self.buffer_sz, buf, filled)

    def increment_metric(self, name):
        """Increase the counter called ``name`` by one, starting from zero."""
        self.counters[name] = self.counters.get(name, 0) + 1

    def as_dict(self):
        """Summarize all buffers and counters in a single dict.

        Returns:
            A dict with one ``"loss_" + name`` entry per buffer and one
            ``name`` entry per counter. Histogram buffers yield a flat tensor
            of every recorded value still in the window; all other buffers
            yield the mean of the recorded values as a 0-d tensor. Slots that
            were never written are excluded in both cases.
        """
        result = {}
        for name, (i, buf, filled) in self.buffers.items():
            # Before the buffer wraps, only the first ``i`` slots hold data.
            recorded = buf if filled else buf[:i]
            if "_histogram" in name:
                result["loss_" + name] = torch.flatten(recorded)
            else:
                result["loss_" + name] = torch.mean(recorded)
        result.update(self.counters)
        return result
# Stores every loss in an unbounded, ever-growing list instead of a rotating buffer.
class InfStorageLossAccumulator:
    """Accumulate every value ever added for each name, without a size limit.

    Values are appended to a per-name Python list, so :meth:`as_dict`
    summarizes the complete history rather than a recent window.
    """

    def __init__(self):
        """Create an empty accumulator."""
        # name -> list of every value passed to add_loss(), in insertion order
        self.buffers = {}

    def add_loss(self, name, tensor):
        """Append one value to the history kept for ``name``.

        Args:
            name: Metric name. If it contains ``"_histogram"``, ``tensor`` must
                be a ``torch.Tensor``; it is stored flattened.
            tensor: A ``torch.Tensor`` or a plain Python number. Tensors are
                detached and moved to the CPU; numbers are stored unchanged.
        """
        if "_histogram" in name:
            value = torch.flatten(tensor.detach().cpu())
        elif isinstance(tensor, torch.Tensor):
            value = tensor.detach().cpu()
        else:
            value = tensor
        # Create the list only after the value was prepared successfully, so a
        # failed call never leaves an empty history behind.
        self.buffers.setdefault(name, []).append(value)

    def increment_metric(self, name):
        """Do nothing; this accumulator does not track counters."""
        pass

    @staticmethod
    def _as_float_tensor(value):
        """Return ``value`` as a tensor that ``torch.mean`` accepts."""
        as_tensor = torch.as_tensor(value)
        if as_tensor.is_floating_point() or as_tensor.is_complex():
            return as_tensor
        # torch.mean rejects integer and bool dtypes.
        return as_tensor.to(torch.get_default_dtype())

    def as_dict(self):
        """Summarize the full history of every name.

        Returns:
            A dict with one ``"loss_" + name`` entry per name. Histogram names
            map to a single flat tensor holding all recorded values in
            insertion order; all other names map to the mean of the recorded
            values as a tensor.
        """
        result = {}
        for name, history in self.buffers.items():
            if "_histogram" in name:
                # Entries are already 1-d, so concatenation yields a flat tensor.
                result["loss_" + name] = torch.cat(history)
            else:
                # The history may mix tensors and plain Python numbers.
                values = [self._as_float_tensor(v) for v in history]
                result["loss_" + name] = torch.mean(torch.stack(values))
        return result