From 909754cc279cc8a032c67c1f3381081a1745581d Mon Sep 17 00:00:00 2001
From: James Betker
Date: Wed, 25 Aug 2021 18:00:43 -0600
Subject: [PATCH] Add find_faulty_files.py

Add codes/scripts/find_faulty_files.py, a debugging script that builds an
ExtensibleTrainer from an options YAML, replaces the single step's
'reconstruction_loss' with a LossWrapper around it, and runs
feed_data/optimize_parameters over the 'train' dataloader. On each call the
wrapper reads state[<wrapped loss>.key], averages it per batch element and
prints the 'path' entry of every element whose mean is above 0.5, then
delegates to the wrapped loss.

Also:
* lucidrains_dvae.py: call loss_fn with reduction='none' and switch the
  __main__ smoke test to an 80-channel, stride-2, 2-layer configuration.
* networks.py: on Windows, filter walked modules against
  os.path.join(os.getcwd(), base_path) instead of os.getcwd() alone.
* sgd.py: SGDNoBiasMomentum tested hasattr(p, 'is_norm') but then read
  p.is_bn; read p.is_norm instead and update the docstring to match.
---
NOTE(review): line breaks and indentation in this patch were restored from a
whitespace-collapsed copy. If a hunk is rejected because of context
whitespace, retry with `git apply --ignore-whitespace`.

 codes/models/gpt_voice/lucidrains_dvae.py |  7 +-
 codes/scripts/find_faulty_files.py        | 86 +++++++++++++++++++++++
 codes/trainer/networks.py                 |  2 +-
 codes/trainer/optimizers/sgd.py           |  4 +-
 4 files changed, 93 insertions(+), 6 deletions(-)
 create mode 100644 codes/scripts/find_faulty_files.py

diff --git a/codes/models/gpt_voice/lucidrains_dvae.py b/codes/models/gpt_voice/lucidrains_dvae.py
index 9b89ffaf..6d81dab2 100644
--- a/codes/models/gpt_voice/lucidrains_dvae.py
+++ b/codes/models/gpt_voice/lucidrains_dvae.py
@@ -211,7 +211,7 @@ class DiscreteVAE(nn.Module):
         out = self.decode(codes)
 
         # reconstruction loss
-        recon_loss = self.loss_fn(img, out)
+        recon_loss = self.loss_fn(img, out, reduction='none')
 
         # This is so we can debug the distribution of codes being learned.
         if self.record_codes and self.internal_step % 50 == 0:
@@ -236,7 +236,8 @@ if __name__ == '__main__':
     #v = DiscreteVAE()
     #o=v(torch.randn(1,3,256,256))
     #print(o.shape)
-    v = DiscreteVAE(channels=1, normalization=None, positional_dims=1, num_tokens=4096, codebook_dim=4096, hidden_dim=256, stride=4, num_resnet_blocks=1, kernel_size=5, num_layers=5, use_transposed_convs=False)
+    v = DiscreteVAE(channels=80, normalization=None, positional_dims=1, num_tokens=4096, codebook_dim=4096,
+                    hidden_dim=256, stride=2, num_resnet_blocks=2, kernel_size=3, num_layers=2, use_transposed_convs=False)
     #v.eval()
-    o=v(torch.randn(1,1,4096))
+    o=v(torch.randn(1,80,256))
     print(o[-1].shape)
diff --git a/codes/scripts/find_faulty_files.py b/codes/scripts/find_faulty_files.py
new file mode 100644
index 00000000..f860ec46
--- /dev/null
+++ b/codes/scripts/find_faulty_files.py
@@ -0,0 +1,86 @@
+import os.path as osp
+import logging
+import random
+import time
+import argparse
+from collections import OrderedDict
+
+import utils
+import utils.options as option
+import utils.util as util
+from trainer.ExtensibleTrainer import ExtensibleTrainer
+from data import create_dataset, create_dataloader
+from tqdm import tqdm
+import torch
+import numpy as np
+
+current_batch = None
+
+class LossWrapper:
+    def __init__(self, lwrap):
+        self.lwrap = lwrap
+        self.opt = lwrap.opt
+
+    def is_stateful(self):
+        return self.lwrap.is_stateful()
+
+    def extra_metrics(self):
+        return self.lwrap.extra_metrics()
+
+    def clear_metrics(self):
+        self.lwrap.clear_metrics()
+
+    def __call__(self, m, state):
+        global current_batch
+        val = state[self.lwrap.key]
+        assert val.shape[0] == len(current_batch['path'])
+        val = val.view(val.shape[0], -1)
+        val = val.mean(dim=1)
+        errant = torch.nonzero(val > .5)
+        for i in errant:
+            print(f"ERRANT FOUND: {val[i]} path: {current_batch['path'][i]}")
+        return self.lwrap(m, state)
+
+
+# Script that builds an ExtensibleTrainer, then a pertinent loss with the above LossWrapper. The
+# LossWrapper then croaks when it finds an input that produces a divergent loss
+if __name__ == "__main__":
+    # Set seeds
+    torch.manual_seed(5555)
+    random.seed(5555)
+    np.random.seed(5555)
+
+    #### options
+    torch.backends.cudnn.benchmark = True
+    want_metrics = False
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-opt', type=str, help='Path to options YAML file.', default='../options/train_lrdvae_audio_clips.yml')
+    opt = option.parse(parser.parse_args().opt, is_train=True)
+    opt = option.dict_to_nonedict(opt)
+    utils.util.loaded_options = opt
+
+    util.mkdirs(
+        (path for key, path in opt['path'].items()
+         if not key == 'experiments_root' and 'pretrain_model' not in key and 'resume' not in key))
+    util.setup_logger('base', opt['path']['log'], 'test_' + opt['name'], level=logging.INFO,
+                      screen=True, tofile=True)
+    logger = logging.getLogger('base')
+    logger.info(option.dict2str(opt))
+
+    #### Create test dataset and dataloader
+    dataset = create_dataset(opt['datasets']['train'])
+    dataloader = create_dataloader(dataset, opt['datasets']['train'])
+    logger.info('Number of test images in [{:s}]: {:d}'.format(opt['datasets']['train']['name'], len(dataset)))
+
+    model = ExtensibleTrainer(opt)
+    assert len(model.steps) == 1
+
+    step = model.steps[0]
+    step.losses['reconstruction_loss'] = LossWrapper(step.losses['reconstruction_loss'])
+
+    for i, data in enumerate(tqdm(dataloader)):
+        current_batch = data
+        model.feed_data(data, i)
+        model.optimize_parameters(i)
+
+
diff --git a/codes/trainer/networks.py b/codes/trainer/networks.py
index a68a2ee9..f73a15c0 100644
--- a/codes/trainer/networks.py
+++ b/codes/trainer/networks.py
@@ -38,7 +38,7 @@ def find_registered_model_fns(base_path='models'):
     module_iter = pkgutil.walk_packages([base_path])
     for mod in module_iter:
         if os.name == 'nt':
-            if os.getcwd() not in mod.module_finder.path:
+            if os.path.join(os.getcwd(), base_path) not in mod.module_finder.path:
                 continue   # I have no idea why this is necessary - I think it's a bug in the latest PyWindows release.
         if mod.ispkg:
             EXCLUSION_LIST = ['flownet2']
diff --git a/codes/trainer/optimizers/sgd.py b/codes/trainer/optimizers/sgd.py
index a55f0d7a..7fa3bb08 100644
--- a/codes/trainer/optimizers/sgd.py
+++ b/codes/trainer/optimizers/sgd.py
@@ -5,7 +5,7 @@ from torch.optim import Optimizer
 class SGDNoBiasMomentum(Optimizer):
     r"""
     Copy of pytorch implementation of SGD with a modification which turns off momentum for params marked
-    with `is_bn` or `is_bias`.
+    with `is_norm` or `is_bias`.
     """
 
     def __init__(self, params, lr, momentum=0, dampening=0,
@@ -54,7 +54,7 @@ class SGDNoBiasMomentum(Optimizer):
                 if weight_decay != 0:
                     d_p = d_p.add(p, alpha=weight_decay)
                 # **this is the only modification over standard torch.optim.SGD:
-                is_bn_or_bias = (hasattr(p, 'is_norm') and p.is_bn) or (hasattr(p, 'is_bias') and p.is_bias)
+                is_bn_or_bias = (hasattr(p, 'is_norm') and p.is_norm) or (hasattr(p, 'is_bias') and p.is_bias)
                 if not is_bn_or_bias and momentum != 0:
                     param_state = self.state[p]
                     if 'momentum_buffer' not in param_state: