diff --git a/codes/models/steps/losses.py b/codes/models/steps/losses.py
index e31d5a64..830ff6b2 100644
--- a/codes/models/steps/losses.py
+++ b/codes/models/steps/losses.py
@@ -202,6 +202,7 @@ class DiscriminatorGanLoss(ConfigurableLoss):
         # generators and discriminators by essentially having them skip steps while their counterparts "catch up".
         self.min_loss = opt['min_loss'] if 'min_loss' in opt.keys() else 0
         if self.min_loss != 0:
+            assert self.env['rank'] == 0   # 'min_loss' is unsupported in distributed training: skipping steps can desync backward() across ranks by design. NOTE(review): rank 0 of a distributed job also passes this check - confirm this guard is sufficient.
             self.loss_rotating_buffer = torch.zeros(10, requires_grad=False)
             self.rb_ptr = 0
             self.losses_computed = 0
diff --git a/codes/models/steps/steps.py b/codes/models/steps/steps.py
index a8e0bdb2..a6b570e2 100644
--- a/codes/models/steps/steps.py
+++ b/codes/models/steps/steps.py
@@ -126,48 +126,49 @@ class ConfigurableStep(Module):
         self.env['current_step_optimizers'] = self.optimizers
         self.env['training'] = train
 
-        # Inject in any extra dependencies.
-        for inj in self.injectors:
-            # Don't do injections tagged with eval unless we are not in train mode.
-            if train and 'eval' in inj.opt.keys() and inj.opt['eval']:
-                continue
-            # Likewise, don't do injections tagged with train unless we are not in eval.
-            if not train and 'train' in inj.opt.keys() and inj.opt['train']:
-                continue
-            # Don't do injections tagged with 'after' or 'before' when we are out of spec.
-            if 'after' in inj.opt.keys() and self.env['step'] < inj.opt['after'] or \
-                'before' in inj.opt.keys() and self.env['step'] > inj.opt['before']:
-                continue
-            injected = inj(local_state)
-            local_state.update(injected)
-            new_state.update(injected)
-
-        if train and len(self.losses) > 0:
-            # Finally, compute the losses.
-            total_loss = 0
-            for loss_name, loss in self.losses.items():
-                # Some losses only activate after a set number of steps. For example, proto-discriminator losses can
-                # be very disruptive to a generator.
-                if 'after' in loss.opt.keys() and loss.opt['after'] > self.env['step']:
+        with self.get_network_for_name(self.get_networks_trained()[0]).join():
+            # Inject in any extra dependencies.
+            for inj in self.injectors:
+                # Don't do injections tagged with eval unless we are not in train mode.
+                if train and 'eval' in inj.opt.keys() and inj.opt['eval']:
                     continue
-                l = loss(self.training_net, local_state)
-                total_loss += l * self.weights[loss_name]
-                # Record metrics.
-                if isinstance(l, torch.Tensor):
-                    self.loss_accumulator.add_loss(loss_name, l)
-                for n, v in loss.extra_metrics():
-                    self.loss_accumulator.add_loss("%s_%s" % (loss_name, n), v)
-                loss.clear_metrics()
+                # Likewise, don't do injections tagged with train unless we are not in eval.
+                if not train and 'train' in inj.opt.keys() and inj.opt['train']:
+                    continue
+                # Don't do injections tagged with 'after' or 'before' when we are out of spec.
+                if 'after' in inj.opt.keys() and self.env['step'] < inj.opt['after'] or \
+                   'before' in inj.opt.keys() and self.env['step'] > inj.opt['before']:
+                    continue
+                injected = inj(local_state)
+                local_state.update(injected)
+                new_state.update(injected)
 
-            # In some cases, the loss could not be set (e.g. all losses have 'after'
-            if isinstance(total_loss, torch.Tensor):
-                self.loss_accumulator.add_loss("%s_total" % (self.get_training_network_name(),), total_loss)
-                # Scale the loss down by the accumulation factor.
-                total_loss = total_loss / self.env['mega_batch_factor']
+            if train and len(self.losses) > 0:
+                # Finally, compute the losses.
+                total_loss = 0
+                for loss_name, loss in self.losses.items():
+                    # Some losses only activate after a set number of steps. For example, proto-discriminator losses can
+                    # be very disruptive to a generator, so skip any loss whose 'after' step has not been reached yet.
+                    if 'after' in loss.opt.keys() and loss.opt['after'] > self.env['step']:
+                        continue
+                    l = loss(self.training_net, local_state)
+                    total_loss += l * self.weights[loss_name]
+                    # Record the loss and its extra metrics; reset the metrics once per loss, after all are recorded.
+                    if isinstance(l, torch.Tensor):
+                        self.loss_accumulator.add_loss(loss_name, l)
+                    for n, v in loss.extra_metrics():
+                        self.loss_accumulator.add_loss("%s_%s" % (loss_name, n), v)
+                    loss.clear_metrics()
 
-                # Get dem grads!
-                self.scaler.scale(total_loss).backward()
-                self.grads_generated = True
+                # In some cases, the loss could not be set (e.g. all losses have 'after')
+                if isinstance(total_loss, torch.Tensor):
+                    self.loss_accumulator.add_loss("%s_total" % (self.get_training_network_name(),), total_loss)
+                    # Scale the loss down by the accumulation factor.
+                    total_loss = total_loss / self.env['mega_batch_factor']
+
+                    # Get dem grads!
+                    self.scaler.scale(total_loss).backward()
+                    self.grads_generated = True
 
         # Detach all state variables. Within the step, gradients can flow. Once these variables leave the step
         # we must release the gradients.
diff --git a/codes/requirements.txt b/codes/requirements.txt
index 54c9ccfc..7bb339a4 100644
--- a/codes/requirements.txt
+++ b/codes/requirements.txt
@@ -1,6 +1,5 @@
 numpy
 opencv-python
-lmdb
 pyyaml
 tb-nightly
 future
@@ -11,4 +10,4 @@ scipy
 munch
 tqdm
 scp
-tensorboard
\ No newline at end of file
+tensorboard
diff --git a/codes/train.py b/codes/train.py
index 8a4fc14b..e4c9e884 100644
--- a/codes/train.py
+++ b/codes/train.py
@@ -46,7 +46,7 @@ class Trainer:
 
         else:
             opt['dist'] = True
-            self.init_dist()
+            self.init_dist('nccl')
             world_size = torch.distributed.get_world_size()
             self.rank = torch.distributed.get_rank()
 
@@ -117,11 +117,11 @@ class Trainer:
                 total_iters = int(opt['train']['niter'])
                 self.total_epochs = int(math.ceil(total_iters / train_size))
                 if opt['dist']:
-                    train_sampler = DistIterSampler(self.train_set, world_size, self.rank, dataset_ratio)
+                    self.train_sampler = DistIterSampler(self.train_set, world_size, self.rank, dataset_ratio)
                     self.total_epochs = int(math.ceil(total_iters / (train_size * dataset_ratio)))
                 else:
-                    train_sampler = None
-                self.train_loader = create_dataloader(self.train_set, dataset_opt, opt, train_sampler)
+                    self.train_sampler = None
+                self.train_loader = create_dataloader(self.train_set, dataset_opt, opt, self.train_sampler)
                 if self.rank <= 0:
                     self.logger.info('Number of train images: {:,d}, iters: {:,d}'.format(
                         len(self.train_set), train_size))
@@ -284,6 +284,7 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_prog_imgset_multifaceted_chained.yml')
     parser.add_argument('--launcher', choices=['none', 'pytorch'], default='none', help='job launcher')
+    parser.add_argument('--local_rank', type=int, default=0)
     args = parser.parse_args()
     opt = option.parse(args.opt, is_train=True)
     trainer = Trainer()