From c203cee31e5e99223bde1b4bda35a8f39c79bec1 Mon Sep 17 00:00:00 2001
From: James Betker
Date: Wed, 9 Dec 2020 15:03:59 -0700
Subject: [PATCH] Allow swapping to torch DDP as needed in code

---
 codes/models/ExtensibleTrainer.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/codes/models/ExtensibleTrainer.py b/codes/models/ExtensibleTrainer.py
index 2f9c147b..9e38e02e 100644
--- a/codes/models/ExtensibleTrainer.py
+++ b/codes/models/ExtensibleTrainer.py
@@ -108,6 +108,8 @@ class ExtensibleTrainer(BaseModel):
                 # Use Apex to enable delay_allreduce, which is compatible with gradient checkpointing.
                 from apex.parallel import DistributedDataParallel
                 dnet = DistributedDataParallel(anet, delay_allreduce=True)
+                #from torch.nn.parallel.distributed import DistributedDataParallel
+                #dnet = DistributedDataParallel(anet, device_ids=[torch.cuda.current_device()], find_unused_parameters=True)
             else:
                 dnet = DataParallel(anet, device_ids=opt['gpu_ids'])
             if self.is_train:
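
The patch leaves the native torch DDP path commented out, so switching backends means editing the source. Below is a minimal sketch of how the two wrappers could instead be selected from the training options. It is not part of the patch; the 'use_apex_ddp' key and the wrap_net() helper are hypothetical names introduced only for illustration, and the PyTorch/Apex calls mirror the ones already used in ExtensibleTrainer.

    # Sketch only: choose between Apex DDP and native torch DDP via an option
    # instead of commenting/uncommenting code. 'use_apex_ddp' and wrap_net()
    # are hypothetical and not present in the repository.
    import torch
    from torch.nn.parallel import DataParallel


    def wrap_net(anet, opt, dist=False):
        if dist:
            if opt.get('use_apex_ddp', True):
                # Apex DDP: delay_allreduce is compatible with gradient checkpointing.
                from apex.parallel import DistributedDataParallel
                return DistributedDataParallel(anet, delay_allreduce=True)
            else:
                # Native torch DDP: find_unused_parameters tolerates parameters
                # that receive no gradient in a given forward pass.
                from torch.nn.parallel.distributed import DistributedDataParallel
                return DistributedDataParallel(anet,
                                               device_ids=[torch.cuda.current_device()],
                                               find_unused_parameters=True)
        return DataParallel(anet, device_ids=opt['gpu_ids'])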