From db9e9e28a0ec187614fa22b46b20b9d402ad1349 Mon Sep 17 00:00:00 2001
From: James Betker
Date: Thu, 12 Nov 2020 15:43:01 -0700
Subject: [PATCH] Fix an issue where GPU0 was always being used in non-ddp

Frankly, I don't understand how this has ever worked. WTF.
---
 codes/models/ExtensibleTrainer.py | 4 ++--
 codes/train.py                    | 3 ++-
 codes/train2.py                   | 3 ++-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/codes/models/ExtensibleTrainer.py b/codes/models/ExtensibleTrainer.py
index 2a8d9336..81d2746c 100644
--- a/codes/models/ExtensibleTrainer.py
+++ b/codes/models/ExtensibleTrainer.py
@@ -108,14 +108,14 @@ class ExtensibleTrainer(BaseModel):
                                                device_ids=[torch.cuda.current_device()],
                                                find_unused_parameters=False)
             else:
-                dnet = DataParallel(anet)
+                dnet = DataParallel(anet, device_ids=opt['gpu_ids'])
             if self.is_train:
                 dnet.train()
             else:
                 dnet.eval()
             dnets.append(dnet)
         if not opt['dist']:
-            self.netF = DataParallel(self.netF)
+            self.netF = DataParallel(self.netF, device_ids=opt['gpu_ids'])

         # Backpush the wrapped networks into the network dicts..
         self.networks = {}
diff --git a/codes/train.py b/codes/train.py
index 14f3f56c..50f6bf14 100644
--- a/codes/train.py
+++ b/codes/train.py
@@ -284,8 +284,9 @@ if __name__ == '__main__':
     if args.launcher == 'none':  # disabled distributed training
         opt['dist'] = False
         trainer.rank = -1
+        if len(opt['gpu_ids']) == 1:
+            torch.cuda.set_device(opt['gpu_ids'][0])
         print('Disabled distributed training.')
-
     else:
         opt['dist'] = True
         init_dist('nccl')
diff --git a/codes/train2.py b/codes/train2.py
index c6db77d1..ab63c7b8 100644
--- a/codes/train2.py
+++ b/codes/train2.py
@@ -284,8 +284,9 @@ if __name__ == '__main__':
     if args.launcher == 'none':  # disabled distributed training
         opt['dist'] = False
         trainer.rank = -1
+        if len(opt['gpu_ids']) == 1:
+            torch.cuda.set_device(opt['gpu_ids'][0])
         print('Disabled distributed training.')
-
     else:
         opt['dist'] = True
         init_dist('nccl')
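
For context, here is a minimal sketch (not part of the patch; it assumes a machine with at least two GPUs and a hypothetical gpu_ids value standing in for opt['gpu_ids']) of the behavior the change addresses: torch.nn.DataParallel with no device_ids argument defaults to every visible GPU and places its gather/scatter work on GPU 0, so training could touch GPU 0 even when the config selected a different device. Passing device_ids, and pinning the default device with torch.cuda.set_device in the single-GPU non-distributed case, keeps all work on the requested GPU.

    import torch
    import torch.nn as nn

    # Hypothetical config mirroring opt['gpu_ids']; assumes GPU 1 exists.
    gpu_ids = [1]

    # Without device_ids, DataParallel wraps all visible GPUs and uses GPU 0
    # as the output/scatter device. Restricting it keeps everything on GPU 1.
    model = nn.DataParallel(nn.Linear(16, 16).cuda(gpu_ids[0]), device_ids=gpu_ids)

    # Single-GPU, non-distributed case: pin the default CUDA device so that
    # bare .cuda() calls elsewhere in the trainer also land on the chosen GPU.
    if len(gpu_ids) == 1:
        torch.cuda.set_device(gpu_ids[0])

    x = torch.randn(8, 16).cuda()   # allocated on gpu_ids[0] thanks to set_device
    y = model(x)                    # replicas run only on the devices in gpu_ids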