From 48cb6a5abd3b159c50bc73dee302b6ae68893302 Mon Sep 17 00:00:00 2001
From: James Betker
Date: Sat, 16 Apr 2022 20:28:04 -0600
Subject: [PATCH] misc

---
 codes/data/audio/unsupervised_audio_dataset.py | 10 +++++-----
 codes/models/clip/clvp.py                      | 13 -------------
 codes/train.py                                 |  2 +-
 3 files changed, 6 insertions(+), 19 deletions(-)

diff --git a/codes/data/audio/unsupervised_audio_dataset.py b/codes/data/audio/unsupervised_audio_dataset.py
index d95afe05..8ffde8bb 100644
--- a/codes/data/audio/unsupervised_audio_dataset.py
+++ b/codes/data/audio/unsupervised_audio_dataset.py
@@ -176,15 +176,15 @@ class UnsupervisedAudioDataset(torch.utils.data.Dataset):
 
 if __name__ == '__main__':
     params = {
         'mode': 'unsupervised_audio',
-        'path': ['\\\\192.168.5.3\\rtx3080_audio\\split\\cleaned\\books0'],
-        'cache_path': 'E:\\audio\\remote-cache3.pth',
+        'path': ['Y:\\split\\yt-music'],
+        'cache_path': 'Y:\\split\\yt-music\\cache-windows.pth',
         'sampling_rate': 22050,
-        'pad_to_samples': 40960,
+        'pad_to_samples': 22050,
         'phase': 'train',
         'n_workers': 1,
         'batch_size': 16,
         'extra_samples': 4,
-        'resample_clip': True,
+        'resample_clip': False,
     }
     from data import create_dataset, create_dataloader
@@ -195,5 +195,5 @@ if __name__ == '__main__':
         for b_ in range(b['clip'].shape[0]):
             #pass
             torchaudio.save(f'{i}_clip_{b_}.wav', b['clip'][b_], ds.sampling_rate)
-            torchaudio.save(f'{i}_resampled_clip_{b_}.wav', b['resampled_clip'][b_], ds.sampling_rate)
+            #torchaudio.save(f'{i}_resampled_clip_{b_}.wav', b['resampled_clip'][b_], ds.sampling_rate)
         i += 1
diff --git a/codes/models/clip/clvp.py b/codes/models/clip/clvp.py
index e95bb4e7..7626c7ca 100644
--- a/codes/models/clip/clvp.py
+++ b/codes/models/clip/clvp.py
@@ -86,7 +86,6 @@ class CLVP(nn.Module):
                  speech_enc_depth=6,
                  speech_mask_percentage=0,
                  latent_multiplier=4,
-                 is_distributed=False,
                  ):
         super().__init__()
         latent_dim = latent_multiplier*model_dim
@@ -102,8 +101,6 @@ class CLVP(nn.Module):
         self.text_transformer = CollapsingTransformer(model_dim, latent_dim, transformer_heads, dropout, text_enc_depth, text_mask_percentage, use_rms_scaleshift_norm=True)
         self.to_text_latent = nn.Linear(latent_dim, latent_dim, bias=False)
 
-        self.distributed = is_distributed
-
         if mel_codes is None:
             self.speech_emb = nn.Conv1d(mel_channels, model_dim, kernel_size=5, padding=2)
         else:
@@ -143,16 +140,6 @@ class CLVP(nn.Module):
         text_latents = self.to_text_latent(enc_text)
         speech_latents = self.to_speech_latent(enc_speech)
 
-        if self.distributed:
-            ws = get_world_size()
-            text_gather_cells = [torch.zeros_like(text_latents) for _ in range(ws)]
-            speech_gather_cells = [torch.zeros_like(speech_latents) for _ in range(ws)]
-            distributed.all_gather(text_gather_cells, text_latents)
-            text_gather_cells[distributed.get_rank()] = text_latents  # Propagate gradients in this way.
-            text_latents = torch.cat(text_gather_cells, dim=0)
-            distributed.all_gather(speech_gather_cells, speech_latents)
-            speech_gather_cells[distributed.get_rank()] = speech_latents
-            speech_latents = torch.cat(speech_gather_cells, dim=0)
 
         text_latents, speech_latents = map(lambda t: F.normalize(t, p=2, dim=-1), (text_latents, speech_latents))
         temp = self.temperature.exp()
diff --git a/codes/train.py b/codes/train.py
index 70578721..e47aad76 100644
--- a/codes/train.py
+++ b/codes/train.py
@@ -327,7 +327,7 @@ class Trainer:
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_clip_text_to_voice.yml')
+    parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_cvvp_codes.yml')
     parser.add_argument('--launcher', choices=['none', 'pytorch'], default='none', help='job launcher')
     args = parser.parse_args()
     opt = option.parse(args.opt, is_train=True)
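Note on the block removed from clvp.py: those lines implemented the usual gradient-preserving all_gather used to assemble cross-GPU batches for a CLIP-style contrastive loss. For reference, a minimal standalone sketch of that pattern, assuming an initialized torch.distributed process group (gather_with_grad is a hypothetical helper name, not something defined in this repo):

import torch
import torch.distributed as distributed

def gather_with_grad(latents: torch.Tensor) -> torch.Tensor:
    # all_gather fills the list with tensors detached from the autograd
    # graph, so the slot belonging to this rank is overwritten with the
    # original tensor; gradients then flow back through the local shard.
    ws = distributed.get_world_size()
    gather_cells = [torch.zeros_like(latents) for _ in range(ws)]
    distributed.all_gather(gather_cells, latents)
    gather_cells[distributed.get_rank()] = latents  # propagate gradients this way
    return torch.cat(gather_cells, dim=0)

The forward pass would apply this to text_latents and speech_latents before L2-normalizing, which is exactly what the deleted lines did inline for each of the two latent streams.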