From af6d5cd52601bb57da19b96c6fa4b76ad184b244 Mon Sep 17 00:00:00 2001
From: James Betker <jbetker@gmail.com>
Date: Wed, 29 Dec 2021 08:50:49 -0700
Subject: [PATCH] Add resume into speech-speech

---
 codes/scripts/audio/speech_to_speech_clip.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/codes/scripts/audio/speech_to_speech_clip.py b/codes/scripts/audio/speech_to_speech_clip.py
index 2826ba1f..485607c9 100644
--- a/codes/scripts/audio/speech_to_speech_clip.py
+++ b/codes/scripts/audio/speech_to_speech_clip.py
@@ -46,6 +46,11 @@ def process_subdir(subdir, options, clip_sz):
 
     root, paths = subdir
     root = str(root)
+    output_file = os.path.join(root, 'similarities.pth')
+    if os.path.exists(output_file):
+        print(f'{root} already processed. Skipping.')
+        return
+    print(f'Processing {root}..')
 
     clips = []
     for path in paths:
@@ -70,6 +75,8 @@ def process_subdir(subdir, options, clip_sz):
             sims = torch.cat([sims, outp], dim=0)
 
     simmap = {}
+    # TODO: this can be further improved. We're just taking the topk here, but there is no guarantee that there are 3
+    # samples from the same speaker in any given folder.
     for path, sim in zip(paths, sims):
         n = min(4, len(sim))
         top3 = torch.topk(sim, n)
@@ -82,7 +89,7 @@ def process_subdir(subdir, options, clip_sz):
                 top_ind = top3.indices[i]
                 simpaths.append(os.path.relpath(paths[top_ind], root))
         simmap[rel] = simpaths
-    torch.save(simmap, os.path.join(root, 'similarities.pth'))
+    torch.save(simmap, output_file)
 
 
 if __name__ == '__main__':
@@ -94,7 +101,7 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('-o', type=str, help='Path to the options YAML file used to train the CLIP model', default='../options/train_voice_voice_clip.yml')
     parser.add_argument('--num_workers', type=int, help='Number concurrent processes to use', default=1)
-    parser.add_argument('--root_path', type=str, help='Root path to search for audio directories from', default='Z:\\clips\\podcasts-0\\7_Joe Rogan Experience #1004 - W. Kamau Bell')
+    parser.add_argument('--root_path', type=str, help='Root path to search for audio directories from', default='Y:\\clips\\podcasts-0\\5177_20190625-Food Waste is Solvable')
     parser.add_argument('--clip_size', type=int, help='Amount of audio samples to pull from each file', default=22050)
     args = parser.parse_args()