From 5a74461c1ea58fab038a90bc91a26c0b6a6d6966 Mon Sep 17 00:00:00 2001
From: Marcus Llewellyn <marcus.llewellyn@gmail.com>
Date: Sat, 4 Jun 2022 17:47:29 -0500
Subject: [PATCH 1/2] read.py combines all candidates

If the number of candidates was greater than 1 in read.py, only the first candidate's clips would be combined. This adds a bit of code to make a combined file for every candidate.
---
 tortoise/read.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/tortoise/read.py b/tortoise/read.py
index b28c8c4..75da75f 100644
--- a/tortoise/read.py
+++ b/tortoise/read.py
@@ -72,11 +72,22 @@ if __name__ == '__main__':
                 gen = gen[0].squeeze(0).cpu()
             all_parts.append(gen)
 
-        full_audio = torch.cat(all_parts, dim=-1)
-        torchaudio.save(os.path.join(voice_outpath, 'combined.wav'), full_audio, 24000)
+        if args.candidates == 1:
+            full_audio = torch.cat(all_parts, dim=-1)
+            torchaudio.save(os.path.join(voice_outpath, 'combined.wav'), full_audio, 24000)
 
         if args.produce_debug_state:
             os.makedirs('debug_states', exist_ok=True)
             dbg_state = (seed, texts, voice_samples, conditioning_latents)
             torch.save(dbg_state, f'debug_states/read_debug_{selected_voice}.pth')
 
+    # Combine each candidate's audio clips.
+    if args.candidates > 1:
+        audio_clips = []
+        for candidate in range(args.candidates):
+            for line in range(len(texts)):
+                wav_file = os.path.join(voice_outpath, str(line), f"{candidate}.wav")
+                audio_clips.append(load_audio(wav_file, 24000))
+            audio_clips = torch.cat(audio_clips, dim=-1)
+            torchaudio.save(os.path.join(voice_outpath, f"combined_{candidate:02d}.wav"), audio_clips, 24000)
+            audio_clips = []
\ No newline at end of file

From 0e08760896d99abae50ae67d181bb149528180ab Mon Sep 17 00:00:00 2001
From: Marcus Llewellyn <marcus.llewellyn@gmail.com>
Date: Mon, 6 Jun 2022 15:13:29 -0500
Subject: [PATCH 2/2] Fixed silly lack of EOF blank line, indentation

---
 tortoise/read.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/tortoise/read.py b/tortoise/read.py
index 75da75f..05b6658 100644
--- a/tortoise/read.py
+++ b/tortoise/read.py
@@ -81,13 +81,13 @@ if __name__ == '__main__':
             dbg_state = (seed, texts, voice_samples, conditioning_latents)
             torch.save(dbg_state, f'debug_states/read_debug_{selected_voice}.pth')
 
-    # Combine each candidate's audio clips.
-    if args.candidates > 1:
-        audio_clips = []
-        for candidate in range(args.candidates):
-            for line in range(len(texts)):
-                wav_file = os.path.join(voice_outpath, str(line), f"{candidate}.wav")
-                audio_clips.append(load_audio(wav_file, 24000))
-            audio_clips = torch.cat(audio_clips, dim=-1)
-            torchaudio.save(os.path.join(voice_outpath, f"combined_{candidate:02d}.wav"), audio_clips, 24000)
-            audio_clips = []
\ No newline at end of file
+        # Combine each candidate's audio clips.
+        if args.candidates > 1:
+            audio_clips = []
+            for candidate in range(args.candidates):
+                for line in range(len(texts)):
+                    wav_file = os.path.join(voice_outpath, str(line), f"{candidate}.wav")
+                    audio_clips.append(load_audio(wav_file, 24000))
+                audio_clips = torch.cat(audio_clips, dim=-1)
+                torchaudio.save(os.path.join(voice_outpath, f"combined_{candidate:02d}.wav"), audio_clips, 24000)
+                audio_clips = []