From 5a74461c1ea58fab038a90bc91a26c0b6a6d6966 Mon Sep 17 00:00:00 2001 From: Marcus Llewellyn Date: Sat, 4 Jun 2022 17:47:29 -0500 Subject: [PATCH 1/2] read.py combines all candidates If candidates were greater than 1 in read.py, only the first candidate's clips would be combined. This adds a bit of code to make a combined file for every candidate. --- tortoise/read.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tortoise/read.py b/tortoise/read.py index b28c8c4..75da75f 100644 --- a/tortoise/read.py +++ b/tortoise/read.py @@ -72,11 +72,22 @@ if __name__ == '__main__': gen = gen[0].squeeze(0).cpu() all_parts.append(gen) - full_audio = torch.cat(all_parts, dim=-1) - torchaudio.save(os.path.join(voice_outpath, 'combined.wav'), full_audio, 24000) + if args.candidates == 1: + full_audio = torch.cat(all_parts, dim=-1) + torchaudio.save(os.path.join(voice_outpath, 'combined.wav'), full_audio, 24000) if args.produce_debug_state: os.makedirs('debug_states', exist_ok=True) dbg_state = (seed, texts, voice_samples, conditioning_latents) torch.save(dbg_state, f'debug_states/read_debug_{selected_voice}.pth') + # Combine each candidate's audio clips. 
+ if args.candidates > 1: + audio_clips = [] + for candidate in range(args.candidates): + for line in range(len(texts)): + wav_file = os.path.join(voice_outpath, str(line), f"{candidate}.wav") + audio_clips.append(load_audio(wav_file, 24000)) + audio_clips = torch.cat(audio_clips, dim=-1) + torchaudio.save(os.path.join(voice_outpath, f"combined_{candidate:02d}.wav"), audio_clips, 24000) + audio_clips = [] \ No newline at end of file From 0e08760896d99abae50ae67d181bb149528180ab Mon Sep 17 00:00:00 2001 From: Marcus Llewellyn Date: Mon, 6 Jun 2022 15:13:29 -0500 Subject: [PATCH 2/2] Fixed silly lack of EOF blank line, indentation --- tortoise/read.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tortoise/read.py b/tortoise/read.py index 75da75f..05b6658 100644 --- a/tortoise/read.py +++ b/tortoise/read.py @@ -81,13 +81,13 @@ if __name__ == '__main__': dbg_state = (seed, texts, voice_samples, conditioning_latents) torch.save(dbg_state, f'debug_states/read_debug_{selected_voice}.pth') - # Combine each candidate's audio clips. - if args.candidates > 1: - audio_clips = [] - for candidate in range(args.candidates): - for line in range(len(texts)): - wav_file = os.path.join(voice_outpath, str(line), f"{candidate}.wav") - audio_clips.append(load_audio(wav_file, 24000)) - audio_clips = torch.cat(audio_clips, dim=-1) - torchaudio.save(os.path.join(voice_outpath, f"combined_{candidate:02d}.wav"), audio_clips, 24000) - audio_clips = [] \ No newline at end of file + # Combine each candidate's audio clips. + if args.candidates > 1: + audio_clips = [] + for candidate in range(args.candidates): + for line in range(len(texts)): + wav_file = os.path.join(voice_outpath, str(line), f"{candidate}.wav") + audio_clips.append(load_audio(wav_file, 24000)) + audio_clips = torch.cat(audio_clips, dim=-1) + torchaudio.save(os.path.join(voice_outpath, f"combined_{candidate:02d}.wav"), audio_clips, 24000) + audio_clips = []