Merge pull request #49 from faad3/main

Fix bug in load_voices in audio.py
2022-05-17 11:20:44 -06:00 · 2022-05-17 11:20:44 -06:00 · fcc7b16677
commit fcc7b16677
parent e0329de2c2 ef5fb5f5fc
1 changed file with 7 additions and 5 deletions
--- a/tortoise/utils/audio.py
+++ b/tortoise/utils/audio.py
@@ -119,14 +119,16 @@ def load_voices(voices):
        if latent is None:
            assert len(latents) == 0, "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
            clips.extend(clip)
-        elif voice is None:
+        elif clip is None:
-            assert len(voices) == 0, "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
+            assert len(clips) == 0, "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
            latents.append(latent)
    if len(latents) == 0:
        return clips, None
    else:
-        latents = torch.stack(latents, dim=0)
+        latents_0 = torch.stack([l[0] for l in latents], dim=0).mean(dim=0)
-        return None, latents.mean(dim=0)
+        latents_1 = torch.stack([l[1] for l in latents], dim=0).mean(dim=0)
+        latents = (latents_0,latents_1)
+        return None, latents
 class TacotronSTFT(torch.nn.Module):
@@ -178,4 +180,4 @@ def wav_to_univnet_mel(wav, do_normalization=False):
    mel = stft.mel_spectrogram(wav)
    if do_normalization:
        mel = normalize_tacotron_mel(mel)
-    return mel
+    return mel