forked from mrq/tortoise-tts
Merge pull request #49 from faad3/main
Fix bug in load_voices in audio.py
This commit is contained in:
commit
fcc7b16677
|
@ -119,14 +119,16 @@ def load_voices(voices):
|
||||||
if latent is None:
|
if latent is None:
|
||||||
assert len(latents) == 0, "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
|
assert len(latents) == 0, "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
|
||||||
clips.extend(clip)
|
clips.extend(clip)
|
||||||
elif voice is None:
|
elif clip is None:
|
||||||
assert len(voices) == 0, "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
|
assert len(clips) == 0, "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
|
||||||
latents.append(latent)
|
latents.append(latent)
|
||||||
if len(latents) == 0:
|
if len(latents) == 0:
|
||||||
return clips, None
|
return clips, None
|
||||||
else:
|
else:
|
||||||
latents = torch.stack(latents, dim=0)
|
latents_0 = torch.stack([l[0] for l in latents], dim=0).mean(dim=0)
|
||||||
return None, latents.mean(dim=0)
|
latents_1 = torch.stack([l[1] for l in latents], dim=0).mean(dim=0)
|
||||||
|
latents = (latents_0,latents_1)
|
||||||
|
return None, latents
|
||||||
|
|
||||||
|
|
||||||
class TacotronSTFT(torch.nn.Module):
|
class TacotronSTFT(torch.nn.Module):
|
||||||
|
@ -178,4 +180,4 @@ def wav_to_univnet_mel(wav, do_normalization=False):
|
||||||
mel = stft.mel_spectrogram(wav)
|
mel = stft.mel_spectrogram(wav)
|
||||||
if do_normalization:
|
if do_normalization:
|
||||||
mel = normalize_tacotron_mel(mel)
|
mel = normalize_tacotron_mel(mel)
|
||||||
return mel
|
return mel
|
||||||
|
|
Loading…
Reference in New Issue
Block a user