Allow passing additional voice directories when loading voices

This commit is contained in:
Johan Nordberg 2022-05-19 11:35:57 +00:00
parent 00730d2786
commit b4fa8c86b9

View File

@@ -82,21 +82,23 @@ def dynamic_range_decompression(x, C=1):
return torch.exp(x) / C return torch.exp(x) / C
def get_voices(extra_voice_dirs=None):
    """Map every available voice name to the files that define it.

    The built-in ``tortoise/voices`` directory is scanned first, followed by
    each directory in *extra_voice_dirs* in the order given. Every immediate
    sub-directory found is treated as one voice, named after the
    sub-directory.

    Args:
        extra_voice_dirs: Optional additional directories to scan. May be
            ``None`` (the default), any iterable of paths (list, tuple, ...),
            or a single path given as a string / ``os.PathLike``.

    Returns:
        A dict mapping voice name to a list of file paths inside that voice's
        directory: all ``*.wav`` matches, then ``*.mp3``, then ``*.pth``.

    Raises:
        OSError: propagated from ``os.listdir`` when a directory cannot be
            listed (for example, it does not exist).
    """
    # A default of ``None`` avoids sharing one mutable list between calls.
    if extra_voice_dirs is None:
        extra_voice_dirs = ()
    elif isinstance(extra_voice_dirs, (str, os.PathLike)):
        # A bare path would otherwise be iterated character by character.
        extra_voice_dirs = (extra_voice_dirs,)

    # Unpacking (rather than ``list + arg``) accepts any iterable, not only lists.
    dirs = ['tortoise/voices', *extra_voice_dirs]

    voices = {}
    for d in dirs:
        for sub in os.listdir(d):
            subj = os.path.join(d, sub)
            if os.path.isdir(subj):
                # Plain assignment: a voice name that appears again in a later
                # directory replaces the entry from an earlier one.
                voices[sub] = [
                    path
                    for ext in ('wav', 'mp3', 'pth')
                    for path in glob(f'{subj}/*.{ext}')
                ]
    return voices
def load_voice(voice): def load_voice(voice, extra_voice_dirs=[]):
if voice == 'random': if voice == 'random':
return None, None return None, None
voices = get_voices() voices = get_voices(extra_voice_dirs)
paths = voices[voice] paths = voices[voice]
if len(paths) == 1 and paths[0].endswith('.pth'): if len(paths) == 1 and paths[0].endswith('.pth'):
return None, torch.load(paths[0]) return None, torch.load(paths[0])
@@ -108,14 +110,14 @@ def load_voice(voice):
return conds, None return conds, None
def load_voices(voices): def load_voices(voices, extra_voice_dirs=[]):
latents = [] latents = []
clips = [] clips = []
for voice in voices: for voice in voices:
if voice == 'random': if voice == 'random':
print("Cannot combine a random voice with a non-random voice. Just using a random voice.") print("Cannot combine a random voice with a non-random voice. Just using a random voice.")
return None, None return None, None
clip, latent = load_voice(voice) clip, latent = load_voice(voice, extra_voice_dirs)
if latent is None: if latent is None:
assert len(latents) == 0, "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this." assert len(latents) == 0, "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this."
clips.extend(clip) clips.extend(clip)