Merge pull request 'feat: support .flac voice files' (#43) from NtTestAlert/tortoise-tts:support_flac_voice into main

Reviewed-on: mrq/tortoise-tts#43
This commit is contained in:
mrq 2023-04-01 16:37:56 +00:00
commit 815ae5d707

View File

@@ -2,6 +2,7 @@ import os
from glob import glob from glob import glob
import librosa import librosa
import soundfile as sf
import torch import torch
import torchaudio import torchaudio
import numpy as np import numpy as np
@@ -24,6 +25,9 @@ def load_audio(audiopath, sampling_rate):
elif audiopath[-4:] == '.mp3': elif audiopath[-4:] == '.mp3':
audio, lsr = librosa.load(audiopath, sr=sampling_rate) audio, lsr = librosa.load(audiopath, sr=sampling_rate)
audio = torch.FloatTensor(audio) audio = torch.FloatTensor(audio)
elif audiopath[-5:] == '.flac':
audio, lsr = sf.read(audiopath)
audio = torch.FloatTensor(audio)
else: else:
assert False, f"Unsupported audio format provided: {audiopath[-4:]}" assert False, f"Unsupported audio format provided: {audiopath[-4:]}"
@@ -85,7 +89,7 @@ def get_voices(extra_voice_dirs=[], load_latents=True):
for sub in subs: for sub in subs:
subj = os.path.join(d, sub) subj = os.path.join(d, sub)
if os.path.isdir(subj): if os.path.isdir(subj):
voices[sub] = list(glob(f'{subj}/*.wav')) + list(glob(f'{subj}/*.mp3')) voices[sub] = list(glob(f'{subj}/*.wav')) + list(glob(f'{subj}/*.mp3')) + list(glob(f'{subj}/*.flac'))
if load_latents: if load_latents:
voices[sub] = voices[sub] + list(glob(f'{subj}/*.pth')) voices[sub] = voices[sub] + list(glob(f'{subj}/*.pth'))
return voices return voices