forked from mrq/tortoise-tts
Merge pull request 'feat: support .flac voice files' (#43) from NtTestAlert/tortoise-tts:support_flac_voice into main
Reviewed-on: mrq/tortoise-tts#43
This commit is contained in:
commit
815ae5d707
|
@@ -2,6 +2,7 @@ import os
|
||||||
from glob import glob
|
from glob import glob
|
||||||
|
|
||||||
import librosa
|
import librosa
|
||||||
|
import soundfile as sf
|
||||||
import torch
|
import torch
|
||||||
import torchaudio
|
import torchaudio
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
@@ -24,6 +25,9 @@ def load_audio(audiopath, sampling_rate):
|
||||||
elif audiopath[-4:] == '.mp3':
|
elif audiopath[-4:] == '.mp3':
|
||||||
audio, lsr = librosa.load(audiopath, sr=sampling_rate)
|
audio, lsr = librosa.load(audiopath, sr=sampling_rate)
|
||||||
audio = torch.FloatTensor(audio)
|
audio = torch.FloatTensor(audio)
|
||||||
|
elif audiopath[-5:] == '.flac':
|
||||||
|
audio, lsr = sf.read(audiopath)
|
||||||
|
audio = torch.FloatTensor(audio)
|
||||||
else:
|
else:
|
||||||
assert False, f"Unsupported audio format provided: {audiopath[-4:]}"
|
assert False, f"Unsupported audio format provided: {audiopath[-4:]}"
|
||||||
|
|
||||||
|
@@ -85,7 +89,7 @@ def get_voices(extra_voice_dirs=[], load_latents=True):
|
||||||
for sub in subs:
|
for sub in subs:
|
||||||
subj = os.path.join(d, sub)
|
subj = os.path.join(d, sub)
|
||||||
if os.path.isdir(subj):
|
if os.path.isdir(subj):
|
||||||
voices[sub] = list(glob(f'{subj}/*.wav')) + list(glob(f'{subj}/*.mp3'))
|
voices[sub] = list(glob(f'{subj}/*.wav')) + list(glob(f'{subj}/*.mp3')) + list(glob(f'{subj}/*.flac'))
|
||||||
if load_latents:
|
if load_latents:
|
||||||
voices[sub] = voices[sub] + list(glob(f'{subj}/*.pth'))
|
voices[sub] = voices[sub] + list(glob(f'{subj}/*.pth'))
|
||||||
return voices
|
return voices
|
||||||
|
|
Loading…
Reference in New Issue
Block a user