forked from mrq/ai-voice-cloning
Modified the logic that determines valid voice folders; it now also allows subdirectories within a voice folder (for example, ./voices/SH/james/ will be named SH/james).
This commit is contained in:
parent
02beb1dd8e
commit
faa8da12d7
52
src/utils.py
52
src/utils.py
|
@ -32,6 +32,7 @@ import gradio as gr
|
||||||
import gradio.utils
|
import gradio.utils
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
from glob import glob
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
|
|
||||||
|
@ -1709,7 +1710,7 @@ def transcribe_dataset( voice, language=None, skip_existings=False, progress=Non
|
||||||
|
|
||||||
results = {}
|
results = {}
|
||||||
|
|
||||||
files = sorted( get_voices(load_latents=False)[voice] )
|
files = get_voice(voice, load_latents=False)
|
||||||
indir = f'./training/{voice}/'
|
indir = f'./training/{voice}/'
|
||||||
infile = f'{indir}/whisper.json'
|
infile = f'{indir}/whisper.json'
|
||||||
|
|
||||||
|
@ -2104,9 +2105,15 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
|
||||||
phn_file = jobs['phonemize'][0][i]
|
phn_file = jobs['phonemize'][0][i]
|
||||||
normalized = jobs['phonemize'][1][i]
|
normalized = jobs['phonemize'][1][i]
|
||||||
|
|
||||||
phonemized = valle_phonemize( normalized )
|
try:
|
||||||
open(phn_file, 'w', encoding='utf-8').write(" ".join(phonemized))
|
phonemized = valle_phonemize( normalized )
|
||||||
print("Phonemized:", phn_file)
|
open(phn_file, 'w', encoding='utf-8').write(" ".join(phonemized))
|
||||||
|
print("Phonemized:", phn_file)
|
||||||
|
except Exception as e:
|
||||||
|
message = f"Failed to phonemize: {phn_file}: {normalized}"
|
||||||
|
messages.append(message)
|
||||||
|
print(message)
|
||||||
|
|
||||||
|
|
||||||
training_joined = "\n".join(lines['training'])
|
training_joined = "\n".join(lines['training'])
|
||||||
validation_joined = "\n".join(lines['validation'])
|
validation_joined = "\n".join(lines['validation'])
|
||||||
|
@ -2431,12 +2438,47 @@ def import_voices(files, saveAs=None, progress=None):
|
||||||
def relative_paths( dirs ):
|
def relative_paths( dirs ):
|
||||||
return [ './' + os.path.relpath( d ).replace("\\", "/") for d in dirs ]
|
return [ './' + os.path.relpath( d ).replace("\\", "/") for d in dirs ]
|
||||||
|
|
||||||
|
def get_voice( name, dir=get_voice_dir(), load_latents=True ):
|
||||||
|
subj = f'{dir}/{name}/'
|
||||||
|
if not os.path.isdir(subj):
|
||||||
|
return
|
||||||
|
|
||||||
|
voice = list(glob(f'{subj}/*.wav')) + list(glob(f'{subj}/*.mp3')) + list(glob(f'{subj}/*.flac'))
|
||||||
|
if load_latents:
|
||||||
|
voice = voice + list(glob(f'{subj}/*.pth'))
|
||||||
|
return sorted( voice )
|
||||||
|
|
||||||
def get_voice_list(dir=get_voice_dir(), append_defaults=False):
|
def get_voice_list(dir=get_voice_dir(), append_defaults=False):
|
||||||
defaults = [ "random", "microphone" ]
|
defaults = [ "random", "microphone" ]
|
||||||
os.makedirs(dir, exist_ok=True)
|
os.makedirs(dir, exist_ok=True)
|
||||||
res = sorted([d for d in os.listdir(dir) if d not in defaults and os.path.isdir(os.path.join(dir, d)) and len(os.listdir(os.path.join(dir, d))) > 0 ])
|
#res = sorted([d for d in os.listdir(dir) if d not in defaults and os.path.isdir(os.path.join(dir, d)) and len(os.listdir(os.path.join(dir, d))) > 0 ])
|
||||||
|
|
||||||
|
res = []
|
||||||
|
for name in os.listdir(dir):
|
||||||
|
if name in defaults:
|
||||||
|
continue
|
||||||
|
if not os.path.isdir(f'{dir}/{name}'):
|
||||||
|
continue
|
||||||
|
if len(os.listdir(os.path.join(dir, name))) == 0:
|
||||||
|
continue
|
||||||
|
files = get_voice( name, dir=dir )
|
||||||
|
|
||||||
|
if len(files) > 0:
|
||||||
|
res.append(name)
|
||||||
|
else:
|
||||||
|
for subdir in os.listdir(f'{dir}/{name}'):
|
||||||
|
if not os.path.isdir(f'{dir}/{name}/{subdir}'):
|
||||||
|
continue
|
||||||
|
files = get_voice( f'{name}/{subdir}', dir=dir )
|
||||||
|
if len(files) == 0:
|
||||||
|
continue
|
||||||
|
res.append(f'{name}/{subdir}')
|
||||||
|
|
||||||
|
res = sorted(res)
|
||||||
|
|
||||||
if append_defaults:
|
if append_defaults:
|
||||||
res = res + defaults
|
res = res + defaults
|
||||||
|
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def get_valle_models(dir="./training/"):
|
def get_valle_models(dir="./training/"):
|
||||||
|
|
|
@ -201,7 +201,7 @@ def diarize_dataset( voice, progress=gr.Progress(track_tqdm=False) ):
|
||||||
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=args.hf_token)
|
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=args.hf_token)
|
||||||
|
|
||||||
messages = []
|
messages = []
|
||||||
files = sorted( get_voices(load_latents=False)[voice] )
|
files = get_voice(voice, load_latents=False)
|
||||||
for file in enumerate_progress(files, desc="Iterating through voice files", progress=progress):
|
for file in enumerate_progress(files, desc="Iterating through voice files", progress=progress):
|
||||||
diarization = pipeline(file)
|
diarization = pipeline(file)
|
||||||
for turn, _, speaker in diarization.itertracks(yield_label=True):
|
for turn, _, speaker in diarization.itertracks(yield_label=True):
|
||||||
|
@ -217,15 +217,12 @@ def prepare_all_datasets( language, validation_text_length, validation_audio_len
|
||||||
messages = []
|
messages = []
|
||||||
voices = get_voice_list()
|
voices = get_voice_list()
|
||||||
|
|
||||||
"""
|
|
||||||
for voice in voices:
|
|
||||||
message = prepare_dataset_proxy(voice, **kwargs)
|
|
||||||
messages.append(message)
|
|
||||||
"""
|
"""
|
||||||
for voice in voices:
|
for voice in voices:
|
||||||
print("Processing:", voice)
|
print("Processing:", voice)
|
||||||
message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress )
|
message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress )
|
||||||
messages.append(message)
|
messages.append(message)
|
||||||
|
"""
|
||||||
|
|
||||||
if slice_audio:
|
if slice_audio:
|
||||||
for voice in voices:
|
for voice in voices:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user