forked from camenduru/ai-voice-cloning
fixes
This commit is contained in:
parent 54f2fc792a
commit f9154c4db1

@@ -1 +1 @@
-Subproject commit 1f674a468f4202ac47feb8fb3587dc5837f2af2b
+Subproject commit e201746eeb3f5be602ae3395df8344f231a5f0d4

src/utils.py (23 changed lines)

@@ -1214,7 +1214,7 @@ def slice_waveform( waveform, sample_rate, start, end, trim ):
 
     return sliced, error
 
-def slice_dataset( voice, trim_silence=True, start_offset=0, end_offset=0, results=None ):
+def slice_dataset( voice, trim_silence=True, start_offset=0, end_offset=0, results=None, progress=gr.Progress() ):
     indir = f'./training/{voice}/'
     infile = f'{indir}/whisper.json'
     messages = []
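
Both changed signatures in this file follow the same pattern: the long-running dataset helpers gain a progress=gr.Progress() keyword so the Gradio UI can show a live progress bar instead of appearing frozen. Below is a minimal, self-contained sketch of that pattern, assuming only Gradio's public gr.Progress API; the file list, the sleep, and the demo layout are placeholders, not code from this repo.

import time
import gradio as gr

def slice_dataset(voice, trim_silence=True, progress=gr.Progress()):
    # Placeholder work items; the real function iterates whisper.json results.
    files = [f"{voice}_{i:04d}.wav" for i in range(8)]
    for f in progress.tqdm(files, desc="Slicing dataset"):
        time.sleep(0.1)  # stand-in for the actual slicing work
    return f"Sliced {len(files)} files for voice '{voice}'"

with gr.Blocks() as demo:
    voice = gr.Textbox(value="myvoice", label="Voice")
    out = gr.Textbox(label="Result")
    gr.Button("Slice").click(slice_dataset, inputs=voice, outputs=out)

# demo.launch()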
@@ -1269,12 +1269,14 @@ def phonemizer( text, language="eng" ):
     return ["_" if p in ignored else p for p in phones]
     """
 
-def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, normalize=True ):
+def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, normalize=True, progress=gr.Progress() ):
     indir = f'./training/{voice}/'
     infile = f'{indir}/whisper.json'
     messages = []
 
-    phonemize = args.tokenizer_json[-8:] == "ipa.json"
+    phonemize = args.tokenizer_json is not None and args.tokenizer_json[-8:] == "ipa.json"
+    if args.tts_backend == "vall-e":
+        phonemize = True
 
     if not os.path.exists(infile):
         raise Exception(f"Missing dataset: {infile}")
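
The replacement line guards the IPA check behind args.tokenizer_json is not None, so a missing tokenizer path no longer errors, and the two added lines force phonemization whenever the VALL-E backend is selected. A standalone sketch of that decision logic follows, with a hypothetical Args container standing in for the repo's global args object.

from dataclasses import dataclass
from typing import Optional

@dataclass
class Args:
    # Hypothetical stand-in for the repo's global `args`; field names are assumptions.
    tokenizer_json: Optional[str] = None
    tts_backend: str = "tortoise"

def should_phonemize(args: Args) -> bool:
    # Only phonemize when a tokenizer vocab is configured and it is an IPA one.
    phonemize = args.tokenizer_json is not None and args.tokenizer_json[-8:] == "ipa.json"
    # VALL-E trains on phoneme inputs, so always phonemize for that backend.
    if args.tts_backend == "vall-e":
        phonemize = True
    return phonemize

print(should_phonemize(Args(tokenizer_json="./models/tokenizers/ipa.json")))  # True
print(should_phonemize(Args(tts_backend="vall-e")))                           # True
print(should_phonemize(Args()))                                               # False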
@@ -1283,11 +1285,10 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, n
 
     lines = {
         'training': [],
-        'validation': [],
-        'recordings': [],
-        'supervisions': [],
+        'validation': []
     }
 
+    already_segmented = []
 
     errored = 0
     for filename in results:
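
With the recordings/supervisions keys dropped, lines is back to a plain two-way split that later gets written out as the training and validation lists. The sketch below is a hypothetical illustration of that split-and-write shape, not the repo's implementation; the threshold, row format, and output filenames are all assumptions.

def split_dataset(rows, text_length=12):
    # Short transcripts go to validation, the rest to training; the threshold
    # and the "path|text" row format here are illustrative only.
    lines = {
        'training': [],
        'validation': []
    }
    for path, text in rows:
        target = 'validation' if len(text) < text_length else 'training'
        lines[target].append(f"{path}|{text}")
    return lines

rows = [
    ("audio/a_0000.wav", "hello there"),
    ("audio/a_0001.wav", "a much longer transcription line"),
]
lines = split_dataset(rows)
with open("train.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(lines['training']))
with open("validation.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(lines['validation']))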
@@ -1328,11 +1329,13 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, n
 
         segments = result['segments'] if use_segment else [{'text': result['text']}]
 
-        for segment in segments:
+        for segment in enumerate_progress(segments, desc="Parsing segments", progress=progress):
             file = filename.replace(".wav", f"_{pad(segment['id'], 4)}.wav") if use_segment else filename
             path = f'{indir}/audio/{file}'
             # segment when needed
-            if not os.path.exists(path):
+            if not os.path.exists(path) and filename not in already_segmented:
+                already_segmented.append(filename)
                 tmp_results = {}
                 tmp_results[filename] = result
                 print(f"Audio not segmented, segmenting: {filename}")
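
Two things change in this loop: the per-segment iteration is wrapped in the repo's enumerate_progress helper so it reports to the UI, and the new already_segmented list makes sure a source file is only re-segmented once even though the existence check runs inside the per-segment loop. A minimal sketch of that seen-list guard, with a hypothetical segment_audio stand-in for the real slicing step:

import os

def segment_audio(filename):
    # Hypothetical stand-in for the real slicing step, which writes
    # one audio file per segment next to the source recording.
    print(f"Audio not segmented, segmenting: {filename}")

already_segmented = []

def ensure_segmented(path, filename):
    # Only trigger segmentation once per source file, even though this
    # check is evaluated for every segment of that file.
    if not os.path.exists(path) and filename not in already_segmented:
        already_segmented.append(filename)
        segment_audio(filename)

for seg_id in range(3):
    ensure_segmented(f"./training/voice/audio/clip_{seg_id:04d}.wav", "clip.wav")
# "clip.wav" is segmented at most once, no matter how many segments it has.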
@@ -2360,9 +2363,9 @@ def update_tokenizer(tokenizer_json):
     if hasattr(tts, "loading") and tts.loading:
         raise Exception("TTS is still initializing...")
 
-    print(f"Loading model: {tokenizer_json}")
+    print(f"Loading tokenizer vocab: {tokenizer_json}")
     tts.load_tokenizer_json(tokenizer_json)
-    print(f"Loaded model: {tts.tokenizer_json}")
+    print(f"Loaded tokenizer vocab: {tts.tokenizer_json}")
 
     do_gc()
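
The reworded log lines make it clear that update_tokenizer swaps the tokenizer vocabulary, not the model weights; tts.load_tokenizer_json is the repo's own method. As a rough illustration of what loading a tokenizer vocab JSON can involve, here is a sketch using the Hugging Face tokenizers package; the file path is an example, and this is not necessarily how the repo implements it.

from tokenizers import Tokenizer

def load_tokenizer_json(tokenizer_json):
    print(f"Loading tokenizer vocab: {tokenizer_json}")
    tokenizer = Tokenizer.from_file(tokenizer_json)  # reads a tokenizer.json vocab file
    print(f"Loaded tokenizer vocab: {tokenizer_json}")
    return tokenizer

# tok = load_tokenizer_json("./models/tokenizers/ipa.json")
# print(tok.encode("hello world").tokens)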
@@ -199,10 +199,10 @@ def prepare_dataset_proxy( voice, language, validation_text_length, validation_a
         messages.append(message)
 
     if slice_audio:
-        message = slice_dataset( voice, trim_silence=trim_silence, start_offset=slice_start_offset, end_offset=slice_end_offset )
+        message = slice_dataset( voice, trim_silence=trim_silence, start_offset=slice_start_offset, end_offset=slice_end_offset, progress=progress )
         messages.append(message)
 
-    message = prepare_dataset( voice, use_segments=slice_audio, text_length=validation_text_length, audio_length=validation_audio_length )
+    message = prepare_dataset( voice, use_segments=slice_audio, text_length=validation_text_length, audio_length=validation_audio_length, progress=progress )
     messages.append(message)
 
     return "\n".join(messages)
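
Finally, the proxy that drives the pipeline now forwards its progress object to both helpers and keeps collecting their status strings. A condensed sketch of that orchestration shape, with stub helpers in place of the real transcribe/slice/prepare functions:

import gradio as gr

# Stub helpers standing in for the real slice/prepare steps.
def slice_dataset(voice, progress=gr.Progress()):
    return f"Sliced dataset for {voice}"

def prepare_dataset(voice, use_segments=False, progress=gr.Progress()):
    return f"Prepared dataset for {voice} (use_segments={use_segments})"

def prepare_dataset_proxy(voice, slice_audio=False, progress=gr.Progress()):
    messages = []
    if slice_audio:
        messages.append(slice_dataset(voice, progress=progress))
    messages.append(prepare_dataset(voice, use_segments=slice_audio, progress=progress))
    # The joined string is what the web UI shows in its output textbox.
    return "\n".join(messages)

print(prepare_dataset_proxy("myvoice", slice_audio=True))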