This commit is contained in:
mrq 2023-03-16 14:19:56 +00:00
parent 54f2fc792a
commit f9154c4db1
3 changed files with 16 additions and 13 deletions

@ -1 +1 @@
Subproject commit 1f674a468f4202ac47feb8fb3587dc5837f2af2b Subproject commit e201746eeb3f5be602ae3395df8344f231a5f0d4

View File

@ -1214,7 +1214,7 @@ def slice_waveform( waveform, sample_rate, start, end, trim ):
return sliced, error return sliced, error
def slice_dataset( voice, trim_silence=True, start_offset=0, end_offset=0, results=None ): def slice_dataset( voice, trim_silence=True, start_offset=0, end_offset=0, results=None, progress=gr.Progress() ):
indir = f'./training/{voice}/' indir = f'./training/{voice}/'
infile = f'{indir}/whisper.json' infile = f'{indir}/whisper.json'
messages = [] messages = []
@ -1269,12 +1269,14 @@ def phonemizer( text, language="eng" ):
return ["_" if p in ignored else p for p in phones] return ["_" if p in ignored else p for p in phones]
""" """
def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, normalize=True ): def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, normalize=True, progress=gr.Progress() ):
indir = f'./training/{voice}/' indir = f'./training/{voice}/'
infile = f'{indir}/whisper.json' infile = f'{indir}/whisper.json'
messages = [] messages = []
phonemize = phonemize=args.tokenizer_json[-8:] == "ipa.json" phonemize = args.tokenizer_json is not None and phonemize=args.tokenizer_json[-8:] == "ipa.json"
if args.tts_backend == "vall-e":
phonemize = True
if not os.path.exists(infile): if not os.path.exists(infile):
raise Exception(f"Missing dataset: {infile}") raise Exception(f"Missing dataset: {infile}")
@ -1283,11 +1285,10 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, n
lines = { lines = {
'training': [], 'training': [],
'validation': [], 'validation': []
'recordings': [],
'supervisions': [],
} }
already_segmented = []
errored = 0 errored = 0
for filename in results: for filename in results:
@ -1328,11 +1329,13 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, n
segments = result['segments'] if use_segment else [{'text': result['text']}] segments = result['segments'] if use_segment else [{'text': result['text']}]
for segment in segments: for segment in enumerate_progress(segments, desc="Parsing segments", progress=progress):
file = filename.replace(".wav", f"_{pad(segment['id'], 4)}.wav") if use_segment else filename file = filename.replace(".wav", f"_{pad(segment['id'], 4)}.wav") if use_segment else filename
path = f'{indir}/audio/{file}' path = f'{indir}/audio/{file}'
# segment when needed # segment when needed
if not os.path.exists(path): if not os.path.exists(path) and filename not in already_segmented:
already_segmented.append(filename)
tmp_results = {} tmp_results = {}
tmp_results[filename] = result tmp_results[filename] = result
print(f"Audio not segmented, segmenting: {filename}") print(f"Audio not segmented, segmenting: {filename}")
@ -2360,9 +2363,9 @@ def update_tokenizer(tokenizer_json):
if hasattr(tts, "loading") and tts.loading: if hasattr(tts, "loading") and tts.loading:
raise Exception("TTS is still initializing...") raise Exception("TTS is still initializing...")
print(f"Loading model: {tokenizer_json}") print(f"Loading tokenizer vocab: {tokenizer_json}")
tts.load_tokenizer_json(tokenizer_json) tts.load_tokenizer_json(tokenizer_json)
print(f"Loaded model: {tts.tokenizer_json}") print(f"Loaded tokenizer vocab: {tts.tokenizer_json}")
do_gc() do_gc()

View File

@ -199,10 +199,10 @@ def prepare_dataset_proxy( voice, language, validation_text_length, validation_a
messages.append(message) messages.append(message)
if slice_audio: if slice_audio:
message = slice_dataset( voice, trim_silence=trim_silence, start_offset=slice_start_offset, end_offset=slice_end_offset ) message = slice_dataset( voice, trim_silence=trim_silence, start_offset=slice_start_offset, end_offset=slice_end_offset, progress=progress )
messages.append(message) messages.append(message)
message = prepare_dataset( voice, use_segments=slice_audio, text_length=validation_text_length, audio_length=validation_audio_length ) message = prepare_dataset( voice, use_segments=slice_audio, text_length=validation_text_length, audio_length=validation_audio_length, progress=progress )
messages.append(message) messages.append(message)
return "\n".join(messages) return "\n".join(messages)