From fc5e6d8599a8e34a94c7cbf0e9d1facbf9c2141c Mon Sep 17 00:00:00 2001 From: mrq Date: Mon, 9 Dec 2024 14:38:09 -0600 Subject: [PATCH] fixes to process_emilia.py script --- scripts/process_emilia.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/scripts/process_emilia.py b/scripts/process_emilia.py index 75ca908..ae1eac8 100644 --- a/scripts/process_emilia.py +++ b/scripts/process_emilia.py @@ -110,8 +110,6 @@ def process( if only_speakers and speaker_group not in only_speakers: continue - os.makedirs(f'./{output_dataset}/{group_name}/{speaker_group}/', exist_ok=True) - if f'{group_name}/{speaker_group}' not in dataset: dataset.append(f'{group_name}/{speaker_group}') @@ -130,13 +128,14 @@ def process( extension = os.path.splitext(filename)[-1][1:] fname = filename.replace(f'.{extension}', "") - if "text" not in metadata: - continue waveform, sample_rate = None, None metadata = json.load(open(jsonpath, "r", encoding="utf-8")) + if "text" not in metadata: + continue speaker_id = metadata["speaker"] - outpath = Path(f'./{output_dataset}/{group_name}/{speaker_id}/{fname}.{extension}') + outpath = Path(f'./{output_dataset}/{group_name}/{speaker_group}/{speaker_id}/{fname}.{extension}') + os.makedirs(f'./{output_dataset}/{group_name}/{speaker_group}/{speaker_id}/', exist_ok=True) if _replace_file_extension(outpath, audio_extension).exists(): continue