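Fixes to the process_emilia.py script: check for a missing "text" field only after the metadata JSON has been loaded, and write outputs under the per-speaker directory inside the speaker group ({group_name}/{speaker_group}/{speaker_id}/), creating that directory as needed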
This commit is contained in:
parent
a6c745bafb
commit
fc5e6d8599
|
@ -110,8 +110,6 @@ def process(
|
||||||
if only_speakers and speaker_group not in only_speakers:
|
if only_speakers and speaker_group not in only_speakers:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
os.makedirs(f'./{output_dataset}/{group_name}/{speaker_group}/', exist_ok=True)
|
|
||||||
|
|
||||||
if f'{group_name}/{speaker_group}' not in dataset:
|
if f'{group_name}/{speaker_group}' not in dataset:
|
||||||
dataset.append(f'{group_name}/{speaker_group}')
|
dataset.append(f'{group_name}/{speaker_group}')
|
||||||
|
|
||||||
|
@ -130,13 +128,14 @@ def process(
|
||||||
|
|
||||||
extension = os.path.splitext(filename)[-1][1:]
|
extension = os.path.splitext(filename)[-1][1:]
|
||||||
fname = filename.replace(f'.{extension}', "")
|
fname = filename.replace(f'.{extension}', "")
|
||||||
if "text" not in metadata:
|
|
||||||
continue
|
|
||||||
|
|
||||||
waveform, sample_rate = None, None
|
waveform, sample_rate = None, None
|
||||||
metadata = json.load(open(jsonpath, "r", encoding="utf-8"))
|
metadata = json.load(open(jsonpath, "r", encoding="utf-8"))
|
||||||
|
if "text" not in metadata:
|
||||||
|
continue
|
||||||
speaker_id = metadata["speaker"]
|
speaker_id = metadata["speaker"]
|
||||||
outpath = Path(f'./{output_dataset}/{group_name}/{speaker_id}/{fname}.{extension}')
|
outpath = Path(f'./{output_dataset}/{group_name}/{speaker_group}/{speaker_id}/{fname}.{extension}')
|
||||||
|
os.makedirs(f'./{output_dataset}/{group_name}/{speaker_group}/{speaker_id}/', exist_ok=True)
|
||||||
|
|
||||||
if _replace_file_extension(outpath, audio_extension).exists():
|
if _replace_file_extension(outpath, audio_extension).exists():
|
||||||
continue
|
continue
|
||||||
|
|
Loading…
Reference in New Issue
Block a user