2024-12-18 04:47:12 +00:00
|
|
|
"""
|
|
|
|
Handles processing seed-tts-eval's dataset into something to be used for vall_e.demo
|
|
|
|
|
|
|
|
Reads from meta.lst, a text file where each utterance is formatted as:
|
|
|
|
|
|
|
|
<reference name>|<prompt transcript>|<prompt audio path>|<text to synthesize>
|
|
|
|
"""
|
|
|
|
|
|
|
|
import os
|
|
|
|
import json
|
|
|
|
import argparse
|
|
|
|
import torch
|
|
|
|
import shutil
|
|
|
|
import torchaudio
|
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
from tqdm.auto import tqdm
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
def _detect_language(input_dir):
    """Guess the dataset language ("en", "zh" or "auto") from the input path."""
    # Prefer an exact path component so that e.g. ".../ben/zh" resolves to
    # "zh" instead of matching the "en" inside "ben".
    for code in ("en", "zh"):
        if code in input_dir.parts:
            return code
    # Fall back to the looser substring match for names like "seed-tts-eval-en".
    as_text = str(input_dir)
    for code in ("en", "zh"):
        if code in as_text:
            return code
    return "auto"


def process(
    input_dir=Path("./seedtts_testset/en/"),
    list_name="./meta.lst",
    wav_dir="./wavs/",
    output_dir=Path("./dataset/seed-tts-eval-en/"),
):
    """Convert a seed-tts-eval test set into one folder per utterance.

    Each non-empty manifest line is split on "|" into four fields:
    utterance name, prompt transcript (unused here), prompt audio path
    (relative to ``input_dir``) and the text to synthesize.

    For every utterance the following is created under
    ``output_dir/<utterance name>/``:

    * ``out/``          - empty directory
    * ``prompt.txt``    - the text to synthesize (fourth field)
    * ``language.txt``  - "en", "zh" or "auto", derived from ``input_dir``
    * ``reference.wav`` - copy of ``input_dir/wav_dir/<utterance name>.wav``
    * ``prompt.wav``    - copy of the prompt audio

    Args:
        input_dir: Root of the test set; ``str`` or ``Path``.
        list_name: Manifest file name, relative to ``input_dir``.
        wav_dir: Directory of reference audio, relative to ``input_dir``.
        output_dir: Destination root; created if missing. ``str`` or ``Path``.

    Raises:
        ValueError: A non-empty manifest line does not have exactly four fields.
        FileNotFoundError: The manifest or a referenced audio file is missing.
    """
    # Accept plain strings as well as Path objects.
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)

    language = _detect_language(input_dir)

    output_dir.mkdir(parents=True, exist_ok=True)

    # Read the manifest explicitly as UTF-8 (the outputs are written as UTF-8
    # too) and close the handle deterministically.
    manifest_path = input_dir / list_name
    with open(manifest_path, encoding="utf-8") as manifest:
        lines = manifest.read().split("\n")

    for line_no, line in enumerate(lines, start=1):
        if not line:
            continue

        fields = line.split("|")
        if len(fields) != 4:
            raise ValueError(
                f"{manifest_path}:{line_no}: expected 4 '|'-separated fields, "
                f"got {len(fields)}"
            )
        filename, _prompt_text, prompt_wav, text = fields

        utterance_dir = output_dir / filename
        (utterance_dir / "out").mkdir(parents=True, exist_ok=True)

        (utterance_dir / "prompt.txt").write_text(text, encoding="utf-8")
        (utterance_dir / "language.txt").write_text(language, encoding="utf-8")

        shutil.copy(
            (input_dir / wav_dir / filename).with_suffix(".wav"),
            utterance_dir / "reference.wav",
        )
        shutil.copy(input_dir / prompt_wav, utterance_dir / "prompt.wav")
|
2024-12-18 04:47:12 +00:00
|
|
|
|
|
|
|
# Script entry point: run the conversion with the default paths when this
# file is executed directly rather than imported.
if __name__ == "__main__":
    process()
|