forked from mrq/tortoise-tts
28 lines
1.0 KiB
Python
28 lines
1.0 KiB
Python
|
import argparse
|
||
|
import os
|
||
|
|
||
|
import torchaudio
|
||
|
|
||
|
from api import TextToSpeech
|
||
|
from tortoise.utils.audio import load_audio
|
||
|
|
||
|
def parse_eval_lines(lines):
    """Parse rows of a TSV evaluation file into ``(text, reference_path)`` pairs.

    Each non-blank row must contain exactly two tab-separated fields: the text
    to synthesize and the path of the reference audio clip. Leading/trailing
    whitespace on the row is stripped before splitting, and rows that are empty
    after stripping (e.g. a trailing newline at end of file) are skipped.

    Args:
        lines: Any iterable of strings, such as an open text file.

    Returns:
        A list of ``(text, reference_path)`` tuples in input order.

    Raises:
        ValueError: If a non-blank row does not have exactly two fields. The
            message includes the 1-based line number of the offending row.
    """
    pairs = []
    for lineno, raw in enumerate(lines, start=1):
        row = raw.strip()
        if not row:
            # Blank rows carry no data; unpacking them would otherwise fail.
            continue
        fields = row.split('\t')
        if len(fields) != 2:
            raise ValueError(
                f"line {lineno}: expected 2 tab-separated fields "
                f"(text, audio path), got {len(fields)}: {row!r}"
            )
        pairs.append((fields[0], fields[1]))
    return pairs


if __name__ == '__main__':
    # Command-line driver: for every (text, reference clip) row of the eval
    # TSV, synthesize the text conditioned on that clip and save the result
    # under the output directory using the reference clip's file name.
    parser = argparse.ArgumentParser()
    parser.add_argument('--eval_path', type=str, help='Path to TSV test file', default="D:\\tmp\\tortoise-tts-eval\\test.tsv")
    parser.add_argument('--output_path', type=str, help='Where to put results', default="D:\\tmp\\tortoise-tts-eval\\baseline")
    parser.add_argument('--preset', type=str, help='Rendering preset.', default="standard")
    args = parser.parse_args()
    os.makedirs(args.output_path, exist_ok=True)

    tts = TextToSpeech()

    # Parse (and validate) the whole file up front so a malformed row is
    # reported with its line number before any synthesis work is done.
    with open(args.eval_path, 'r', encoding='utf-8') as f:
        pairs = parse_eval_lines(f)

    for text, real in pairs:
        # The reference clip is the only voice sample passed to the model.
        # NOTE(review): 22050 is presumably the sample rate load_audio
        # resamples to -- confirm against tortoise.utils.audio.load_audio.
        conds = [load_audio(real, 22050)]
        gen = tts.tts_with_preset(text, voice_samples=conds, conditioning_latents=None, preset=args.preset)
        # The output is named after the reference clip's basename, so two
        # rows whose clips share a file name write to the same output file.
        out_file = os.path.join(args.output_path, os.path.basename(real))
        torchaudio.save(out_file, gen.squeeze(0).cpu(), 24000)
|
||
|
|