2022-01-28 06:19:29 +00:00
import argparse
import os
import torchaudio
2022-02-04 05:18:21 +00:00
2022-04-18 16:22:36 +00:00
from api import TextToSpeech
from utils . audio import load_audio , get_voices
2022-02-04 05:18:21 +00:00
2022-01-28 06:19:29 +00:00
# Second argument passed to load_audio for every conditioning clip.
# NOTE(review): presumably the sampling rate the clips are resampled to - confirm
# against utils.audio.load_audio.
COND_SAMPLE_RATE = 22050
# Sample rate written into the generated WAV files by torchaudio.save.
OUTPUT_SAMPLE_RATE = 24000


def resolve_voice_paths(voice, voices):
    """Return the conditioning clip paths for one voice specification.

    Args:
        voice: A single voice name, or several names joined with ``&`` to
            combine the conditioning clips of all of them.
        voices: Mapping of voice name to the list of clip paths for that voice.

    Returns:
        A new list holding the clip paths of every named voice, in the order
        the names appear in ``voice``.

    Raises:
        KeyError: If any of the named voices is not a key of ``voices``.
    """
    paths = []
    for name in voice.split('&'):
        name = name.strip()
        if name not in voices:
            available = ', '.join(sorted(voices))
            raise KeyError(f"Unknown voice '{name}'. Available voices: {available}")
        paths.extend(voices[name])
    return paths


def main():
    """Parse the command line and write one generated WAV file per selected voice."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--text', type=str, help='Text to speak.',
                        default="I am a language model that has learned to speak.")
    parser.add_argument('--voice', type=str,
                        help='Selects the voice to use for generation. See options in voices/ directory (and add your own!) '
                             'Use the & character to join two voices together. Use a comma to perform inference on multiple voices.',
                        default='pat')
    parser.add_argument('--preset', type=str, help='Which voice preset to use.', default='standard')
    parser.add_argument('--voice_diversity_intelligibility_slider', type=float,
                        help='How to balance vocal diversity with the quality/intelligibility of the spoken text. 0 means highly diverse voice (not recommended), 1 means maximize intellibility',
                        default=.5)
    parser.add_argument('--output_path', type=str, help='Where to store outputs.', default='results/')
    args = parser.parse_args()

    # Resolve every requested voice before building the model, so that a typo in
    # --voice is reported immediately instead of after the model has been set up.
    voices = get_voices()
    selected_voices = [spec.strip() for spec in args.voice.split(',') if spec.strip()]
    if not selected_voices:
        parser.error('--voice must name at least one voice.')
    try:
        jobs = [(spec, resolve_voice_paths(spec, voices)) for spec in selected_voices]
    except KeyError as err:
        # args[0] is the plain message; str(KeyError) would wrap it in quotes.
        parser.error(err.args[0])

    os.makedirs(args.output_path, exist_ok=True)
    tts = TextToSpeech()

    for spec, cond_paths in jobs:
        conds = [load_audio(cond_path, COND_SAMPLE_RATE) for cond_path in cond_paths]
        gen = tts.tts_with_preset(args.text, conds, preset=args.preset,
                                  clvp_cvvp_slider=args.voice_diversity_intelligibility_slider)
        # The file is named after the voice spec as typed (e.g. "pat&tom.wav").
        torchaudio.save(os.path.join(args.output_path, f'{spec}.wav'),
                        gen.squeeze(0).cpu(), OUTPUT_SAMPLE_RATE)


if __name__ == '__main__':
    main()
2022-03-27 03:32:12 +00:00