'voice-fixer':False,# getting tired of long initialization times in a Colab for downloading a large dataset for it
'voice-fixer-use-cuda':True,
'force-cpu-for-conditioning-latents':False,
'defer-tts-load':False,
'device-override':None,
'whisper-model':"base",
'concurrency-count':2,
@ -82,6 +83,7 @@ def setup_args():
parser.add_argument("--voice-fixer",action='store_true',default=default_arguments['voice-fixer'],help="Uses python module 'voicefixer' to improve audio quality, if available.")
parser.add_argument("--voice-fixer-use-cuda",action='store_true',default=default_arguments['voice-fixer-use-cuda'],help="Hints to voicefixer to use CUDA, if available.")
parser.add_argument("--force-cpu-for-conditioning-latents",default=default_arguments['force-cpu-for-conditioning-latents'],action='store_true',help="Forces computing conditional latents to be done on the CPU (if you constantly OOM on low chunk counts)")
parser.add_argument("--device-override",default=default_arguments['device-override'],help="A device string to override pass through Torch")
parser.add_argument("--whisper-model",default=default_arguments['whisper-model'],help="Specifies which whisper model to use for transcription.")
parser.add_argument("--sample-batch-size",default=default_arguments['sample-batch-size'],type=int,help="Sets how many batches to use during the autoregressive samples pass")