diff --git a/models/.template.yaml b/models/.template.dlas.yaml
similarity index 100%
rename from models/.template.yaml
rename to models/.template.dlas.yaml
diff --git a/models/.template.valle.yaml b/models/.template.valle.yaml
new file mode 100755
index 0000000..58e7acf
--- /dev/null
+++ b/models/.template.valle.yaml
@@ -0,0 +1,9 @@
+data_dirs: [./training/${voice}/valle/]
+spkr_name_getter: "lambda p: p.parts[-3]"
+
+model: ${model_name}
+batch_size: ${batch_size}
+eval_batch_size: ${validation_batch_size}
+eval_every: ${validation_rate}
+
+sampling_temperature: 1.0
\ No newline at end of file
diff --git a/src/utils.py b/src/utils.py
index 445fb2d..d20b6b5 100755
--- a/src/utils.py
+++ b/src/utils.py
@@ -20,6 +20,8 @@ import subprocess
 import psutil
 import yaml
 import hashlib
+import io
+import gzip
 
 import tqdm
 import torch
@@ -45,6 +47,7 @@ WHISPER_MODELS = ["tiny", "base", "small", "medium", "large"]
 WHISPER_SPECIALIZED_MODELS = ["tiny.en", "base.en", "small.en", "medium.en"]
 WHISPER_BACKENDS = ["openai/whisper", "lightmare/whispercpp"]
 VOCODERS = ['univnet', 'bigvgan_base_24khz_100band', 'bigvgan_24khz_100band']
+TTSES = ['tortoise'] # + ['vall-e']
 
 GENERATE_SETTINGS_ARGS = None
@@ -56,6 +59,16 @@ RESAMPLERS = {}
 
 MIN_TRAINING_DURATION = 0.6
 MAX_TRAINING_DURATION = 11.6097505669
 
+VALLE_ENABLED = False
+
+try:
+	from vall_e.emb.qnt import encode as quantize
+	from vall_e.emb.g2p import encode as phonemize
+
+	VALLE_ENABLED = True
+except Exception as e:
+	pass
+
 args = None
 tts = None
 tts_loading = False
@@ -1195,7 +1208,7 @@ def slice_dataset( voice, trim_silence=True, start_offset=0, end_offset=0, resul
 	messages.append(f"Sliced segments: {files} => {segments}.")
 	return "\n".join(messages)
 
-def prepare_dataset( voice, use_segments, text_length, audio_length, normalize=False ):
+def prepare_dataset( voice, use_segments, text_length, audio_length, normalize=True ):
 	indir = f'./training/{voice}/'
 	infile = f'{indir}/whisper.json'
 	messages = []
@@ -1273,6 +1286,8 @@ def prepare_dataset( voice, use_segments, text_length, audio_length, normalize=F
 			continue
 
 		waveform, sample_rate = torchaudio.load(path)
+		num_channels, num_frames = waveform.shape
+		duration = num_frames / sample_rate
 
 		error = validate_waveform( waveform, sample_rate )
 		if error:
@@ -1281,21 +1296,43 @@ def prepare_dataset( voice, use_segments, text_length, audio_length, normalize=F
 			messages.append(message)
 			errored += 1
 			continue
+
 		culled = len(text) < text_length
 		if not culled and audio_length > 0:
-			num_channels, num_frames = waveform.shape
-			duration = num_frames / sample_rate
 			culled = duration < audio_length
 
 		# for when i add in a little treat ;), as it requires normalized text
-		if normalize and length(normalized_text) < 200:
+		if normalize and len(normalized_text) < 200:
 			line = f'audio/{file}|{text}|{normalized_text}'
 		else:
 			line = f'audio/{file}|{text}'
 
 		lines['training' if not culled else 'validation'].append(line)
 
+		if culled or not VALLE_ENABLED:
+			continue
+
+		# VALL-E dataset
+		os.makedirs(f'{indir}/valle/', exist_ok=True)
+
+		try:
+			from vall_e.emb.qnt import encode as quantize
+			from vall_e.emb.g2p import encode as phonemize
+
+			if waveform.shape[0] == 2:
+				waveform = waveform[:1]
+
+			quantized = quantize( waveform, sample_rate ).cpu()
+			torch.save(quantized, f'{indir}/valle/{file.replace(".wav",".qnt.pt")}')
+
+			phonemes = phonemize(normalized_text)
+			open(f'{indir}/valle/{file.replace(".wav",".phn.txt")}', 'w', encoding='utf-8').write(" ".join(phonemes))
+		except Exception as e:
+			print(e)
+			pass
+
 	training_joined = "\n".join(lines['training'])
 	validation_joined = "\n".join(lines['validation'])
@@ -1538,21 +1575,27 @@ def save_training_settings( **kwargs ):
 		settings['source_model'] = f"pretrain_model_gpt: '{settings['source_model']}'"
 		settings['resume_state'] = f"# resume_state: '{settings['resume_state']}'"
 
-	with open(f'./models/.template.yaml', 'r', encoding="utf-8") as f:
-		yaml = f.read()
+	def use_template(template, out):
+		with open(template, 'r', encoding="utf-8") as f:
+			yaml = f.read()
 
-	# i could just load and edit the YAML directly, but this is easier, as I don't need to bother with path traversals
-	for k in settings:
-		if settings[k] is None:
-			continue
-		yaml = yaml.replace(f"${{{k}}}", str(settings[k]))
+		# i could just load and edit the YAML directly, but this is easier, as I don't need to bother with path traversals
+		for k in settings:
+			if settings[k] is None:
+				continue
+			yaml = yaml.replace(f"${{{k}}}", str(settings[k]))
 
-	outyaml = f'./training/{settings["voice"]}/train.yaml'
-	with open(outyaml, 'w', encoding="utf-8") as f:
-		f.write(yaml)
+		with open(out, 'w', encoding="utf-8") as f:
+			f.write(yaml)
 
+	use_template(f'./models/.template.dlas.yaml', f'./training/{settings["voice"]}/train.yaml')
 
-	messages.append(f"Saved training output to: {outyaml}")
+	settings['model_name'] = "ar"
+	use_template(f'./models/.template.valle.yaml', f'./training/{settings["voice"]}/ar.yaml')
+	settings['model_name'] = "nar"
+	use_template(f'./models/.template.valle.yaml', f'./training/{settings["voice"]}/nar.yaml')
+
+	messages.append(f"Saved training output")
 
 	return settings, messages
@@ -1743,17 +1786,22 @@ def setup_args():
 		'latents-lean-and-mean': True,
 		'voice-fixer': False, # getting tired of long initialization times in a Colab for downloading a large dataset for it
 		'voice-fixer-use-cuda': True,
+		'force-cpu-for-conditioning-latents': False,
 		'defer-tts-load': False,
 		'device-override': None,
 		'prune-nonfinal-outputs': True,
-		'vocoder-model': VOCODERS[-1],
 		'concurrency-count': 2,
-		'autocalculate-voice-chunk-duration-size': 0,
+		'autocalculate-voice-chunk-duration-size': 10,
+		'output-sample-rate': 44100,
 		'output-volume': 1,
+		'tts-backend': TTSES[0],
+		'autoregressive-model': None,
+		'vocoder-model': VOCODERS[-1],
+		'whisper-backend': 'openai/whisper',
 		'whisper-model': "base",
@@ -1792,6 +1840,7 @@ def setup_args():
 	parser.add_argument("--output-sample-rate", type=int, default=default_arguments['output-sample-rate'], help="Sample rate to resample the output to (from 24KHz)")
 	parser.add_argument("--output-volume", type=float, default=default_arguments['output-volume'], help="Adjusts volume of output")
+	parser.add_argument("--tts-backend", default=default_arguments['tts-backend'], help="Specifies which TTS backend to use.")
 	parser.add_argument("--autoregressive-model", default=default_arguments['autoregressive-model'], help="Specifies which autoregressive model to use for sampling.")
 	parser.add_argument("--whisper-backend", default=default_arguments['whisper-backend'], action='store_true', help="Picks which whisper backend to use (openai/whisper, lightmare/whispercpp)")
 	parser.add_argument("--whisper-model", default=default_arguments['whisper-model'], help="Specifies which whisper model to use for transcription.")
@@ -1828,10 +1877,48 @@ def setup_args():
 
 	return args
 
+def get_default_settings( hypenated=True ):
+	settings = {
+		'listen': None if not args.listen else args.listen,
+		'share': args.share,
+		'low-vram':args.low_vram,
+		'check-for-updates':args.check_for_updates,
+		'models-from-local-only':args.models_from_local_only,
+		'force-cpu-for-conditioning-latents': args.force_cpu_for_conditioning_latents,
+		'defer-tts-load': args.defer_tts_load,
+		'prune-nonfinal-outputs': args.prune_nonfinal_outputs,
+		'device-override': args.device_override,
+		'sample-batch-size': args.sample_batch_size,
+		'embed-output-metadata': args.embed_output_metadata,
+		'latents-lean-and-mean': args.latents_lean_and_mean,
+		'voice-fixer': args.voice_fixer,
+		'voice-fixer-use-cuda': args.voice_fixer_use_cuda,
+		'concurrency-count': args.concurrency_count,
+		'output-sample-rate': args.output_sample_rate,
+		'autocalculate-voice-chunk-duration-size': args.autocalculate_voice_chunk_duration_size,
+		'output-volume': args.output_volume,
+
+		'tts-backend': args.tts_backend,
+
+		'autoregressive-model': args.autoregressive_model,
+		'vocoder-model': args.vocoder_model,
+
+		'whisper-backend': args.whisper_backend,
+		'whisper-model': args.whisper_model,
+
+		'training-default-halfp': args.training_default_halfp,
+		'training-default-bnb': args.training_default_bnb,
+	}
+
+	res = {}
+	for k in settings:
+		res[k.replace("-", "_") if not hypenated else k] = settings[k]
+	return res
+
 def update_args( **kwargs ):
 	global args
 
-	settings = {}
+	settings = get_default_settings(hypenated=False)
 	settings.update(kwargs)
 
 	args.listen = settings['listen']
@@ -1853,8 +1940,10 @@ def update_args( **kwargs ):
 	args.autocalculate_voice_chunk_duration_size = settings['autocalculate_voice_chunk_duration_size']
 	args.output_volume = settings['output_volume']
 
+	args.tts_backend = settings['tts_backend']
 	args.autoregressive_model = settings['autoregressive_model']
 	args.vocoder_model = settings['vocoder_model']
+
 	args.whisper_backend = settings['whisper_backend']
 	args.whisper_model = settings['whisper_model']
@@ -1865,34 +1954,7 @@ def update_args( **kwargs ):
 def save_args_settings():
 	global args
 
-	settings = {
-		'listen': None if not args.listen else args.listen,
-		'share': args.share,
-		'low-vram':args.low_vram,
-		'check-for-updates':args.check_for_updates,
-		'models-from-local-only':args.models_from_local_only,
-		'force-cpu-for-conditioning-latents': args.force_cpu_for_conditioning_latents,
-		'defer-tts-load': args.defer_tts_load,
-		'prune-nonfinal-outputs': args.prune_nonfinal_outputs,
-		'device-override': args.device_override,
-		'sample-batch-size': args.sample_batch_size,
-		'embed-output-metadata': args.embed_output_metadata,
-		'latents-lean-and-mean': args.latents_lean_and_mean,
-		'voice-fixer': args.voice_fixer,
-		'voice-fixer-use-cuda': args.voice_fixer_use_cuda,
-		'concurrency-count': args.concurrency_count,
-		'output-sample-rate': args.output_sample_rate,
-		'autocalculate-voice-chunk-duration-size': args.autocalculate_voice_chunk_duration_size,
-		'output-volume': args.output_volume,
-
-		'autoregressive-model': args.autoregressive_model,
-		'vocoder-model': args.vocoder_model,
-		'whisper-backend': args.whisper_backend,
-		'whisper-model': args.whisper_model,
-
-		'training-default-halfp': args.training_default_halfp,
-		'training-default-bnb': args.training_default_bnb,
-	}
+	settings = get_default_settings()
 
 	os.makedirs('./config/', exist_ok=True)
 	with open(f'./config/exec.json', 'w', encoding="utf-8") as f:
@@ -2009,18 +2071,13 @@ def load_tts( restart=False, autoregressive_model=None ):
 	if autoregressive_model == "auto":
 		autoregressive_model = deduce_autoregressive_model()
 
-	print(f"Loading TorToiSe... (AR: {autoregressive_model}, vocoder: {args.vocoder_model})")
-
 	if get_device_name() == "cpu":
 		print("!!!! WARNING !!!! No GPU available in PyTorch. You may need to reinstall PyTorch.")
 
 	tts_loading = True
-	try:
-		tts = TextToSpeech(minor_optimizations=not args.low_vram, autoregressive_model_path=autoregressive_model, vocoder_model=args.vocoder_model)
-	except Exception as e:
-		tts = TextToSpeech(minor_optimizations=not args.low_vram)
-		load_autoregressive_model(autoregressive_model)
-
+	print(f"Loading TorToiSe... (AR: {autoregressive_model}, vocoder: {args.vocoder_model})")
+	tts = TextToSpeech(minor_optimizations=not args.low_vram, autoregressive_model_path=autoregressive_model, vocoder_model=args.vocoder_model)
 	tts_loading = False
 
 	get_model_path('dvae.pth')
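# A minimal sketch (assuming the vall_e package behaves as the imports in the
# prepare_dataset() hunk above suggest; preprocess_clip and its arguments are
# hypothetical names for illustration) of the per-clip work the new VALL-E branch
# does: discrete audio codes go to <name>.qnt.pt, phonemes of the normalized
# transcript to <name>.phn.txt. The ./training/${voice}/valle/ layout is what
# lets the template's spkr_name_getter, "lambda p: p.parts[-3]", recover the
# voice name from each file's path.
import torch
import torchaudio
from vall_e.emb.qnt import encode as quantize
from vall_e.emb.g2p import encode as phonemize

def preprocess_clip( wav_path, normalized_text, outdir, name ):
	waveform, sample_rate = torchaudio.load(wav_path)
	if waveform.shape[0] == 2: # stereo: keep only the first channel
		waveform = waveform[:1]
	quantized = quantize( waveform, sample_rate ).cpu()
	torch.save(quantized, f'{outdir}/{name}.qnt.pt')
	phonemes = phonemize(normalized_text)
	open(f'{outdir}/{name}.phn.txt', 'w', encoding='utf-8').write(" ".join(phonemes))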
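# A minimal sketch of the ${key} substitution use_template() performs above:
# straight text replacement against the template, so keys set to None keep their
# ${placeholder} and no YAML parsing is involved. render() is a hypothetical
# standalone equivalent with illustrative values.
def render( template_text, settings ):
	for k in settings:
		if settings[k] is None:
			continue
		template_text = template_text.replace(f"${{{k}}}", str(settings[k]))
	return template_text

# Rendering .template.valle.yaml once with model_name "ar" and once with "nar"
# is how the two single-model configs above are produced:
render("model: ${model_name}\nbatch_size: ${batch_size}", { 'model_name': 'ar', 'batch_size': 8 })
# -> 'model: ar\nbatch_size: 8'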
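# A small sketch of the key remapping get_default_settings() performs via its
# flag (spelled "hypenated" in the patch): one dict serves both the exec.json
# written by save_args_settings(), which uses hyphenated keys, and update_args(),
# which expects underscored keys matching the argparse attribute names. remap()
# and its values are illustrative.
def remap( settings, hyphenated=True ):
	return { (k if hyphenated else k.replace("-", "_")): v for k, v in settings.items() }

remap({ 'output-sample-rate': 44100 })                   # {'output-sample-rate': 44100} -> ./config/exec.json
remap({ 'output-sample-rate': 44100 }, hyphenated=False) # {'output_sample_rate': 44100} -> base dict for update_args()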
diff --git a/src/webui.py b/src/webui.py
index a994cc2..3da2505 100755
--- a/src/webui.py
+++ b/src/webui.py
@@ -548,11 +548,11 @@ def setup_gradio():
 				EXEC_SETTINGS['autocalculate_voice_chunk_duration_size'] = gr.Number(label="Auto-Calculate Voice Chunk Duration (in seconds)", precision=0, value=args.autocalculate_voice_chunk_duration_size)
 				EXEC_SETTINGS['output_volume'] = gr.Slider(label="Output Volume", minimum=0, maximum=2, value=args.output_volume)
 
+				# EXEC_SETTINGS['tts_backend'] = gr.Dropdown(TTSES, label="TTS Backend", value=args.tts_backend if args.tts_backend else TTSES[0])
+
 				EXEC_SETTINGS['autoregressive_model'] = gr.Dropdown(choices=autoregressive_models, label="Autoregressive Model", value=args.autoregressive_model if args.autoregressive_model else autoregressive_models[0])
-
 				EXEC_SETTINGS['vocoder_model'] = gr.Dropdown(VOCODERS, label="Vocoder", value=args.vocoder_model if args.vocoder_model else VOCODERS[-1])
-
 				EXEC_SETTINGS['training_default_halfp'] = TRAINING_SETTINGS['half_p']
 				EXEC_SETTINGS['training_default_bnb'] = TRAINING_SETTINGS['bitsandbytes']
diff --git a/start.sh b/start.sh
index f1f9947..e0ac548 100755
--- a/start.sh
+++ b/start.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+ulimit -Sn `ulimit -Hn` # ROCm is a bitch
 source ./venv/bin/activate
 python3 ./src/main.py "$@"
 deactivate
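# The start.sh change raises the shell's soft open-file limit to the hard limit
# before launching, since ROCm can burn through file descriptors. A sketch of the
# same adjustment from inside Python (POSIX only), should the app be started
# without the wrapper script:
import resource

soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
resource.setrlimit(resource.RLIMIT_NOFILE, (hard, hard))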