diff --git a/vall_e/data.py b/vall_e/data.py
index 17b9ee0..65043d4 100755
--- a/vall_e/data.py
+++ b/vall_e/data.py
@@ -314,12 +314,14 @@ class Dataset(_Dataset):
 		# text-to-speech
 		if task == "tts" or task == "tts-c":
 			trim_length = int(cfg.dataset.prompt_duration * 75)
-			continuous = task == "tts-c" and trim_length * 2 < resps.shape[0]
+			# demote "tts-c" to plain "tts" unless the target is longer than twice the prompt trim length
+			if task == "tts-c" and trim_length * 2 >= resps.shape[0]:
+				task = "tts"
 
 			# VALL-E continuous
 			# ignore if target utterance is shorter than prompt duration
 			# to-do: actually do this for the AR only as I don't think the paper trained the NAR for this
-			if continuous:
+			if task == "tts-c":
 				proms = resps[:trim_length, :]
 				resps = resps[trim_length:, :]
 			else: