From e2a6dc1c0a65ddc161c4020c44ee2b9dae57b87b Mon Sep 17 00:00:00 2001
From: mrq <mrq@ecker.tech>
Date: Tue, 11 Jul 2023 14:53:32 +0000
Subject: [PATCH] under bark, properly use transcribed audio if the audio
 wasn't actually sliced (oops)

---
 src/utils.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/utils.py b/src/utils.py
index 5eb536b..184c2f6 100755
--- a/src/utils.py
+++ b/src/utils.py
@@ -192,13 +192,36 @@ if BARK_ENABLED:
 			candidates = []
 			for file in transcriptions:
 				result = transcriptions[file]
+				added = 0
+
 				for segment in result['segments']:
+					path = file.replace(".wav", f"_{pad(segment['id'], 4)}.wav")
+					# check if the slice actually exists
+					if not os.path.exists(f'./training/{voice}/audio/{path}'):
+						continue
+
 					entry = (
-						file.replace(".wav", f"_{pad(segment['id'], 4)}.wav"),
+						path,
 						segment['end'] - segment['start'],
 						segment['text']
 					)
 					candidates.append(entry)
+					added = added + 1
+
+				# if nothing got added (assuming because nothing was sliced), use the master file
+				if added == 0: # added < len(result['segments']):
+					start = 0
+					end = 0
+					for segment in result['segments']:
+						start = max( start, segment['start'] )
+						end = max( end, segment['end'] )
+
+					entry = (
+						file,
+						end - start,
+						result['text']
+					)
+					candidates.append(entry)
 
 			candidates.sort(key=lambda x: x[1])
 			candidate = random.choice(candidates)