From e2a6dc1c0a65ddc161c4020c44ee2b9dae57b87b Mon Sep 17 00:00:00 2001 From: mrq Date: Tue, 11 Jul 2023 14:53:32 +0000 Subject: [PATCH] under bark, properly use transcribed audio if the audio wasn't actually sliced (oops) --- src/utils.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/utils.py b/src/utils.py index 5eb536b..184c2f6 100755 --- a/src/utils.py +++ b/src/utils.py @@ -192,13 +192,36 @@ if BARK_ENABLED: candidates = [] for file in transcriptions: result = transcriptions[file] + added = 0 + for segment in result['segments']: + path = file.replace(".wav", f"_{pad(segment['id'], 4)}.wav") + # check if the slice actually exists + if not os.path.exists(f'./training/{voice}/audio/{path}'): + continue + entry = ( - file.replace(".wav", f"_{pad(segment['id'], 4)}.wav"), + path, segment['end'] - segment['start'], segment['text'] ) candidates.append(entry) + added = added + 1 + + # if nothing got added (assuming because nothing was sliced), use the master file + if added == 0: # added < len(result['segments']): + start = 0 + end = 0 + for segment in result['segments']: + start = max( start, segment['start'] ) + end = max( end, segment['end'] ) + + entry = ( + file, + end - start, + result['text'] + ) + candidates.append(entry) candidates.sort(key=lambda x: x[1]) candidate = random.choice(candidates)