From 13605f980c661a0b7b448c965d521f6e70585b15 Mon Sep 17 00:00:00 2001 From: mrq Date: Wed, 22 Mar 2023 20:01:30 +0000 Subject: [PATCH] now whisperx should output json that aligns with what's expected --- src/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/utils.py b/src/utils.py index 5b537ad..5ccb93d 100755 --- a/src/utils.py +++ b/src/utils.py @@ -1150,6 +1150,7 @@ def whisper_transcribe( file, language=None ): segments = whisper_model.extract_text_and_timestamps( res ) result = { + 'text': [], 'segments': [] } for segment in segments: @@ -1157,8 +1158,12 @@ def whisper_transcribe( file, language=None ): 'start': segment[0] / 100.0, 'end': segment[1] / 100.0, 'text': segment[2], + 'id': len(result['segments']) } + result['text'].append( segment[2] ) result['segments'].append(reparsed) + + result['text'] = " ".join(result['text']) return result if args.whisper_backend == "m-bain/whisperx": @@ -1194,6 +1199,7 @@ def whisper_transcribe( file, language=None ): result['segments'] = result_aligned['segments'] result['text'] = [] for segment in result['segments']: + segment['id'] = len(result['text']) result['text'].append(segment['text'].strip()) result['text'] = " ".join(result['text'])