now whisperx should output json that aligns with what's expected

This commit is contained in:
mrq 2023-03-22 20:01:30 +00:00
parent 8877960062
commit 13605f980c

View File

@@ -1150,6 +1150,7 @@ def whisper_transcribe( file, language=None ):
segments = whisper_model.extract_text_and_timestamps( res )
# Accumulator for the transcription output: 'text' gathers each segment's
# text (joined into a single string once the loop finishes) and 'segments'
# gathers the per-segment dicts.
result = {
    'text': [],
    'segments': [],
}
for segment in segments:
@@ -1157,8 +1158,12 @@ def whisper_transcribe( file, language=None ):
'start': segment[0] / 100.0,
'end': segment[1] / 100.0,
'text': segment[2],
'id': len(result['segments'])
}
result['text'].append( segment[2] )
result['segments'].append(reparsed)
result['text'] = " ".join(result['text'])
return result
if args.whisper_backend == "m-bain/whisperx":
@@ -1194,6 +1199,7 @@ def whisper_transcribe( file, language=None ):
result['segments'] = result_aligned['segments']
result['text'] = []
for segment in result['segments']:
segment['id'] = len(result['text'])
result['text'].append(segment['text'].strip())
result['text'] = " ".join(result['text'])