fixes for whisperx batching

mrq 2023-03-22 19:53:42 +00:00
parent 4056a27bcb
commit 8877960062


@@ -1168,7 +1168,7 @@ def whisper_transcribe( file, language=None ):
 		device = "cuda" if get_device_name() == "cuda" else "cpu"
 		if whisper_vad:
 			if args.whisper_batchsize > 1:
-				result = whisperx.transcribe_with_vad_parallel(whisper_model, file, whisper_vad, batch_size=args.whisper_batchsize)
+				result = whisperx.transcribe_with_vad_parallel(whisper_model, file, whisper_vad, batch_size=args.whisper_batchsize, language=language, task="transcribe")
 			else:
 				result = whisperx.transcribe_with_vad(whisper_model, file, whisper_vad)
 		else:
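
The batched path now threads the requested language and task through to whisperx explicitly. Below is a minimal sketch of that dispatch in isolation; the helper names and keyword arguments are taken from the diff above, while the wrapper function name and the surrounding model/VAD setup are assumptions added for illustration only.

import whisperx  # assumes a whisperx version that provides these VAD helpers

def transcribe_with_optional_batching(whisper_model, file, whisper_vad, batch_size, language):
	# Hypothetical wrapper, not part of the repo: forwards language/task
	# explicitly so the batched path behaves like the single-file
	# transcribe_with_vad() path.
	if batch_size > 1:
		return whisperx.transcribe_with_vad_parallel(
			whisper_model, file, whisper_vad,
			batch_size=batch_size,
			language=language,
			task="transcribe",
		)
	return whisperx.transcribe_with_vad(whisper_model, file, whisper_vad)
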
@@ -1192,6 +1192,10 @@ def whisper_transcribe( file, language=None ):
 			del result_aligned['segments'][i]['char-segments']
 		result['segments'] = result_aligned['segments']
+		result['text'] = []
+		for segment in result['segments']:
+			result['text'].append(segment['text'].strip())
+		result['text'] = " ".join(result['text'])
 		return result
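
The second hunk rebuilds a top-level 'text' field by joining the stripped per-segment texts, presumably because the whisperx VAD paths return text per segment rather than as one combined string; that is an inference from this fix, not something stated in the commit. A small self-contained sketch of the same post-processing, with illustrative sample segments:

# Sample segments are illustrative only; real ones come from whisperx alignment.
result = {
	'segments': [
		{'text': ' The quick brown fox. '},
		{'text': ' Jumped over the lazy dog. '},
	]
}

# Join the stripped per-segment texts into one transcript string,
# mirroring the lines added in the diff above.
result['text'] = []
for segment in result['segments']:
	result['text'].append(segment['text'].strip())
result['text'] = " ".join(result['text'])

print(result['text'])  # The quick brown fox. Jumped over the lazy dog.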