From 999878d9c668730757bfecbc6d706fe3a44d2b42 Mon Sep 17 00:00:00 2001 From: mrq Date: Mon, 6 Mar 2023 11:01:33 +0000 Subject: [PATCH] and it turned out I wasn't even using the aligned segments, kmsing now that I have to *redo* my dataset again --- src/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/utils.py b/src/utils.py index 047c90a..dcf3bc0 100755 --- a/src/utils.py +++ b/src/utils.py @@ -1035,6 +1035,12 @@ def whisper_transcribe( file, language=None ): model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device) result_aligned = whisperx.align(result["segments"], model_a, metadata, file, device) + for i in range(len(result_aligned['segments'])): + del result_aligned['segments'][i]['word-segments'] + del result_aligned['segments'][i]['char-segments'] + + result['segments'] = result_aligned['segments'] + return result def prepare_dataset( files, outdir, language=None, skip_existings=False, progress=None ):