1
0

and it turned out I wasn't even using the aligned segments, kmsing now that I have to *redo* my dataset again

This commit is contained in:
mrq 2023-03-06 11:01:33 +00:00
parent 14779a5020
commit 999878d9c6

View File

@@ -1035,6 +1035,12 @@ def whisper_transcribe( file, language=None ):
model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device) model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
result_aligned = whisperx.align(result["segments"], model_a, metadata, file, device) result_aligned = whisperx.align(result["segments"], model_a, metadata, file, device)
for i in range(len(result_aligned['segments'])):
del result_aligned['segments'][i]['word-segments']
del result_aligned['segments'][i]['char-segments']
result['segments'] = result_aligned['segments']
return result return result
def prepare_dataset( files, outdir, language=None, skip_existings=False, progress=None ): def prepare_dataset( files, outdir, language=None, skip_existings=False, progress=None ):