Added extract_text_and_timestamps method

This commit is contained in:
lightmare 2023-02-18 23:03:51 +00:00
parent 765c02ec87
commit 12340d769f

View File

@@ -166,4 +166,29 @@ cdef class Whisper:
whisper_full_get_segment_text(self.ctx, i).decode() for i in range(n_segments)
]
def extract_text_and_timestamps(self, int res):
    """Extract the text and timestamps of every transcribed segment.

    Args:
        res: A result id from transcribe(...). Only 0 is accepted.

    Returns:
        A list of (start time, end time, transcribed text) tuples, one per
        segment, in segment order.
        e.g. [(0, 500, " This is a test.")]
        NOTE(review): the time values are passed through unchanged from
        whisper_full_get_segment_t0/t1; presumably 10 ms ticks -- confirm
        against the whisper.cpp header.

    Raises:
        RuntimeError: The given result id was invalid (non-zero).
    """
    if res != 0:
        # Include the offending id so the failure is diagnosable by callers.
        raise RuntimeError(f"Invalid transcription result id: {res}")
    cdef int n_segments = whisper_full_n_segments(self.ctx)
    # Build the result in one pass, mirroring the comprehension used by the
    # sibling text-only extraction method.
    return [
        (
            whisper_full_get_segment_t0(self.ctx, i),
            whisper_full_get_segment_t1(self.ctx, i),
            whisper_full_get_segment_text(self.ctx, i).decode(),
        )
        for i in range(n_segments)
    ]