Added extract_text_and_timestamps
method
This commit is contained in:
parent
765c02ec87
commit
12340d769f
|
@ -166,4 +166,29 @@ cdef class Whisper:
|
|||
whisper_full_get_segment_text(self.ctx, i).decode() for i in range(n_segments)
|
||||
]
|
||||
|
||||
def extract_text_and_timestamps(self, int res):
    """Extract the text and timestamps of every segment of a transcription.

    Args:
        res: A result id from transcribe(...). Only 0 is accepted.

    Returns:
        A list with one (start time, end time, text) tuple per segment,
        in segment order, e.g. [(0, 500, " This is a test.")]
        NOTE(review): the time values are passed through unchanged from
        whisper_full_get_segment_t0/t1 -- confirm their unit against the
        whisper.cpp headers before converting them.

    Raises:
        RuntimeError: The given result id was invalid (non-zero).
    """
    if res != 0:
        # Include the offending value so callers can diagnose the failure.
        raise RuntimeError("Invalid transcription result id: %d" % res)
    cdef int n_segments = whisper_full_n_segments(self.ctx)
    # Segment text comes back as bytes and is decoded with the default
    # codec; a decoding failure propagates to the caller.
    return [
        (
            whisper_full_get_segment_t0(self.ctx, i),
            whisper_full_get_segment_t1(self.ctx, i),
            whisper_full_get_segment_text(self.ctx, i).decode(),
        )
        for i in range(n_segments)
    ]
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user