Compare commits

...

3 Commits

Author SHA1 Message Date
lightmare
12340d769f Added extract_text_and_timestamps method 2023-02-18 23:03:51 +00:00
lightmare
765c02ec87 Fixed docstring 2023-02-18 23:01:15 +00:00
lightmare
2854cdfbe3 Fixed n_threads param 2023-02-18 23:00:47 +00:00

View File

@ -93,7 +93,7 @@ cdef whisper_full_params set_params(_Bool print_realtime, _Bool print_progress,
params.print_progress = print_progress
params.translate = translate
params.language = <const char *> language
n_threads = n_threads
params.n_threads = n_threads
return params
cdef class Whisper:
@ -152,7 +152,7 @@ cdef class Whisper:
Args:
res: A result id from transcribe(...)
Results:
Returns:
A list of transcribed strings.
Raises:
@ -166,4 +166,29 @@ cdef class Whisper:
whisper_full_get_segment_text(self.ctx, i).decode() for i in range(n_segments)
]
def extract_text_and_timestamps(self, int res):
    """Collects every transcribed segment together with its time span.

    Args:
        res: A result id from transcribe(...)

    Returns:
        A list holding one (start time, end time, text) tuple per
        segment, in segment order.
        e.g. [(0, 500, " This is a test.")]
        NOTE(review): the time values are passed through unchanged from
        whisper_full_get_segment_t0/t1 -- confirm their unit against the
        whisper.cpp header before converting.

    Raises:
        RuntimeError: The given result id was invalid.
    """
    # Only a result id of 0 is accepted; anything else is rejected up front.
    if res != 0:
        raise RuntimeError
    cdef int segment_count = whisper_full_n_segments(self.ctx)
    # One tuple per segment: start, end, then the segment text decoded
    # from bytes to str.
    return [
        (
            whisper_full_get_segment_t0(self.ctx, idx),
            whisper_full_get_segment_t1(self.ctx, idx),
            whisper_full_get_segment_text(self.ctx, idx).decode(),
        )
        for idx in range(segment_count)
    ]