update
This commit is contained in:
parent
02429c527e
commit
c7a8d72e70
|
@ -1,9 +1,8 @@
|
||||||
#!python
|
#!python
|
||||||
# cython: language_level=3
|
# cython: language_level=3
|
||||||
|
|
||||||
from libc.stdint cimport int64_t
|
from libc.stdint cimport int64_t
|
||||||
|
|
||||||
cdef:
|
cdef nogil:
|
||||||
int WHISPER_SAMPLE_RATE = 16000
|
int WHISPER_SAMPLE_RATE = 16000
|
||||||
int WHISPER_N_FFT = 400
|
int WHISPER_N_FFT = 400
|
||||||
int WHISPER_N_MEL = 80
|
int WHISPER_N_MEL = 80
|
||||||
|
@ -13,6 +12,9 @@ cdef:
|
||||||
char* TEST_FILE = b'test.wav'
|
char* TEST_FILE = b'test.wav'
|
||||||
char* DEFAULT_MODEL = b'ggml-tiny.bin'
|
char* DEFAULT_MODEL = b'ggml-tiny.bin'
|
||||||
char* LANGUAGE = b'fr'
|
char* LANGUAGE = b'fr'
|
||||||
|
ctypedef struct audio_data:
|
||||||
|
float* frames;
|
||||||
|
int n_frames;
|
||||||
|
|
||||||
cdef extern from "whisper.h" nogil:
|
cdef extern from "whisper.h" nogil:
|
||||||
enum whisper_sampling_strategy:
|
enum whisper_sampling_strategy:
|
||||||
|
@ -109,8 +111,3 @@ cdef extern from "whisper.h" nogil:
|
||||||
const char* whisper_print_system_info()
|
const char* whisper_print_system_info()
|
||||||
const char* whisper_full_get_segment_text(whisper_context*, int)
|
const char* whisper_full_get_segment_text(whisper_context*, int)
|
||||||
|
|
||||||
|
|
||||||
ctypedef struct audio_data:
|
|
||||||
float* frames;
|
|
||||||
int n_frames;
|
|
||||||
|
|
||||||
|
|
|
@ -8,12 +8,14 @@ import numpy as np
|
||||||
import requests
|
import requests
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
cimport numpy as cnp
|
cimport numpy as cnp
|
||||||
|
|
||||||
cdef int SAMPLE_RATE = 16000
|
cdef int SAMPLE_RATE = 16000
|
||||||
cdef char* TEST_FILE = b'test.wav'
|
cdef char* TEST_FILE = 'test.wav'
|
||||||
cdef char* DEFAULT_MODEL = 'tiny'
|
cdef char* DEFAULT_MODEL = 'tiny'
|
||||||
cdef char* LANGUAGE = b'fr'
|
cdef char* LANGUAGE = b'fr'
|
||||||
|
cdef int N_THREADS = os.cpu_count()
|
||||||
|
|
||||||
MODELS = {
|
MODELS = {
|
||||||
'model_ggml_tiny.bin': 'https://ggml.ggerganov.com/ggml-model-whisper-tiny.bin',
|
'model_ggml_tiny.bin': 'https://ggml.ggerganov.com/ggml-model-whisper-tiny.bin',
|
||||||
|
@ -67,7 +69,7 @@ cdef audio_data load_audio(bytes file, int sr = SAMPLE_RATE):
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
cdef whisper_full_params default_params():
|
cdef whisper_full_params default_params() nogil:
|
||||||
cdef whisper_full_params params = whisper_full_default_params(
|
cdef whisper_full_params params = whisper_full_default_params(
|
||||||
whisper_sampling_strategy.WHISPER_SAMPLING_GREEDY
|
whisper_sampling_strategy.WHISPER_SAMPLING_GREEDY
|
||||||
)
|
)
|
||||||
|
@ -75,6 +77,7 @@ cdef whisper_full_params default_params():
|
||||||
params.print_progress = True
|
params.print_progress = True
|
||||||
params.translate = False
|
params.translate = False
|
||||||
params.language = <const char *> LANGUAGE
|
params.language = <const char *> LANGUAGE
|
||||||
|
n_threads = N_THREADS
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
@ -83,25 +86,26 @@ cdef class Whisper:
|
||||||
cdef whisper_full_params params
|
cdef whisper_full_params params
|
||||||
|
|
||||||
def __init__(self, model=DEFAULT_MODEL, pb=None):
|
def __init__(self, model=DEFAULT_MODEL, pb=None):
|
||||||
model_fullname = f'model_ggml_{model.decode()}.bin'.encode('utf8')
|
model_fullname = f'model_ggml_{model}.bin'.encode('utf8')
|
||||||
download_model(model_fullname)
|
download_model(model_fullname)
|
||||||
cdef bytes model_b = model_fullname
|
cdef bytes model_b = model_fullname
|
||||||
self.ctx = whisper_init(model_b)
|
self.ctx = whisper_init(model_b)
|
||||||
self.params = default_params()
|
self.params = default_params()
|
||||||
|
whisper_print_system_info()
|
||||||
|
|
||||||
def __dealloc__(self):
|
def __dealloc__(self):
|
||||||
whisper_free(self.ctx)
|
whisper_free(self.ctx)
|
||||||
|
|
||||||
def transcribe(self):
|
def transcribe(self, filename=TEST_FILE):
|
||||||
cdef audio_data data = load_audio(TEST_FILE)
|
cdef audio_data data = load_audio(<bytes>filename)
|
||||||
return whisper_full(self.ctx, self.params, data.frames, data.n_frames)
|
return whisper_full(self.ctx, self.params, data.frames, data.n_frames)
|
||||||
|
|
||||||
cpdef str extract_text(self, int res):
|
cpdef list extract_text(self, int res):
|
||||||
if res != 0:
|
if res != 0:
|
||||||
raise RuntimeError
|
raise RuntimeError
|
||||||
cdef int n_segments = whisper_full_n_segments(self.ctx)
|
cdef int n_segments = whisper_full_n_segments(self.ctx)
|
||||||
return b'\n'.join([
|
return [
|
||||||
whisper_full_get_segment_text(self.ctx, i) for i in range(n_segments)
|
whisper_full_get_segment_text(self.ctx, i).decode() for i in range(n_segments)
|
||||||
]).decode()
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user