From d838c5ee733e32218af126feae899d8ac9703998 Mon Sep 17 00:00:00 2001 From: boolemancer Date: Fri, 27 Jan 2023 02:28:34 -0800 Subject: [PATCH] Fix a dangling pointer to the input audio data --- whispercpp.pyx | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/whispercpp.pyx b/whispercpp.pyx index d1fc5d7..27855c1 100644 --- a/whispercpp.pyx +++ b/whispercpp.pyx @@ -30,20 +30,20 @@ MODELS = { } def model_exists(model): - return os.path.exists(MODELS_DIR + "/" + model.decode()) + return os.path.exists(Path(MODELS_DIR).joinpath(model)) def download_model(model): if model_exists(model): return print(f'Downloading {model}...') - url = MODELS[model.decode()] + url = MODELS[model] r = requests.get(url, allow_redirects=True) - with open(MODELS_DIR + "/" + model.decode(), 'wb') as f: + with open(Path(MODELS_DIR).joinpath(model), 'wb') as f: f.write(r.content) -cdef audio_data load_audio(bytes file, int sr = SAMPLE_RATE): +cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] load_audio(bytes file, int sr = SAMPLE_RATE): try: out = ( ffmpeg.input(file, threads=0) @@ -67,11 +67,7 @@ cdef audio_data load_audio(bytes file, int sr = SAMPLE_RATE): .astype(np.float32) ) / pow(2, 15) - cdef audio_data data; - data.frames = &frames[0] - data.n_frames = len(frames) - - return data + return frames cdef whisper_full_params default_params() nogil: cdef whisper_full_params params = whisper_full_default_params( @@ -90,9 +86,10 @@ cdef class Whisper: cdef whisper_full_params params def __init__(self, model=DEFAULT_MODEL, pb=None): - model_fullname = f'model_ggml_{model}.bin'.encode('utf8') + model_fullname = f'ggml-{model}.bin' download_model(model_fullname) - cdef bytes model_b = MODELS_DIR.encode('utf8') + b'/' + model_fullname + model_path = Path(MODELS_DIR).joinpath(model_fullname) + cdef bytes model_b = str(model_path).encode('utf8') self.ctx = whisper_init(model_b) self.params = default_params() whisper_print_system_info() @@ -102,9 +99,10 @@ cdef class Whisper: def transcribe(self, filename=TEST_FILE): print("Loading data..") - cdef audio_data data = load_audio(filename) + cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] frames = load_audio(filename) + print("Transcribing..") - return whisper_full(self.ctx, self.params, data.frames, data.n_frames) + return whisper_full(self.ctx, self.params, &frames[0], len(frames)) def extract_text(self, int res): print("Extracting text...")