Changes made (not exhaustive):
- changed defaults
- whisper.cpp submodule set to 1.2.0
- removed `requests` dependency
- models dir can be changed in constructor
- added support for setting params
- added back support for `large-v1` model
- added support for English-only models
This commit is contained in:
lightmare 2023-02-18 22:59:42 +00:00
parent e2581c8aad
commit af035ea355
8 changed files with 268 additions and 229 deletions

View File

@ -1,40 +0,0 @@
name: build_wheels
run-name: ${{ github.actor }} is building wheels
on: [push]
jobs:
build_wheels:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
steps:
- uses: actions/checkout@v2
- name: Checkout submodules
run: |
git submodule update --init --recursive
- uses: actions/setup-python@v2
with:
python-version: '3.10'
- name: Setup pip
run: |
python -m pip install --upgrade pip
python -m pip install cibuildwheel==1.6.4
- name: Install
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
sudo apt-get update
sudo apt-get install gcc g++
fi
shell: bash
- name: Build wheel
run: python -m cibuildwheel --output-dir dist/
env:
CIBW_BUILD: cp36-* cp37-* cp38-*
- uses: actions/upload-artifact@v2
with:
path: ./dist/*.whl

2
.gitignore vendored
View File

@ -127,3 +127,5 @@ dmypy.json
# Pyre type checker # Pyre type checker
.pyre/ .pyre/
whispercpp.cpp

1
.gitmodules vendored
View File

@ -1,3 +1,4 @@
[submodule "whisper.cpp"] [submodule "whisper.cpp"]
path = whisper.cpp path = whisper.cpp
url = https://github.com/ggerganov/whisper.cpp url = https://github.com/ggerganov/whisper.cpp
branch = b2083c5d02db9a1e6dbb3d58254fd65ebfff4b5d

View File

@ -1,12 +1,23 @@
Python bindings for whisper.cpp Python bindings for whisper.cpp
=============================== ===============================
`pip install git+https://github.com/o4dev/whispercpp.py` ```
git clone --recurse-submodules https://git.ecker.tech/lightmare/whispercpp.py
cd whispercpp.py
pip install .
```
or
```
git clone https://git.ecker.tech/lightmare/whispercpp.py
cd whispercpp.py
git submodule update --init
pip install .
```
```python ```python
from whispercpp import Whisper from whispercpp import Whisper
w = Whisper('tiny') w = Whisper('tiny', models_dir='./models/', language=b'en')
result = w.transcribe("myfile.mp3") result = w.transcribe("myfile.mp3")
text = w.extract_text(result) text = w.extract_text(result)
@ -14,3 +25,12 @@ text = w.extract_text(result)
Note: default parameters might need to be tweaked. Note: default parameters might need to be tweaked.
See Whispercpp.pyx. See Whispercpp.pyx.
Changes made (not exhaustive):
- changed defaults
- whisper.cpp submodule set to 1.2.0
- removed `requests` dependency
- models dir can be changed in constructor
- added support for setting params
- added back support for `large-v1` model
- added support for English-only models

3
requirements.txt Normal file
View File

@ -0,0 +1,3 @@
Cython
numpy
ffmpeg-python

View File

@ -34,7 +34,6 @@ setup(
include_dirs = ['./whisper.cpp/', numpy.get_include()], include_dirs = ['./whisper.cpp/', numpy.get_include()],
install_requires=[ install_requires=[
'numpy', 'numpy',
'ffmpeg-python', 'ffmpeg-python'
'requests'
], ],
) )

View File

@ -10,8 +10,8 @@ cdef nogil:
int WHISPER_CHUNK_SIZE = 30 int WHISPER_CHUNK_SIZE = 30
int SAMPLE_RATE = 16000 int SAMPLE_RATE = 16000
char* TEST_FILE = b'test.wav' char* TEST_FILE = b'test.wav'
char* DEFAULT_MODEL = b'ggml-tiny.bin' char* DEFAULT_MODEL = b'ggml-base.bin'
char* LANGUAGE = b'fr' char* LANGUAGE = b'en'
ctypedef struct audio_data: ctypedef struct audio_data:
float* frames; float* frames;
int n_frames; int n_frames;

View File

@ -3,43 +3,60 @@
import ffmpeg import ffmpeg
import numpy as np import numpy as np
import requests import urllib.request
import os import os
from pathlib import Path from pathlib import Path
MODELS_DIR = str(Path('~/.ggml-models').expanduser()) MODELS_DIR = str(Path('~/.ggml-models').expanduser())
print("Saving models to:", MODELS_DIR)
cimport numpy as cnp cimport numpy as cnp
cdef int SAMPLE_RATE = 16000 cdef int SAMPLE_RATE = 16000
cdef char* TEST_FILE = 'test.wav' cdef char* TEST_FILE = 'test.wav'
cdef char* DEFAULT_MODEL = 'tiny' cdef char* DEFAULT_MODEL = 'base'
cdef char* LANGUAGE = b'fr' cdef char* LANGUAGE = b'en'
cdef int N_THREADS = os.cpu_count() cdef int N_THREADS = os.cpu_count()
cdef _Bool PRINT_REALTIME = False
cdef _Bool PRINT_PROGRESS = False
cdef _Bool TRANSLATE = False
MODELS = { MODELS = {
'ggml-tiny.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin', 'ggml-tiny.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin',
'ggml-tiny.en.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en.bin',
'ggml-base.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-base.bin', 'ggml-base.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-base.bin',
'ggml-base.en.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin',
'ggml-small.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-small.bin', 'ggml-small.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-small.bin',
'ggml-small.en.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-small.en.bin',
'ggml-medium.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin', 'ggml-medium.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin',
'ggml-medium.en.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-medium.en.bin',
'ggml-large-v1.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-large-v1.bin',
'ggml-large.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-large.bin', 'ggml-large.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-large.bin',
} }
def model_exists(model): def model_exists(model, models_dir=MODELS_DIR):
return os.path.exists(Path(MODELS_DIR).joinpath(model)) return os.path.exists(Path(models_dir).joinpath(model))
def download_model(model): def download_model(model, models_dir=MODELS_DIR):
if model_exists(model): """Downloads ggml model with the given identifier
The filenames mirror the ones given in ggerganov's repos.
e.g. 'small' becomes 'ggml-small.bin'
Args:
model: The model identifier
models_dir: The path where the file is written to
"""
if model_exists(model, models_dir=models_dir):
return return
print(f'Downloading {model}...') print(f'Downloading {model} to {models_dir}...')
url = MODELS[model] url = MODELS[model]
r = requests.get(url, allow_redirects=True) os.makedirs(models_dir, exist_ok=True)
os.makedirs(MODELS_DIR, exist_ok=True) with urllib.request.urlopen(url) as r:
with open(Path(MODELS_DIR).joinpath(model), 'wb') as f: with open(Path(models_dir).joinpath(model), 'wb') as f:
f.write(r.content) f.write(r.read())
cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] load_audio(bytes file, int sr = SAMPLE_RATE): cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] load_audio(bytes file, int sr = SAMPLE_RATE):
@ -57,7 +74,7 @@ cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] load_audio(bytes file, int sr
capture_stderr=True capture_stderr=True
) )
)[0] )[0]
except: except Exception:
raise RuntimeError(f"File '{file}' not found") raise RuntimeError(f"File '{file}' not found")
cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] frames = ( cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] frames = (
@ -68,43 +85,80 @@ cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] load_audio(bytes file, int sr
return frames return frames
cdef whisper_full_params default_params() nogil: cdef whisper_full_params set_params(_Bool print_realtime, _Bool print_progress, _Bool translate, char* language, int n_threads) nogil:
cdef whisper_full_params params = whisper_full_default_params( cdef whisper_full_params params = whisper_full_default_params(
whisper_sampling_strategy.WHISPER_SAMPLING_GREEDY whisper_sampling_strategy.WHISPER_SAMPLING_GREEDY
) )
params.print_realtime = True params.print_realtime = print_realtime
params.print_progress = True params.print_progress = print_progress
params.translate = False params.translate = translate
params.language = <const char *> LANGUAGE params.language = <const char *> language
n_threads = N_THREADS n_threads = n_threads
return params return params
cdef class Whisper: cdef class Whisper:
cdef whisper_context * ctx cdef whisper_context * ctx
cdef whisper_full_params params cdef whisper_full_params params
def __init__(self, model=DEFAULT_MODEL, pb=None): def __init__(self, model = DEFAULT_MODEL, models_dir = MODELS_DIR, _Bool print_realtime = PRINT_REALTIME, _Bool print_progress = PRINT_PROGRESS, _Bool translate = TRANSLATE, char* language = LANGUAGE, int n_threads = N_THREADS, _Bool print_system_info = False): # not pretty, look for a way to use kwargs?
model_fullname = f'ggml-{model}.bin'.encode('utf8') """Constructor for Whisper class.
download_model(model_fullname)
model_path = Path(MODELS_DIR).joinpath(model_fullname) Automatically checks for model and downloads it if necessary.
Args:
model: Model identifier, e.g. 'base' (see MODELS)
models_dir: The path where the models should be stored
print_realtime: whisper.cpp's real time transcription output
print_progress: whisper.cpp's progress indicator
translate: whisper.cpp's translation option
language: Which language to use. Must be a byte string.
n_threads: Number of threads to use
print_system_info: whisper.cpp's system info output
"""
model_fullname = f'ggml-{model}.bin' #.encode('utf8')
download_model(model_fullname, models_dir=models_dir)
model_path = Path(models_dir).joinpath(model_fullname)
cdef bytes model_b = str(model_path).encode('utf8') cdef bytes model_b = str(model_path).encode('utf8')
self.ctx = whisper_init(model_b) self.ctx = whisper_init(model_b)
self.params = default_params() self.params = set_params(print_realtime, print_progress, translate, language, n_threads)
if print_system_info:
whisper_print_system_info() whisper_print_system_info()
def __dealloc__(self): def __dealloc__(self):
whisper_free(self.ctx) whisper_free(self.ctx)
def transcribe(self, filename = TEST_FILE): def transcribe(self, filename = TEST_FILE):
print("Loading data..") """Transcribes from given file.
Args:
filename: Path to file
Returns:
A result id for extract_text(...)
Raises:
RuntimeError: The given file could not be found
"""
#print(f"Loading data from '{filename}'...")
cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] frames = load_audio(<bytes>filename) cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] frames = load_audio(<bytes>filename)
print("Transcribing..") #print("Transcribing..")
return whisper_full(self.ctx, self.params, &frames[0], len(frames)) return whisper_full(self.ctx, self.params, &frames[0], len(frames))
def extract_text(self, int res): def extract_text(self, int res):
print("Extracting text...") """Extracts the text from a transcription.
Args:
res: A result id from transcribe(...)
Returns:
A list of transcribed strings.
Raises:
RuntimeError: The given result id was invalid.
"""
#print("Extracting text...")
if res != 0: if res != 0:
raise RuntimeError raise RuntimeError
cdef int n_segments = whisper_full_n_segments(self.ctx) cdef int n_segments = whisper_full_n_segments(self.ctx)