init

Changes made (not exhaustive): - changed defaults - whisper.cpp submodule set to 1.2.0 - removed `requests` dependency - models dir can be changed in constructor - added support for setting params - added back support for `large-v1` model - added support for english-only models
2023-02-18 22:59:42 +00:00 · 2023-02-18 22:59:42 +00:00 · af035ea355
commit af035ea355
parent e2581c8aad
8 changed files with 268 additions and 229 deletions
--- a/.github/workflows/build_wheels.yml
+++ b/.github/workflows/build_wheels.yml
@ -1,40 +0,0 @@
 name: build_wheels
 run-name: ${{ github.actor }} is building wheels
 on: [push]
 jobs:
  build_wheels:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
    steps:
      - uses: actions/checkout@v2
      - name: Checkout submodules
        run: |
          git submodule update --init --recursive
      - uses: actions/setup-python@v2
        with:
          python-version: '3.10'
      - name: Setup pip
        run: |
           python -m pip install --upgrade pip
           python -m pip install cibuildwheel==1.6.4
      - name:  Install
        run:   |
               if [ "$RUNNER_OS" == "Linux" ]; then
                    sudo apt-get update
                    sudo apt-get install gcc g++
               fi
        shell: bash
      - name: Build wheel
        run: python -m cibuildwheel --output-dir dist/
        env:
          CIBW_BUILD: cp36-* cp37-* cp38-*
      - uses: actions/upload-artifact@v2
        with:
          path: ./dist/*.whl
--- a/.gitignore
+++ b/.gitignore
@ -127,3 +127,5 @@ dmypy.json
 # Pyre type checker
 .pyre/
 whispercpp.cpp
--- a/.gitmodules
+++ b/.gitmodules
@ -1,3 +1,4 @@
 [submodule "whisper.cpp"]
 	path = whisper.cpp
 	url = https://github.com/ggerganov/whisper.cpp
 	branch = b2083c5d02db9a1e6dbb3d58254fd65ebfff4b5d
--- a/README.md
+++ b/README.md
@ -1,12 +1,23 @@
 Python bindings for whisper.cpp
 ===============================
-`pip install git+https://github.com/o4dev/whispercpp.py`
+```
 git clone --recurse-submodules https://git.ecker.tech/lightmare/whispercpp.py
 cd whispercpp.py
 pip install .
 ```
 or
 ```
 git clone https://git.ecker.tech/lightmare/whispercpp.py
 cd whispercpp.py
 git submodule update --init
 pip install .
 ```
 ```python
 from whispercpp import Whisper
-w = Whisper('tiny')
+w = Whisper('tiny', models_dir='./models/', language=b'en')
 result = w.transcribe("myfile.mp3")
 text = w.extract_text(result)
@ -14,3 +25,12 @@ text = w.extract_text(result)
 Note: default parameters might need to be tweaked.  
 See `whispercpp.pyx` for the available parameters and their defaults.
 Changes made (not exhaustive):
 - changed defaults
 - whisper.cpp submodule set to 1.2.0
 - removed `requests` dependency
 - models dir can be changed in constructor
 - added support for setting params
 - added back support for `large-v1` model
 - added support for english-only models
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,3 @@
 Cython
 numpy
 ffmpeg-python
--- a/setup.py
+++ b/setup.py
@ -34,7 +34,6 @@ setup(
    include_dirs = ['./whisper.cpp/', numpy.get_include()],
    install_requires=[
      'numpy',
-      'ffmpeg-python',
+      'ffmpeg-python'
      'requests'
    ],
 )
--- a/whispercpp.pxd
+++ b/whispercpp.pxd
@ -10,8 +10,8 @@ cdef nogil:
 	int WHISPER_CHUNK_SIZE = 30
 	int SAMPLE_RATE = 16000
 	char* TEST_FILE = b'test.wav'
-    char* DEFAULT_MODEL = b'ggml-tiny.bin'
+	char* DEFAULT_MODEL = b'ggml-base.bin'
-    char* LANGUAGE = b'fr'
+	char* LANGUAGE = b'en'
 	ctypedef struct audio_data:
 		float* frames;
 		int n_frames;
--- a/whispercpp.pyx
+++ b/whispercpp.pyx
@ -3,43 +3,60 @@
 import ffmpeg
 import numpy as np
-import requests
+import urllib.request
 import os
 from pathlib import Path
 MODELS_DIR = str(Path('~/.ggml-models').expanduser())
 print("Saving models to:", MODELS_DIR)
 cimport numpy as cnp
 cdef int SAMPLE_RATE = 16000
 cdef char* TEST_FILE = 'test.wav'
-cdef char* DEFAULT_MODEL = 'tiny'
+cdef char* DEFAULT_MODEL = 'base'
-cdef char* LANGUAGE = b'fr'
+cdef char* LANGUAGE = b'en'
 cdef int N_THREADS = os.cpu_count()
 cdef _Bool PRINT_REALTIME = False
 cdef _Bool PRINT_PROGRESS = False
 cdef _Bool TRANSLATE = False
 MODELS = {
 	'ggml-tiny.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin',
 	'ggml-tiny.en.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en.bin',
 	'ggml-base.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-base.bin',
 	'ggml-base.en.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin',
 	'ggml-small.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-small.bin',
 	'ggml-small.en.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-small.en.bin',
 	'ggml-medium.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin',
 	'ggml-medium.en.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-medium.en.bin',
 	'ggml-large-v1.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-large-v1.bin',
 	'ggml-large.bin': 'https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-large.bin',
 }
-def model_exists(model):
+def model_exists(model, models_dir=MODELS_DIR):
-    return os.path.exists(Path(MODELS_DIR).joinpath(model))
+	return os.path.exists(Path(models_dir).joinpath(model))
-def download_model(model):
+def download_model(model, models_dir=MODELS_DIR):
-    if model_exists(model):
+	"""Downloads ggml model with the given identifier
 	The filenames mirror the ones given in ggerganov's repos.
 	e.g. 'small' becomes 'ggml-small.bin'
 	Args:
 	    model: The model file name, e.g. 'ggml-small.bin' (must be a key of MODELS)
 	    models_dir: The path where the file is written to
 	"""
 	if model_exists(model, models_dir=models_dir):
 		return
-    print(f'Downloading {model}...')
+	print(f'Downloading {model} to {models_dir}...')
 	url = MODELS[model]
-    r = requests.get(url, allow_redirects=True)
+	os.makedirs(models_dir, exist_ok=True)
-    os.makedirs(MODELS_DIR, exist_ok=True)
+	with urllib.request.urlopen(url) as r:
-    with open(Path(MODELS_DIR).joinpath(model), 'wb') as f:
+		with open(Path(models_dir).joinpath(model), 'wb') as f:
-        f.write(r.content)
+			f.write(r.read())
 cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] load_audio(bytes file, int sr = SAMPLE_RATE):
@ -57,7 +74,7 @@ cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] load_audio(bytes file, int sr
 				capture_stderr=True
 			)
 		)[0]
-    except:
+	except Exception:
 		raise RuntimeError(f"File '{file}' not found")
 	cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] frames = (
@ -68,43 +85,80 @@ cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] load_audio(bytes file, int sr
 	return frames
-cdef whisper_full_params default_params() nogil:
+cdef whisper_full_params set_params(_Bool print_realtime, _Bool print_progress, _Bool translate, char* language, int n_threads) nogil:
 	cdef whisper_full_params params = whisper_full_default_params(
 		whisper_sampling_strategy.WHISPER_SAMPLING_GREEDY
 	)
-    params.print_realtime = True
+	params.print_realtime = print_realtime
-    params.print_progress = True
+	params.print_progress = print_progress
-    params.translate = False
+	params.translate = translate
-    params.language = <const char *> LANGUAGE
+	params.language = <const char *> language
-    n_threads = N_THREADS
+	n_threads = n_threads
 	return params
 cdef class Whisper:
 	cdef whisper_context * ctx
 	cdef whisper_full_params params
-    def __init__(self, model=DEFAULT_MODEL, pb=None):
+	def __init__(self, model = DEFAULT_MODEL, models_dir = MODELS_DIR, _Bool print_realtime = PRINT_REALTIME, _Bool print_progress = PRINT_PROGRESS, _Bool translate = TRANSLATE, char* language = LANGUAGE, int n_threads = N_THREADS, _Bool print_system_info = False): # not pretty, look for a way to use kwargs?
-        model_fullname = f'ggml-{model}.bin'.encode('utf8')
+		"""Constructor for Whisper class.
-        download_model(model_fullname)
+
-        model_path = Path(MODELS_DIR).joinpath(model_fullname)
+		Automatically checks for model and downloads it if necessary.
 		Args:
 		    model: Model identifier, e.g. 'base' (see MODELS)
 		    models_dir: The path where the models should be stored
 		    print_realtime: whisper.cpp's real time transcription output
 		    print_progress: whisper.cpp's progress indicator
 		    translate: whisper.cpp's translation option
 		    language: Which language to use. Must be a byte string.
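 		    n_threads: Number of threads to use. NOTE(review): set_params() assigns `n_threads = n_threads` and never writes params.n_threads, so this value appears to have no effect.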
 		    print_system_info: whisper.cpp's system info output
 		"""
 		model_fullname = f'ggml-{model}.bin' #.encode('utf8')
 		download_model(model_fullname, models_dir=models_dir)
 		model_path = Path(models_dir).joinpath(model_fullname)
 		cdef bytes model_b = str(model_path).encode('utf8')
 		self.ctx = whisper_init(model_b)
-        self.params = default_params()
+		self.params = set_params(print_realtime, print_progress, translate, language, n_threads)
 		if print_system_info:
 			whisper_print_system_info()
 	def __dealloc__(self):
 		whisper_free(self.ctx)
 	def transcribe(self, filename = TEST_FILE):
-        print("Loading data..")
+		"""Transcribes from given file.
 		Args:
 		    filename: Path to file
 		Returns:
 		    The integer returned by whisper_full(...); pass it to extract_text(...), which accepts only 0
 		Raises:
 		    RuntimeError: The file could not be loaded (any error while reading the audio is reported as "not found")
 		"""
 		#print(f"Loading data from '{filename}'...")
 		cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] frames = load_audio(<bytes>filename)
-        print("Transcribing..")
+		#print("Transcribing..")
 		return whisper_full(self.ctx, self.params, &frames[0], len(frames))
 	def extract_text(self, int res):
-        print("Extracting text...")
+		"""Extracts the text from a transcription.
 		Args:
 		    res: A result id from transcribe(...)
 		Returns:
 		    A list of transcribed strings.
 		Raises:
 		    RuntimeError: The given result is non-zero (only 0 is accepted).
 		"""
 		#print("Extracting text...")
 		if res != 0:
 			raise RuntimeError
 		cdef int n_segments = whisper_full_n_segments(self.ctx)