crammed in vall_e.cpp support to finally justify creating it (and a bunch of other things)

This commit is contained in:
ecker 2025-08-02 23:02:49 -05:00
parent 4f1ce314a5
commit 73ca9bb168
52 changed files with 13972 additions and 37 deletions

2
.gitignore vendored
View File

@ -55,3 +55,5 @@
*.otf
*.bin
models/
llm/
tmp/

View File

@ -62,10 +62,10 @@ LIBS += -L$(ENGINE_LIB_DIR) -L$(LIB_DIR)/$(PREFIX_PATH) -L$(LIB_DIR)/$(ARCH
LINKS += $(UF_LIBS) $(EXT_LIBS) $(DEPS)
DEPS +=
FLAGS +=
FLAGS += # -DUF_DEBUG
ifneq (,$(findstring -DUF_DEBUG,$(FLAGS)))
REQ_DEPS += meshoptimizer toml xatlas curl ffx:fsr cpptrace # ncurses openvr draco discord bullet ultralight-ux
REQ_DEPS += meshoptimizer toml xatlas curl ffx:fsr cpptrace vall_e # ncurses openvr draco discord bullet ultralight-ux
FLAGS += -g
endif
ifneq (,$(findstring win64,$(ARCH)))
@ -215,7 +215,7 @@ ifneq (,$(findstring bullet,$(REQ_DEPS)))
DEPS += -lbulletdynamics -lbulletcollision -lbulletlinearmath
else
DEPS += -lBulletDynamics -lBulletCollision -lLinearMath
INCS += -I./dep/bullet/
INCS += -I./dep/include/bullet/
endif
endif
ifneq (,$(findstring reactphysics,$(REQ_DEPS)))
@ -248,6 +248,11 @@ endif
ifneq (,$(findstring toml,$(REQ_DEPS)))
FLAGS += -DUF_USE_TOML
endif
ifneq (,$(findstring vall_e,$(REQ_DEPS)))
FLAGS += -DUF_USE_VALL_E
INCS += -I./dep/include/vall_e.cpp/
DEPS += -lvall_e
endif
# SRCS_DLL += $(wildcard $(ENGINE_SRC_DIR)/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*/*/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*/*/*/*.cpp)
#SRCS_DLL += $(wildcard $(ENGINE_SRC_DIR)/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*/*/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*/*/*/*.cpp) $(wildcard $(EXT_SRC_DIR)/*.cpp) $(wildcard $(EXT_SRC_DIR)/*/*.cpp) $(wildcard $(EXT_SRC_DIR)/*/*/*.cpp) $(wildcard $(EXT_SRC_DIR)/*/*/*/*.cpp) $(wildcard $(EXT_SRC_DIR)/*/*/*/*/*.cpp)

View File

@ -18,7 +18,7 @@ To compile, run `make`. The outputted libraries and executables will be placed i
## Run
Currently, assets are not provided due to size (but mostly due to being test assets).
Currently, a barebones setup is provided via the [`.zip` bundle](https://github.com/e-c-k-e-r/engine/releases/tag/bundle).
*If* adequate assets are provided, run `./program.sh` or `make run`. This ensures the path to the required libraries is added to the PATH.

View File

@ -288,6 +288,9 @@
"encoding": "msgpack",
"compression": "gz"
},
"vall_e": {
"enabled": true
},
"imgui": {
"enabled": true
},

View File

@ -0,0 +1,113 @@
#pragma once
#include <vector>
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "lstm.h"
#include "utils.h"
// Tensors for one decoder stage. encodec_forward_decoder() applies them in
// this order: ELU + transposed-conv upsampling, then a residual unit made of
// conv1 and conv2 whose result is added to the shortcut conv output.
// The `_w` / `_b` suffixes are presumably weight and bias -- confirm against
// strided_conv_1d() / strided_conv_transpose_1d() in utils.h.
struct encodec_decoder_block {
// upsampling layers (used with strided_conv_transpose_1d)
struct ggml_tensor *us_conv_w;
struct ggml_tensor *us_conv_b;
// conv1 (first conv of the residual branch)
struct ggml_tensor *conv_1_w;
struct ggml_tensor *conv_1_b;
// conv2 (second conv of the residual branch)
struct ggml_tensor *conv_2_w;
struct ggml_tensor *conv_2_b;
// shortcut (conv applied to the block input and added to the conv2 output)
struct ggml_tensor *conv_sc_w;
struct ggml_tensor *conv_sc_b;
};
// All tensors consumed by encodec_forward_decoder(), in the order they are
// used: initial conv, LSTM, the per-stage blocks, and the final conv.
struct encodec_decoder {
// initial convolution applied to the decoder input
struct ggml_tensor *init_conv_w;
struct ggml_tensor *init_conv_b;
// two-layer LSTM (l0_* and l1_* tensors are read by the forward pass)
encodec_lstm lstm;
// final convolution producing the decoder output
struct ggml_tensor *final_conv_w;
struct ggml_tensor *final_conv_b;
// upsampling stages; the forward pass reads entries 0..3
std::vector<encodec_decoder_block> blocks;
};
struct ggml_tensor *encodec_forward_decoder(
const struct encodec_decoder *decoder, struct ggml_context *ctx0,
struct ggml_tensor *quantized_out, const int *ratios, const int kernel_size, const int res_kernel_size,
const int stride) {
if (!quantized_out) {
fprintf(stderr, "%s: null input tensor\n", __func__);
return NULL;
}
struct ggml_tensor *inpL = strided_conv_1d(
ctx0, quantized_out, decoder->init_conv_w, decoder->init_conv_b, stride);
// lstm
{
struct ggml_tensor *cur = inpL;
const encodec_lstm lstm = decoder->lstm;
// first lstm layer
char l0_prefix[7] = "dec_l0";
struct ggml_tensor *hs1 = forward_pass_lstm_unilayer(
ctx0, cur, lstm.l0_ih_w, lstm.l0_hh_w, lstm.l0_ih_b, lstm.l0_hh_b, l0_prefix);
// second lstm layer
char l1_prefix[7] = "dec_l1";
struct ggml_tensor *out = forward_pass_lstm_unilayer(
ctx0, hs1, lstm.l1_ih_w, lstm.l1_hh_w, lstm.l1_ih_b, lstm.l1_hh_b, l1_prefix);
inpL = ggml_add(ctx0, inpL, out);
}
for (int layer_ix = 0; layer_ix < 4; layer_ix++) {
encodec_decoder_block block = decoder->blocks[layer_ix];
// upsampling layers
inpL = ggml_elu(ctx0, inpL);
inpL = strided_conv_transpose_1d(
ctx0, inpL, block.us_conv_w, block.us_conv_b, ratios[layer_ix]);
struct ggml_tensor *current = inpL;
// shortcut
struct ggml_tensor *shortcut = strided_conv_1d(
ctx0, inpL, block.conv_sc_w, block.conv_sc_b, stride);
// conv1
current = ggml_elu(ctx0, current);
current = strided_conv_1d(
ctx0, current, block.conv_1_w, block.conv_1_b, stride);
// conv2
current = ggml_elu(ctx0, current);
current = strided_conv_1d(
ctx0, current, block.conv_2_w, block.conv_2_b, stride);
// residual connection
inpL = ggml_add(ctx0, current, shortcut);
}
// final conv
inpL = ggml_elu(ctx0, inpL);
struct ggml_tensor *decoded_inp = strided_conv_1d(
ctx0, inpL, decoder->final_conv_w, decoder->final_conv_b, stride);
return decoded_inp;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,184 @@
/*
Copyright 2024 Pierre-Antoine Bannier
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
/*
* This file contains the declarations of the structs and functions used in the encodec library.
* The library provides functionality for audio compression and decompression using a custom model.
* The model consists of an encoder, a quantizer and a decoder, each with their own set of parameters.
* The library also provides functions for loading and freeing the model, as well as compressing and decompressing audio data.
*
*/
#pragma once
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml.h"
#ifdef __cplusplus
extern "C" {
#endif
struct encodec_context;
// Timing counters for an encodec context; obtained through
// encodec_get_statistics() and cleared through encodec_reset_statistics().
// NOTE(review): the `_us` suffix suggests microseconds -- confirm in the
// implementation.
struct encodec_statistics {
// The time taken to load the model.
int64_t t_load_us;
// The time taken to compute the model.
int64_t t_compute_us;
};
/**
* Loads an encodec model from the specified file path.
*
* @param model_path The file path to the encodec model.
* @param offset The offset (in bytes) to the start of the model in the file.
* @param n_gpu_layers The number of GPU layers to use.
* @return A pointer to the encodec context struct.
*/
struct encodec_context *encodec_load_model(
const char *model_path,
const int offset,
int n_gpu_layers);
/**
* Sets the target bandwidth for the given encodec context.
*
* @param ectx The encodec context to set the target bandwidth for.
* @param bandwidth The target bandwidth to set, in bits per second.
*/
void encodec_set_target_bandwidth(
struct encodec_context *ectx,
int bandwidth);
/**
* Sets the sample rate for the given encodec context.
*
* @param ectx The encodec context to set the sample rate for.
* @param sample_rate The sample rate to set.
*/
void encodec_set_sample_rate(
struct encodec_context *ectx,
int sample_rate);
/**
* Reconstructs audio from raw audio data using the specified encodec context.
*
* @param ectx The encodec context to use for reconstruction.
* @param raw_audio The raw audio data to reconstruct.
* @param n_samples The number of samples in the raw audio buffer.
* @param n_threads The number of threads to use for reconstruction.
* @return True if the reconstruction was successful, false otherwise.
*/
bool encodec_reconstruct_audio(
struct encodec_context *ectx,
const float *raw_audio,
const int n_samples,
int n_threads);
/**
* Compresses audio data using the specified encodec context.
*
* @param ectx The encodec context to use for compression.
* @param raw_audio The raw audio data to compress.
* @param n_samples The number of samples in the raw audio buffer.
* @param n_threads The number of threads to use for compression.
* @return True if the compression was successful, false otherwise.
*/
bool encodec_compress_audio(
struct encodec_context *ectx,
const float *raw_audio,
const int n_samples,
int n_threads);
/**
* Decompresses audio data using the specified encodec context.
*
* @param ectx The encodec context to use for decompression.
* @param codes The compressed audio data to decompress.
* @param n_codes The number of codes in the codes buffer.
* @param n_threads The number of threads to use for decompression.
* @return True if the audio data was successfully decompressed, false otherwise.
*/
bool encodec_decompress_audio(
struct encodec_context *ectx,
const int32_t *codes,
const int n_codes,
int n_threads);
/**
* Gets the audio data from the given encodec context.
*
* @param ectx The encodec context to get the audio data from.
* @return A pointer to the audio data.
*/
float * encodec_get_audio(
struct encodec_context *ectx);
/**
* Gets the size of the audio data from the given encodec context.
*
* @param ectx The encodec context to get the audio size from.
* @return The size of the audio data.
*/
int encodec_get_audio_size(
struct encodec_context *ectx);
/**
* Gets the code data from the given encodec context.
*
* @param ectx The encodec context to get the code data from.
* @return A pointer to the code data.
*/
int32_t * encodec_get_codes(
struct encodec_context *ectx);
/**
* Gets the size of the code data from the given encodec context.
*
* @param ectx The encodec context to get the code size from.
* @return The size of the code data.
*/
int encodec_get_codes_size(
struct encodec_context *ectx);
/**
* Gets the statistics for the given encodec context.
*
* @param ectx The encodec context to get the statistics for.
* @return A pointer to the statistics struct.
*/
const struct encodec_statistics* encodec_get_statistics(
struct encodec_context *ectx);
/**
* Reset the statistics for the given encodec context.
*
* @param ectx The encodec context to reset the statistics for.
*/
void encodec_reset_statistics(
struct encodec_context *ectx);
/**
* @brief Frees the memory allocated for an encodec context.
*
* @param ectx The encodec context to free.
*/
void encodec_free(
struct encodec_context *ectx);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,109 @@
#pragma once
#include <vector>
#include "ggml.h"
#include "lstm.h"
// res + downsample block at some ratio
// Tensors for one encoder stage. encodec_forward_encoder() applies them in
// this order: residual unit (conv1 and conv2, added to the shortcut conv
// output), then ELU and the downsampling conv.
// The `_w` / `_b` suffixes are presumably weight and bias -- confirm against
// strided_conv_1d().
struct encodec_encoder_block {
// conv1 (first conv of the residual branch)
struct ggml_tensor *conv_1_w;
struct ggml_tensor *conv_1_b;
// conv2 (second conv of the residual branch)
struct ggml_tensor *conv_2_w;
struct ggml_tensor *conv_2_b;
// shortcut (conv applied to the block input and added to the conv2 output)
struct ggml_tensor *conv_sc_w;
struct ggml_tensor *conv_sc_b;
// downsampling layers (conv whose stride is taken from the ratios array)
struct ggml_tensor *ds_conv_w;
struct ggml_tensor *ds_conv_b;
};
// All tensors consumed by encodec_forward_encoder(): initial conv, the
// per-stage blocks, the LSTM, and the final conv.
struct encodec_encoder {
// initial convolution applied to the encoder input
struct ggml_tensor *init_conv_w;
struct ggml_tensor *init_conv_b;
// two-layer LSTM (l0_* and l1_* tensors are read by the forward pass)
encodec_lstm lstm;
// final convolution producing the encoder output
struct ggml_tensor *final_conv_w;
struct ggml_tensor *final_conv_b;
// downsampling stages; the forward pass reads entries 0..3
std::vector<encodec_encoder_block> blocks;
};
struct ggml_tensor *encodec_forward_encoder(
const struct encodec_encoder *encoder, struct ggml_context *ctx0,
struct ggml_tensor *inp, const int * ratios, const int kernel_size, const int res_kernel_size,
const int stride) {
if (!inp) {
fprintf(stderr, "%s: null input tensor\n", __func__);
return NULL;
}
struct ggml_tensor *inpL = strided_conv_1d(
ctx0, inp, encoder->init_conv_w, encoder->init_conv_b, stride);
for (int layer_ix = 0; layer_ix < 4; layer_ix++) {
encodec_encoder_block block = encoder->blocks[layer_ix];
struct ggml_tensor *current = inpL;
// shortcut
struct ggml_tensor *shortcut = strided_conv_1d(
ctx0, inpL, block.conv_sc_w, block.conv_sc_b, stride);
// conv1
current = ggml_elu(ctx0, current);
current = strided_conv_1d(
ctx0, current, block.conv_1_w, block.conv_1_b, stride);
// conv2
current = ggml_elu(ctx0, current);
current = strided_conv_1d(
ctx0, current, block.conv_2_w, block.conv_2_b, stride);
// residual connection
inpL = ggml_add(ctx0, current, shortcut);
// downsampling layers
inpL = ggml_elu(ctx0, inpL);
inpL = strided_conv_1d(
ctx0, inpL, block.ds_conv_w, block.ds_conv_b, ratios[3 - layer_ix]);
}
// lstm
{
struct ggml_tensor *cur = inpL;
const encodec_lstm lstm = encoder->lstm;
// first lstm layer
char l0_prefix[7] = "enc_l0";
struct ggml_tensor *hs1 = forward_pass_lstm_unilayer(
ctx0, cur, lstm.l0_ih_w, lstm.l0_hh_w, lstm.l0_ih_b, lstm.l0_hh_b, l0_prefix);
// second lstm layer
char l1_prefix[7] = "enc_l1";
struct ggml_tensor *out = forward_pass_lstm_unilayer(
ctx0, hs1, lstm.l1_ih_w, lstm.l1_hh_w, lstm.l1_ih_b, lstm.l1_hh_b, l1_prefix);
inpL = ggml_add(ctx0, inpL, out);
}
// final conv
inpL = ggml_elu(ctx0, inpL);
struct ggml_tensor *encoded_inp = strided_conv_1d(
ctx0, inpL, encoder->final_conv_w, encoder->final_conv_b, stride);
return encoded_inp;
}

View File

@ -0,0 +1,103 @@
/*
* Copyright (C) 2017 Reece H. Dunn
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see: <http://www.gnu.org/licenses/>.
*/
#ifndef ESPEAK_NG_ENCODING_H
#define ESPEAK_NG_ENCODING_H
#include <stdint.h>
#ifdef __cplusplus
extern "C"
{
#endif
/* Text encodings understood by the text decoder API declared in this header
 * (returned by espeak_ng_EncodingFromName and passed to the
 * text_decoder_decode_* functions). No enumerator has an explicit value, so
 * they are numbered consecutively from ESPEAKNG_ENCODING_UNKNOWN = 0. */
typedef enum
{
ESPEAKNG_ENCODING_UNKNOWN,
ESPEAKNG_ENCODING_US_ASCII,
ESPEAKNG_ENCODING_ISO_8859_1,
ESPEAKNG_ENCODING_ISO_8859_2,
ESPEAKNG_ENCODING_ISO_8859_3,
ESPEAKNG_ENCODING_ISO_8859_4,
ESPEAKNG_ENCODING_ISO_8859_5,
ESPEAKNG_ENCODING_ISO_8859_6,
ESPEAKNG_ENCODING_ISO_8859_7,
ESPEAKNG_ENCODING_ISO_8859_8,
ESPEAKNG_ENCODING_ISO_8859_9,
ESPEAKNG_ENCODING_ISO_8859_10,
ESPEAKNG_ENCODING_ISO_8859_11,
// ISO-8859-12 is not a valid encoding.
ESPEAKNG_ENCODING_ISO_8859_13,
ESPEAKNG_ENCODING_ISO_8859_14,
ESPEAKNG_ENCODING_ISO_8859_15,
ESPEAKNG_ENCODING_ISO_8859_16,
ESPEAKNG_ENCODING_KOI8_R,
ESPEAKNG_ENCODING_ISCII,
ESPEAKNG_ENCODING_UTF_8,
ESPEAKNG_ENCODING_ISO_10646_UCS_2,
} espeak_ng_ENCODING;
ESPEAK_NG_API espeak_ng_ENCODING
espeak_ng_EncodingFromName(const char *encoding);
typedef struct espeak_ng_TEXT_DECODER_ espeak_ng_TEXT_DECODER;
ESPEAK_NG_API espeak_ng_TEXT_DECODER *
create_text_decoder(void);
ESPEAK_NG_API void
destroy_text_decoder(espeak_ng_TEXT_DECODER *decoder);
ESPEAK_NG_API espeak_ng_STATUS
text_decoder_decode_string(espeak_ng_TEXT_DECODER *decoder,
const char *string,
int length,
espeak_ng_ENCODING encoding);
ESPEAK_NG_API espeak_ng_STATUS
text_decoder_decode_string_auto(espeak_ng_TEXT_DECODER *decoder,
const char *string,
int length,
espeak_ng_ENCODING encoding);
ESPEAK_NG_API espeak_ng_STATUS
text_decoder_decode_wstring(espeak_ng_TEXT_DECODER *decoder,
const wchar_t *string,
int length);
ESPEAK_NG_API espeak_ng_STATUS
text_decoder_decode_string_multibyte(espeak_ng_TEXT_DECODER *decoder,
const void *input,
espeak_ng_ENCODING encoding,
int flags);
ESPEAK_NG_API int
text_decoder_eof(espeak_ng_TEXT_DECODER *decoder);
ESPEAK_NG_API uint32_t
text_decoder_getc(espeak_ng_TEXT_DECODER *decoder);
ESPEAK_NG_API uint32_t
text_decoder_peekc(espeak_ng_TEXT_DECODER *decoder);
ESPEAK_NG_API const void *
text_decoder_get_buffer(espeak_ng_TEXT_DECODER *decoder);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,223 @@
/* eSpeak NG API.
*
* Copyright (C) 2015-2017 Reece H. Dunn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ESPEAK_NG_H
#define ESPEAK_NG_H
#include <espeak-ng/speak_lib.h>
#ifdef __cplusplus
extern "C"
{
#endif
#if defined(_WIN32) || defined(_WIN64)
#ifdef LIBESPEAK_NG_EXPORT
#define ESPEAK_NG_API __declspec(dllexport)
#else
#define ESPEAK_NG_API __declspec(dllimport)
#endif
#else
#define ESPEAK_NG_API
#endif
#define ESPEAKNG_DEFAULT_VOICE "en"
/* Status code returned by most espeak_ng_* functions in this header.
 * The bits selected by ENS_GROUP_MASK identify the code's group: group 0
 * holds errno values, group 0x10000000 holds the eSpeak NG specific codes
 * listed below. ENS_OK (0) shares its value with ENS_GROUP_ERRNO. */
typedef enum {
ENS_GROUP_MASK = 0x70000000,
ENS_GROUP_ERRNO = 0x00000000, /* Values 0-255 map to errno error codes. */
ENS_GROUP_ESPEAK_NG = 0x10000000, /* eSpeak NG error codes. */
/* eSpeak NG 1.49.0 */
ENS_OK = 0,
ENS_COMPILE_ERROR = 0x100001FF,
ENS_VERSION_MISMATCH = 0x100002FF,
ENS_FIFO_BUFFER_FULL = 0x100003FF,
ENS_NOT_INITIALIZED = 0x100004FF,
ENS_AUDIO_ERROR = 0x100005FF,
ENS_VOICE_NOT_FOUND = 0x100006FF,
ENS_MBROLA_NOT_FOUND = 0x100007FF,
ENS_MBROLA_VOICE_NOT_FOUND = 0x100008FF,
ENS_EVENT_BUFFER_FULL = 0x100009FF,
ENS_NOT_SUPPORTED = 0x10000AFF,
ENS_UNSUPPORTED_PHON_FORMAT = 0x10000BFF,
ENS_NO_SPECT_FRAMES = 0x10000CFF,
ENS_EMPTY_PHONEME_MANIFEST = 0x10000DFF,
ENS_SPEECH_STOPPED = 0x10000EFF,
/* eSpeak NG 1.49.2 */
ENS_UNKNOWN_PHONEME_FEATURE = 0x10000FFF,
ENS_UNKNOWN_TEXT_ENCODING = 0x100010FF,
} espeak_ng_STATUS;
/* Output mode passed to espeak_ng_InitializeOutput().
 * NOTE(review): the values are distinct single bits, so they look like
 * combinable flags -- confirm against the implementation. */
typedef enum {
ENOUTPUT_MODE_SYNCHRONOUS = 0x0001,
ENOUTPUT_MODE_SPEAK_AUDIO = 0x0002,
} espeak_ng_OUTPUT_MODE;
/* Voice gender categories; ENGENDER_UNKNOWN is the zero value. */
typedef enum {
ENGENDER_UNKNOWN = 0,
ENGENDER_MALE = 1,
ENGENDER_FEMALE = 2,
ENGENDER_NEUTRAL = 3,
} espeak_ng_VOICE_GENDER;
/* Table of callbacks installed with espeak_ng_SetOutputHooks().
 * NOTE(review): judging by the names, each hook is invoked as the
 * synthesizer emits a phoneme symbol, silence, or a voiced/unvoiced
 * sample -- confirm the exact call sites in the library. */
typedef struct
{
void (*outputPhoSymbol)(char* pho_code,int pho_type);
void (*outputSilence)(short echo_tail);
void (*outputVoiced)(short sample);
void (*outputUnvoiced)(short sample);
} espeak_ng_OUTPUT_HOOKS;
/* eSpeak NG 1.49.0 */
typedef struct espeak_ng_ERROR_CONTEXT_ *espeak_ng_ERROR_CONTEXT;
ESPEAK_NG_API void
espeak_ng_ClearErrorContext(espeak_ng_ERROR_CONTEXT *context);
ESPEAK_NG_API void
espeak_ng_GetStatusCodeMessage(espeak_ng_STATUS status,
char *buffer,
size_t length);
ESPEAK_NG_API void
espeak_ng_PrintStatusCodeMessage(espeak_ng_STATUS status,
FILE *out,
espeak_ng_ERROR_CONTEXT context);
ESPEAK_NG_API void
espeak_ng_InitializePath(const char *path);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_Initialize(espeak_ng_ERROR_CONTEXT *context);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_InitializeOutput(espeak_ng_OUTPUT_MODE output_mode,
int buffer_length,
const char *device);
ESPEAK_NG_API int
espeak_ng_GetSampleRate(void);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_SetParameter(espeak_PARAMETER parameter,
int value,
int relative);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_SetPhonemeEvents(int enable, int ipa);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_SetPunctuationList(const wchar_t *punctlist);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_SetVoiceByName(const char *name);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_SetVoiceByFile(const char *filename);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_SetVoiceByProperties(espeak_VOICE *voice_selector);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_Synthesize(const void *text,
size_t size,
unsigned int position,
espeak_POSITION_TYPE position_type,
unsigned int end_position,
unsigned int flags,
unsigned int *unique_identifier,
void *user_data);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_SynthesizeMark(const void *text,
size_t size,
const char *index_mark,
unsigned int end_position,
unsigned int flags,
unsigned int *unique_identifier,
void *user_data);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_SpeakKeyName(const char *key_name);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_SpeakCharacter(wchar_t character);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_Cancel(void);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_Synchronize(void);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_Terminate(void);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_CompileDictionary(const char *dsource,
const char *dict_name,
FILE *log,
int flags,
espeak_ng_ERROR_CONTEXT *context);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_CompileMbrolaVoice(const char *path,
FILE *log,
espeak_ng_ERROR_CONTEXT *context);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_CompilePhonemeData(long rate,
FILE *log,
espeak_ng_ERROR_CONTEXT *context);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_CompileIntonation(FILE *log,
espeak_ng_ERROR_CONTEXT *context);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_CompileIntonationPath(const char *source_path,
const char *destination_path,
FILE *log,
espeak_ng_ERROR_CONTEXT *context);
/* eSpeak NG 1.49.1 */
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_CompilePhonemeDataPath(long rate,
const char *source_path,
const char *destination_path,
FILE *log,
espeak_ng_ERROR_CONTEXT *context);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_SetOutputHooks(espeak_ng_OUTPUT_HOOKS* hooks);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_SetConstF0(int f0);
ESPEAK_NG_API espeak_ng_STATUS
espeak_ng_SetRandSeed(long seed);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,709 @@
#ifndef SPEAK_LIB_H
#define SPEAK_LIB_H
/***************************************************************************
* Copyright (C) 2005 to 2012 by Jonathan Duddington *
* email: jonsd@users.sourceforge.net *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, see: *
* <http://www.gnu.org/licenses/>. *
***************************************************************************/
/*************************************************************/
/* This is the header file for the library version of espeak */
/* */
/*************************************************************/
#include <stdio.h>
#include <stddef.h>
#if defined(_WIN32) || defined(_WIN64)
#ifdef LIBESPEAK_NG_EXPORT
#define ESPEAK_API __declspec(dllexport)
#else
#define ESPEAK_API __declspec(dllimport)
#endif
#else
#define ESPEAK_API
#endif
#define ESPEAK_API_REVISION 12
/*
Revision 2
Added parameter "options" to eSpeakInitialize()
Revision 3
Added espeakWORDGAP to espeak_PARAMETER
Revision 4
Added flags parameter to espeak_CompileDictionary()
Revision 5
Added espeakCHARS_16BIT
Revision 6
Added macros: espeakRATE_MINIMUM, espeakRATE_MAXIMUM, espeakRATE_NORMAL
Revision 7 24.Dec.2011
Changed espeak_EVENT structure to add id.string[] for phoneme mnemonics.
Added espeakINITIALIZE_PHONEME_IPA option for espeak_Initialize() to report phonemes as IPA names.
Revision 8 26.Apr.2013
Added function espeak_TextToPhonemes().
Revision 9 30.May.2013
Changed function espeak_TextToPhonemes().
Revision 10 29.Aug.2014
Changed phonememode parameter to espeak_TextToPhonemes() and espeak_SetPhonemeTrace
Revision 11 (espeak-ng)
Made ESPEAK_API import/export symbols correctly on Windows.
Revision 12 (espeak-ng)
Exposed espeak_SetPhonemeCallback. This is available in eSpeak, but was not exposed in this header.
*/
/********************/
/* Initialization */
/********************/
// values for 'value' in espeak_SetParameter(espeakRATE, value, 0), nominally in words-per-minute
#define espeakRATE_MINIMUM 80
#define espeakRATE_MAXIMUM 450
#define espeakRATE_NORMAL 175
// Kind of event stored in espeak_EVENT::type. The value 0
// (espeakEVENT_LIST_TERMINATED) marks the end of an event list.
typedef enum {
espeakEVENT_LIST_TERMINATED = 0, // Retrieval mode: terminates the event list.
espeakEVENT_WORD = 1, // Start of word
espeakEVENT_SENTENCE = 2, // Start of sentence
espeakEVENT_MARK = 3, // Mark
espeakEVENT_PLAY = 4, // Audio element
espeakEVENT_END = 5, // End of sentence or clause
espeakEVENT_MSG_TERMINATED = 6, // End of message
espeakEVENT_PHONEME = 7, // Phoneme, if enabled in espeak_Initialize()
espeakEVENT_SAMPLERATE = 8 // Set sample rate
} espeak_EVENT_TYPE;
// One entry of the event array handed to the synthesis callback
// (third parameter of t_espeak_callback). Which member of `id` is
// meaningful depends on `type`, as noted on each member.
typedef struct {
espeak_EVENT_TYPE type;
unsigned int unique_identifier; // message identifier (or 0 for key or character)
int text_position; // the number of characters from the start of the text
int length; // word length, in characters (for espeakEVENT_WORD)
int audio_position; // the time in mS within the generated speech output data
int sample; // sample id (internal use)
void* user_data; // pointer supplied by the calling program
union {
int number; // used for WORD and SENTENCE events.
const char *name; // used for MARK and PLAY events. UTF8 string
char string[8]; // used for phoneme names (UTF8). Terminated by a zero byte unless the name needs the full 8 bytes.
} id;
} espeak_EVENT;
/*
When a message is supplied to espeak_synth, the request is buffered and espeak_synth returns. When the message is really processed, the callback function will be repeatedly called.
In RETRIEVAL mode, the callback function supplies to the calling program the audio data and an event list terminated by 0 (LIST_TERMINATED).
In PLAYBACK mode, the callback function is called as soon as an event happens.
For example suppose that the following message is supplied to espeak_Synth:
"hello, hello."
* Once processed in RETRIEVAL mode, it could lead to 3 calls of the callback function :
** Block 1:
<audio data> +
List of events: SENTENCE + WORD + LIST_TERMINATED
** Block 2:
<audio data> +
List of events: WORD + END + LIST_TERMINATED
** Block 3:
no audio data
List of events: MSG_TERMINATED + LIST_TERMINATED
* Once processed in PLAYBACK mode, it could lead to 5 calls of the callback function:
** SENTENCE
** WORD (call when the sounds are actually played)
** WORD
** END (call when the end of sentence is actually played.)
** MSG_TERMINATED
The MSG_TERMINATED event is the last event. It can inform the calling program to clear the user data related to the message.
So if the synthesis must be stopped, the callback function is called for each pending message with the MSG_TERMINATED event.
A MARK event indicates a <mark> element in the text.
A PLAY event indicates an <audio> element in the text, for which the calling program should play the named sound file.
*/
// Unit in which the `position` argument of espeak_Synth() is expressed.
// Only the first value is explicit; POS_WORD is 2 and POS_SENTENCE is 3.
typedef enum {
POS_CHARACTER = 1,
POS_WORD,
POS_SENTENCE
} espeak_POSITION_TYPE;
/* Audio output mode, passed as the first argument of espeak_Initialize().
   Enumerators have no explicit values and are numbered from 0. */
typedef enum {
/* PLAYBACK mode: plays the audio data, supplies events to the calling program*/
AUDIO_OUTPUT_PLAYBACK,
/* RETRIEVAL mode: supplies audio data and events to the calling program */
AUDIO_OUTPUT_RETRIEVAL,
/* SYNCHRONOUS mode: as RETRIEVAL but doesn't return until synthesis is completed */
AUDIO_OUTPUT_SYNCHRONOUS,
/* Synchronous playback */
AUDIO_OUTPUT_SYNCH_PLAYBACK
} espeak_AUDIO_OUTPUT;
/* Result code of the legacy espeak_* API (e.g. the return type of
   espeak_Synth). EE_INTERNAL_ERROR (-1) is also the failure value
   documented for espeak_Initialize(). */
typedef enum {
EE_OK=0,
EE_INTERNAL_ERROR=-1,
EE_BUFFER_FULL=1,
EE_NOT_FOUND=2
} espeak_ERROR;
#define espeakINITIALIZE_PHONEME_EVENTS 0x0001
#define espeakINITIALIZE_PHONEME_IPA 0x0002
#define espeakINITIALIZE_DONT_EXIT 0x8000
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API int espeak_Initialize(espeak_AUDIO_OUTPUT output, int buflength, const char *path, int options);
/* Must be called before any synthesis functions are called.
output: the audio data can either be played by eSpeak or passed back by the SynthCallback function.
buflength: The length in mS of sound buffers passed to the SynthCallback function.
Value=0 gives a default of 60mS.
This parameter is only used for AUDIO_OUTPUT_RETRIEVAL and AUDIO_OUTPUT_SYNCHRONOUS modes.
path: The directory which contains the espeak-ng-data directory, or NULL for the default location.
options: bit 0: 1=allow espeakEVENT_PHONEME events.
bit 1: 1= espeakEVENT_PHONEME events give IPA phoneme names, not eSpeak phoneme names
bit 15: 1=don't exit if espeak_data is not found (used for --help)
Returns: sample rate in Hz, or -1 (EE_INTERNAL_ERROR).
*/
typedef int (t_espeak_callback)(short*, int, espeak_EVENT*);
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API void espeak_SetSynthCallback(t_espeak_callback* SynthCallback);
/* Must be called before any synthesis functions are called.
This specifies a function in the calling program which is called when a buffer of
speech sound data has been produced.
The callback function is of the form:
int SynthCallback(short *wav, int numsamples, espeak_EVENT *events);
wav: is the speech sound data which has been produced.
NULL indicates that the synthesis has been completed.
numsamples: is the number of entries in wav. This number may vary, may be less than
the value implied by the buflength parameter given in espeak_Initialize, and may
sometimes be zero (which does NOT indicate end of synthesis).
events: an array of espeak_EVENT items which indicate word and sentence events, and
also the occurrence of <mark> and <audio> elements within the text. The list of
events is terminated by an event of type = 0.
Callback returns: 0=continue synthesis, 1=abort synthesis.
*/
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API void espeak_SetUriCallback(int (*UriCallback)(int, const char*, const char*));
/* This function may be called before synthesis functions are used, in order to deal with
<audio> tags. It specifies a callback function which is called when an <audio> element is
encountered and allows the calling program to indicate whether the sound file which
is specified in the <audio> element is available and is to be played.
The callback function is of the form:
int UriCallback(int type, const char *uri, const char *base);
type: type of callback event. Currently only 1= <audio> element
uri: the "src" attribute from the <audio> element
base: the "xml:base" attribute (if any) from the <speak> element
Return: 1=don't play the sound, but speak the text alternative.
0=place a PLAY event in the event list at the point where the <audio> element
occurs. The calling program can then play the sound at that point.
*/
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API void espeak_SetPhonemeCallback(int (*PhonemeCallback)(const char *));
/********************/
/* Synthesis */
/********************/
#define espeakCHARS_AUTO 0
#define espeakCHARS_UTF8 1
#define espeakCHARS_8BIT 2
#define espeakCHARS_WCHAR 3
#define espeakCHARS_16BIT 4
#define espeakSSML 0x10
#define espeakPHONEMES 0x100
#define espeakENDPAUSE 0x1000
#define espeakKEEP_NAMEDATA 0x2000
/* Synthesizes speech for "text"; the sound data is handed to the callback registered with
   espeak_SetSynthCallback(). Returns an espeak_ERROR code. */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_Synth(const void *text,
size_t size,
unsigned int position,
espeak_POSITION_TYPE position_type,
unsigned int end_position,
unsigned int flags,
unsigned int* unique_identifier,
void* user_data);
/* Synthesize speech for the specified text. The speech sound data is passed to the calling
program in buffers by means of the callback function specified by espeak_SetSynthCallback(). The command is asynchronous: it is internally buffered and returns as soon as possible. If espeak_Initialize was previously called with AUDIO_OUTPUT_PLAYBACK as argument, the sound data are played by eSpeak.
text: The text to be spoken, terminated by a zero character. It may be either 8-bit characters,
wide characters (wchar_t), or UTF8 encoding. Which of these is determined by the "flags"
parameter.
size: Equal to (or greater than) the size of the text data, in bytes. This is used in order
to allocate internal storage space for the text. This value is not used for
AUDIO_OUTPUT_SYNCHRONOUS mode.
position: The position in the text where speaking starts. Zero indicates speak from the
start of the text.
position_type: Determines whether "position" is a number of characters, words, or sentences.
Values: a value of type espeak_POSITION_TYPE.
end_position: If set, this gives a character position at which speaking will stop. A value
of zero indicates no end position.
flags: These may be OR'd together:
Type of character codes, one of:
espeakCHARS_UTF8 UTF8 encoding
espeakCHARS_8BIT The 8 bit ISO-8859 character set for the particular language.
espeakCHARS_AUTO 8 bit or UTF8 (this is the default)
espeakCHARS_WCHAR Wide characters (wchar_t)
espeakCHARS_16BIT 16 bit characters.
espeakSSML Elements within < > are treated as SSML elements, or if not recognised are ignored.
espeakPHONEMES Text within [[ ]] is treated as phonemes codes (in espeak's Kirshenbaum encoding).
espeakENDPAUSE If set then a sentence pause is added at the end of the text. If not set then
this pause is suppressed.
unique_identifier: This must be either NULL, or point to an integer variable to
which eSpeak writes a message identifier number.
eSpeak includes this number in espeak_EVENT messages which are the result of
this call of espeak_Synth().
user_data: a pointer (or NULL) which will be passed to the callback function in
espeak_EVENT messages.
Return: EE_OK: operation achieved
EE_BUFFER_FULL: the command can not be buffered;
you may try after a while to call the function again.
EE_INTERNAL_ERROR.
*/
/* Variant of espeak_Synth() in which the start position is given by the name of a <mark>
   element in the text. Returns an espeak_ERROR code. */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_Synth_Mark(const void *text,
size_t size,
const char *index_mark,
unsigned int end_position,
unsigned int flags,
unsigned int* unique_identifier,
void* user_data);
/* Synthesize speech for the specified text. Similar to espeak_Synth() but the start position is
specified by the name of a <mark> element in the text.
index_mark: The "name" attribute of a <mark> element within the text which specified the
point at which synthesis starts. UTF8 string.
For the other parameters, see espeak_Synth()
Return: EE_OK: operation achieved
EE_BUFFER_FULL: the command can not be buffered;
you may try after a while to call the function again.
EE_INTERNAL_ERROR.
*/
/* Speaks the name of a keyboard key given as a string. Returns an espeak_ERROR code. */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_Key(const char *key_name);
/* Speak the name of a keyboard key.
If key_name is a single character, it speaks the name of the character.
Otherwise, it speaks key_name as a text string.
Return: EE_OK: operation achieved
EE_BUFFER_FULL: the command can not be buffered;
you may try after a while to call the function again.
EE_INTERNAL_ERROR.
*/
/* Speaks the name of a single wide character. Returns an espeak_ERROR code. */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_Char(wchar_t character);
/* Speak the name of the given character
Return: EE_OK: operation achieved
EE_BUFFER_FULL: the command can not be buffered;
you may try after a while to call the function again.
EE_INTERNAL_ERROR.
*/
/***********************/
/* Speech Parameters */
/***********************/
/* Identifiers of the speech parameters that are passed to espeak_SetParameter() and
   espeak_GetParameter(). Every enumerator except the trailing sentinel has its numeric
   value spelled out. */
typedef enum {
    espeakSILENCE        = 0,   /* internal use */

    espeakRATE           = 1,
    espeakVOLUME         = 2,
    espeakPITCH          = 3,
    espeakRANGE          = 4,
    espeakPUNCTUATION    = 5,
    espeakCAPITALS       = 6,
    espeakWORDGAP        = 7,
    espeakOPTIONS        = 8,   /* reserved for misc. options. not yet used */
    espeakINTONATION     = 9,
    espeakSSML_BREAK_MUL = 10,
    espeakRESERVED2      = 11,

    espeakEMPHASIS       = 12,  /* internal use */
    espeakLINELENGTH     = 13,  /* internal use */
    espeakVOICETYPE      = 14,  /* internal, 1=mbrola */

    N_SPEECH_PARAM              /* last enum; evaluates to 15, one past espeakVOICETYPE */
} espeak_PARAMETER;
/* Values for the espeakPUNCTUATION parameter: which punctuation characters are announced. */
typedef enum {
/* announce no punctuation characters */
espeakPUNCT_NONE=0,
/* announce all punctuation characters */
espeakPUNCT_ALL=1,
/* announce only the characters given to espeak_SetPunctuationList() */
espeakPUNCT_SOME=2
} espeak_PUNCT_TYPE;
/* Sets a speech parameter to an absolute (relative=0) or relative (relative=1) value.
   Returns an espeak_ERROR code. */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_SetParameter(espeak_PARAMETER parameter, int value, int relative);
/* Sets the value of the specified parameter.
relative=0 Sets the absolute value of the parameter.
relative=1 Sets a relative value of the parameter.
parameter:
espeakRATE: speaking speed in words per minute. Values 80 to 450.
espeakVOLUME: volume in range 0-200 or more.
0=silence, 100=normal full volume, greater values may produce amplitude compression or distortion
espeakPITCH: base pitch, range 0-100. 50=normal
espeakRANGE: pitch range, range 0-100. 0=monotone, 50=normal
espeakPUNCTUATION: which punctuation characters to announce:
value in espeak_PUNCT_TYPE (none, all, some),
see espeak_SetPunctuationList() to specify which characters are announced.
espeakCAPITALS: announce capital letters by:
0=none,
1=sound icon,
2=spelling,
3 or higher, by raising pitch. This value gives the amount in Hz by which the pitch
of a word is raised to indicate that it has a capital letter.
espeakWORDGAP: pause between words, units of 10mS (at the default speed)
Return: EE_OK: operation achieved
EE_BUFFER_FULL: the command can not be buffered;
you may try after a while to call the function again.
EE_INTERNAL_ERROR.
*/
/* Returns the default (current=0) or current (current=1) value of a speech parameter. */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API int espeak_GetParameter(espeak_PARAMETER parameter, int current);
/* current=0 Returns the default value of the specified parameter.
current=1 Returns the current value of the specified parameter, as set by SetParameter()
*/
/* Supplies the zero-terminated list of punctuation characters that are spoken when the
   punctuation parameter is set to "some". Returns an espeak_ERROR code. */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_SetPunctuationList(const wchar_t *punctlist);
/* Specifies a list of punctuation characters whose names are to be spoken when the
value of the Punctuation parameter is set to "some".
punctlist: A list of character codes, terminated by a zero character.
Return: EE_OK: operation achieved
EE_BUFFER_FULL: the command can not be buffered;
you may try after a while to call the function again.
EE_INTERNAL_ERROR.
*/
/* Bits of the "phonememode" argument of espeak_SetPhonemeTrace(). */
/* output the translated phoneme symbols for the text */
#define espeakPHONEMES_SHOW 0x01
/* produce IPA phoneme names rather than ascii */
#define espeakPHONEMES_IPA 0x02
/* bit 3: output a trace of how the translation was done */
#define espeakPHONEMES_TRACE 0x08
/* bit 4: produce pho data for mbrola */
#define espeakPHONEMES_MBROLA 0x10
/* bit 7: use bits 8-23 as a tie within multi-letter phoneme names */
#define espeakPHONEMES_TIE 0x80
/* Selects how phoneme symbols are output for the text and the stream they are written to
   (stdout when stream is NULL). */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API void espeak_SetPhonemeTrace(int phonememode, FILE *stream);
/* phonememode: Controls the output of phoneme symbols for the text
bits 0-2:
value=0 No phoneme output (default)
value=1 Output the translated phoneme symbols for the text
value=2 as (1), but produces IPA phoneme names rather than ascii
bit 3: output a trace of how the translation was done (showing the matching rules and list entries)
bit 4: produce pho data for mbrola
bit 7: use (bits 8-23) as a tie within multi-letter phonemes names
bits 8-23: separator character, between phoneme names
stream output stream for the phoneme symbols (and trace). If stream=NULL then it uses stdout.
*/
/* Translates the text at *textptr into a phoneme string and advances *textptr past the
   translated portion (it is set to NULL once the end of the text has been reached). */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API const char *espeak_TextToPhonemes(const void **textptr, int textmode, int phonememode);
/* Translates text into phonemes. Call espeak_SetVoiceByName() first, to select a language.
It returns a pointer to a character string which contains the phonemes for the text up to
end of a sentence, or comma, semicolon, colon, or similar punctuation.
textptr: The address of a pointer to the input text which is terminated by a zero character.
On return, the pointer has been advanced past the text which has been translated, or else set
to NULL to indicate that the end of the text has been reached.
textmode: Type of character codes, one of:
espeakCHARS_UTF8 UTF8 encoding
espeakCHARS_8BIT The 8 bit ISO-8859 character set for the particular language.
espeakCHARS_AUTO 8 bit or UTF8 (this is the default)
espeakCHARS_WCHAR Wide characters (wchar_t)
espeakCHARS_16BIT 16 bit characters.
phonememode:
bit 1: 0=eSpeak's ascii phoneme names, 1= International Phonetic Alphabet (as UTF-8 characters).
bit 7: use (bits 8-23) as a tie within multi-letter phonemes names
bits 8-23: separator character, between phoneme names
*/
/* Compiles the pronunciation dictionary for the language of the currently selected voice
   from the '_rules' and '_list' files found in 'path'. */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API void espeak_CompileDictionary(const char *path, FILE *log, int flags);
/* Compile pronunciation dictionary for a language which corresponds to the currently
selected voice. The required voice should be selected before calling this function.
path: The directory which contains the language's '_rules' and '_list' files.
'path' should end with a path separator character ('/').
log: Stream for error reports and statistics information. If log=NULL then stderr will be used.
flags: Bit 0: include source line information for debug purposes (This is displayed with the
-X command line option).
*/
/***********************/
/* Voice Selection */
/***********************/
// voice table
// One record describes a voice; the same struct is also passed to
// espeak_SetVoiceByProperties() to express selection criteria.
typedef struct {
const char *name; // a given name for this voice. UTF8 string.
const char *languages; // list of pairs of (byte) priority + (string) language (and dialect qualifier)
const char *identifier; // the filename for this voice within espeak-ng-data/voices
unsigned char gender; // 0=none, 1=male, 2=female
unsigned char age; // 0=not specified, or age in years
unsigned char variant; // only used when passed as a parameter to espeak_SetVoiceByProperties
unsigned char xx1; // for internal use
int score; // for internal use
void *spare; // for internal use
} espeak_VOICE;
/* Note: The espeak_VOICE structure is used for two purposes:
1. To return the details of the available voices.
2. As a parameter to espeak_SetVoiceByProperties() in order to specify selection criteria.
In (1), the "languages" field consists of a list of (UTF8) language names for which this voice
may be used, each language name in the list is terminated by a zero byte and is also preceded by
a single byte which gives a "priority" number. The list of languages is terminated by an
additional zero byte.
A language name consists of a language code, optionally followed by one or more qualifier (dialect)
names separated by hyphens (eg. "en-uk"). A voice might, for example, have languages "en-uk" and
"en". Even without "en" listed, voice would still be selected for the "en" language (because
"en-uk" is related) but at a lower priority.
The priority byte indicates how the voice is preferred for the language. A low number indicates a
more preferred voice, a higher number indicates a less preferred voice.
In (2), the "languages" field consists simply of a single (UTF8) language name, with no preceding
priority byte.
*/
/* Returns a NULL-terminated array of pointers to the available voices; voice_spec may be
   NULL (list everything) or a filter. */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API const espeak_VOICE **espeak_ListVoices(espeak_VOICE *voice_spec);
/* Reads the voice files from espeak-ng-data/voices and creates an array of espeak_VOICE pointers.
The list is terminated by a NULL pointer
If voice_spec is NULL then all voices are listed.
If voice_spec is given, then only the voices which are compatible with the voice_spec
are listed, and they are listed in preference order.
*/
/* Loads a voice from the given file path (UTF8 string). Returns an espeak_ERROR code. */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_SetVoiceByFile(const char *filename);
/* Loads a voice given the file path. Language is not considered.
"filename" is a UTF8 string.
Return: EE_OK: operation achieved
EE_BUFFER_FULL: the command can not be buffered;
you may try after a while to call the function again.
EE_INTERNAL_ERROR.
*/
/* Selects the voice whose "name" field matches the given UTF8 string. Returns an
   espeak_ERROR code. */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_SetVoiceByName(const char *name);
/* Searches for a voice with a matching "name" field. Language is not considered.
"name" is a UTF8 string.
Return: EE_OK: operation achieved
EE_BUFFER_FULL: the command can not be buffered;
you may try after a while to call the function again.
EE_INTERNAL_ERROR.
*/
/* Selects a voice using the criteria stored in an espeak_VOICE structure. Returns an
   espeak_ERROR code. */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_SetVoiceByProperties(espeak_VOICE *voice_spec);
/* An espeak_VOICE structure is used to pass criteria to select a voice. Any of the following
fields may be set:
name NULL, or a voice name
languages NULL, or a single language string (with optional dialect), eg. "en-uk", or "en"
gender 0=not specified, 1=male, 2=female
age 0=not specified, or an age in years
variant After a list of candidates is produced, scored and sorted, "variant" is used to index
that list and choose a voice.
variant=0 takes the top voice (i.e. best match). variant=1 takes the next voice, etc
*/
/* Returns the espeak_VOICE data of the currently selected voice. */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_VOICE *espeak_GetCurrentVoice(void);
/* Returns the espeak_VOICE data for the currently selected voice.
This is not affected by temporary voice changes caused by SSML elements such as <voice> and <s>
*/
/* Stops synthesis and audio output of the current text. Returns an espeak_ERROR code. */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_Cancel(void);
/* Immediately stop synthesis and audio output of the current text. When this
function returns, the audio output is fully stopped and the synthesizer is ready to
synthesize a new message.
Return: EE_OK: operation achieved
EE_INTERNAL_ERROR.
*/
/* Reports whether audio is being played: 1 if so, 0 otherwise. */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API int espeak_IsPlaying(void);
/* Returns 1 if audio is played, 0 otherwise.
*/
/* Returns once all data have been spoken. Returns an espeak_ERROR code. */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_Synchronize(void);
/* This function returns when all data have been spoken.
Return: EE_OK: operation achieved
EE_INTERNAL_ERROR.
*/
/* Must be the last library function called. Returns an espeak_ERROR code. */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_Terminate(void);
/* last function to be called.
Return: EE_OK: operation achieved
EE_INTERNAL_ERROR.
*/
/* Returns the version number string; the path to espeak_data is returned through
   path_data. */
#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API const char *espeak_Info(const char **path_data);
/* Returns the version number string.
path_data returns the path to espeak_data
*/
#endif

View File

@ -0,0 +1,76 @@
#pragma once
#include "ggml.h"
#ifdef __cplusplus
extern "C" {
#endif
// Handle types used by the allocator API. Each is a pointer to a struct type that is
// declared, but not defined, by these typedefs.
typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
typedef struct ggml_backend * ggml_backend_t;
// Tensor allocator
// NOTE(review): the per-field and per-function notes below are inferred from identifiers and
// types only (the implementation is not part of this header) - confirm before relying on them.
struct ggml_tallocr {
// backend buffer handle, as passed to ggml_tallocr_new()
ggml_backend_buffer_t buffer;
// presumably the base address of the buffer's memory
void * base;
// presumably the alignment applied to allocations
size_t alignment;
// presumably the running allocation offset
size_t offset;
};
// Returns a ggml_tallocr by value for the given backend buffer.
GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
// Takes the allocator and a tensor and reports the outcome as an enum ggml_status;
// presumably places the tensor inside the allocator's buffer.
GGML_API enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
// Graph allocator
/*
Example usage:
ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
// optional: create a worst-case graph and reserve the buffers to avoid reallocations
ggml_gallocr_reserve(galloc, build_graph(max_batch));
// allocate the graph
struct ggml_cgraph * graph = build_graph(batch);
ggml_gallocr_alloc_graph(galloc, graph);
printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0));
// evaluate the graph
ggml_backend_graph_compute(backend, graph);
*/
// special tensor flags for use with the graph allocator:
// ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses
// ggml_set_output(): output tensors are never freed and never overwritten
// Handle to a graph allocator; struct ggml_gallocr is declared but not defined here.
typedef struct ggml_gallocr * ggml_gallocr_t;
// Create a graph allocator from a single buffer type, or from an array of n_bufs buffer types.
GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft);
GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs);
// NOTE(review): presumably releases an allocator obtained from ggml_gallocr_new()/_new_n() - confirm.
GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc);
// pre-allocate buffers from a measure graph - does not allocate or modify the graph
// call with a worst-case graph to avoid buffer reallocations
// not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
// returns false if the buffer allocation failed
GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
// same as above, with per-node and per-leaf buffer ids supplied as int arrays
// (NOTE(review): array lengths and indexing are not stated in this header - confirm)
GGML_API bool ggml_gallocr_reserve_n(
ggml_gallocr_t galloc,
struct ggml_cgraph * graph,
const int * node_buffer_ids,
const int * leaf_buffer_ids);
// automatic reallocation if the topology changes when using a single buffer
// returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers)
GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
// size of the buffer selected by buffer_id (the usage example in this header prints it as a byte count)
GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id);
// Utils
// Create a buffer and allocate all the tensors in a ggml_context
// Two entry points: the first takes the buffer type directly, the second takes a backend handle.
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,354 @@
#pragma once
#include "ggml.h"
#include "ggml-alloc.h"
// GGML_BACKEND_API: linkage prefix selected by the build configuration.
//   - GGML_BACKEND_SHARED not defined: plain `extern`
//   - shared, and either not _WIN32 or building with MinGW: default-visibility attribute
//   - shared on _WIN32 without MinGW: dllexport when GGML_BACKEND_BUILD is defined,
//     dllimport otherwise
#if !defined(GGML_BACKEND_SHARED)
#    define GGML_BACKEND_API extern
#elif !defined(_WIN32) || defined(__MINGW32__)
#    define GGML_BACKEND_API __attribute__ ((visibility ("default"))) extern
#elif defined(GGML_BACKEND_BUILD)
#    define GGML_BACKEND_API __declspec(dllexport) extern
#else
#    define GGML_BACKEND_API __declspec(dllimport) extern
#endif
#ifdef __cplusplus
extern "C" {
#endif
// Handle types used throughout the backend API. Except for ggml_backend_graph_plan_t, which
// is a plain void *, each is a pointer to a struct type that is declared, but not defined,
// by these typedefs.
typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
typedef struct ggml_backend_event * ggml_backend_event_t;
typedef struct ggml_backend * ggml_backend_t;
typedef void * ggml_backend_graph_plan_t;
typedef struct ggml_backend_reg * ggml_backend_reg_t;
typedef struct ggml_backend_device * ggml_backend_dev_t;
//
// Backend buffer type
//
// Queries and allocation on a buffer type handle.
// NOTE(review): no descriptions are given in this header; the grouping below is derived from
// the identifiers and signatures only.
//   _name / _get_alignment / _get_max_size / _is_host / _get_device: property getters
//   _alloc_buffer: returns a buffer handle for the requested size
//   _get_alloc_size: returns a size_t for a given tensor
GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
GGML_API ggml_backend_dev_t ggml_backend_buft_get_device (ggml_backend_buffer_type_t buft);
//
// Backend buffer
//
// Usage tag of a buffer; written with ggml_backend_buffer_set_usage() and read back with
// ggml_backend_buffer_get_usage().
enum ggml_backend_buffer_usage {
GGML_BACKEND_BUFFER_USAGE_ANY = 0,
// per the scheduler usage example in this header: operations that use tensors from a
// WEIGHTS buffer are preferably assigned to the backend that owns the buffer
GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
GGML_BACKEND_BUFFER_USAGE_COMPUTE = 2,
};
// Operations on a buffer handle.
// NOTE(review): apart from the tensor-copy comment, nothing here is described in this header;
// the notes are derived from identifiers and signatures only.
// Several getters mirror the ggml_backend_buft_* functions (alignment, max size, alloc size, is_host).
GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer);
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
GGML_API enum ggml_status ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
// takes a single byte value; presumably fills the whole buffer with it
GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
// usage tag accessors, see enum ggml_backend_buffer_usage
GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
GGML_API enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage (ggml_backend_buffer_t buffer);
GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer);
GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
// tensor copy between different backends
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
//
// Backend (stream)
//
// Operations on a backend (stream) handle.
// NOTE(review): comments marked "presumably" are derived from identifiers and signatures only.
GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend);
GGML_API const char * ggml_backend_name(ggml_backend_t backend);
GGML_API void ggml_backend_free(ggml_backend_t backend);
GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
GGML_API size_t ggml_backend_get_max_size(ggml_backend_t backend);
// asynchronous variants of tensor set/get; they take the backend in addition to the
// tensor, data pointer, offset and size
GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
// "offset" refers to the offset in tensor->data for setting/getting data
GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
GGML_API void ggml_backend_tensor_memset( struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
// presumably blocks until the work queued on the backend has completed
GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
// graph plans: create a plan for a graph, compute it, free it
GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph);
GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
// compute a graph directly; both variants report an enum ggml_status
GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
// NOTE: will be removed, use device version instead
// (the device versions are the ggml_backend_dev_supports_op / _supports_buft / _offload_op declarations)
GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
// asynchronous copy
// the copy is performed after all the currently queued operations in backend_src
// backend_dst will wait for the copy to complete before performing other operations
// automatic fallback to sync copy if async is not supported
GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
GGML_API ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend);
//
// Events
//
// Event handles are created for a device and then used together with a backend.
// NOTE(review): the per-function notes are derived from identifiers and signatures only.
GGML_API ggml_backend_event_t ggml_backend_event_new(ggml_backend_dev_t device);
GGML_API void ggml_backend_event_free(ggml_backend_event_t event);
// presumably records the event on the given backend
GGML_API void ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend);
// presumably blocks the caller until the event has completed
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
// presumably makes the given backend wait for the event
GGML_API void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event);
//
// Backend device
//
// Kind of device, as stored in ggml_backend_dev_props::type and accepted by
// ggml_backend_dev_by_type() / ggml_backend_init_by_type().
enum ggml_backend_dev_type {
    GGML_BACKEND_DEVICE_TYPE_CPU   = 0, // CPU device using system memory
    GGML_BACKEND_DEVICE_TYPE_GPU   = 1, // GPU device using dedicated memory
    GGML_BACKEND_DEVICE_TYPE_ACCEL = 2  // accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
};
// functionality supported by the device
// (one bool per capability; embedded in struct ggml_backend_dev_props as the `caps` member)
struct ggml_backend_dev_caps {
// asynchronous operations
bool async;
// pinned host buffer
bool host_buffer;
// creating buffers from host ptr
bool buffer_from_host_ptr;
// event synchronization
bool events;
};
// all the device properties
// (filled in through the `props` out-parameter of ggml_backend_dev_get_props())
struct ggml_backend_dev_props {
const char * name;
const char * description;
// NOTE(review): units are not stated in this header - presumably bytes, confirm
size_t memory_free;
size_t memory_total;
enum ggml_backend_dev_type type;
struct ggml_backend_dev_caps caps;
};
// Operations on a device handle.
// NOTE(review): no descriptions are given in this header; the notes are derived from
// identifiers and signatures only.
GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device);
GGML_API const char * ggml_backend_dev_description(ggml_backend_dev_t device);
// writes two size_t values through the `free` and `total` pointers
GGML_API void ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total);
GGML_API enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device);
// fills the caller-provided struct ggml_backend_dev_props
GGML_API void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props);
GGML_API ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device);
// returns a backend (stream) handle for the device; `params` is a C string whose
// format is not described here
GGML_API ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * params);
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t device);
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device);
GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
// device-level counterparts of ggml_backend_supports_op / _supports_buft / _offload_op
GGML_API bool ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
GGML_API bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft);
GGML_API bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
//
// Backend (reg)
//
// Operations on a backend registration handle: its name, its devices (count + indexed
// access) and lookup of a function pointer by name.
GGML_API const char * ggml_backend_reg_name(ggml_backend_reg_t reg);
GGML_API size_t ggml_backend_reg_dev_count(ggml_backend_reg_t reg);
GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index);
// returns an untyped pointer; the typedefs that follow give the signatures of common entries
GGML_API void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name);
// Common functions that may be obtained using ggml_backend_reg_get_proc_address
// (each typedef below is the function-pointer type to which the returned void * is cast;
// NOTE(review): the lookup names to pass are not listed in this header)
// Split buffer type for tensor parallelism
typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(int main_device, const float * tensor_split);
// Set the number of threads for the backend
typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads);
// Get additional buffer types provided by the device (returns a NULL-terminated array)
typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device);
// Set the abort callback for the backend
typedef void (*ggml_backend_set_abort_callback_t)(ggml_backend_t backend, ggml_abort_callback abort_callback, void * abort_callback_data);
// Get a list of feature flags supported by the backend (returns a NULL-terminated array)
// Each entry is a pair of C strings.
struct ggml_backend_feature {
const char * name;
const char * value;
};
typedef struct ggml_backend_feature * (*ggml_backend_get_features_t)(ggml_backend_reg_t reg);
//
// Backend registry
//
// Adds a device handle to the registry.
// NOTE(review): derived from the identifier only - confirm ownership/lifetime expectations.
GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
// Backend (reg) enumeration
// (count + indexed access, or lookup by name)
GGML_API size_t ggml_backend_reg_count(void);
GGML_API ggml_backend_reg_t ggml_backend_reg_get(size_t index);
GGML_API ggml_backend_reg_t ggml_backend_reg_by_name(const char * name);
// Device enumeration
// (count + indexed access, or lookup by name or by device type)
GGML_API size_t ggml_backend_dev_count(void);
GGML_API ggml_backend_dev_t ggml_backend_dev_get(size_t index);
GGML_API ggml_backend_dev_t ggml_backend_dev_by_name(const char * name);
GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type);
// Direct backend (stream) initialization
// = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params)
GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params);
// = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params)
GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params);
// = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL)
GGML_API ggml_backend_t ggml_backend_init_best(void);
// Load a backend from a dynamic library and register it
GGML_API ggml_backend_reg_t ggml_backend_load(const char * path);
// Unload a backend if loaded dynamically and unregister it
GGML_API void ggml_backend_unload(ggml_backend_reg_t reg);
// Load all known backends from dynamic libraries
// (the _from_path variant additionally takes a directory path)
GGML_API void ggml_backend_load_all(void);
GGML_API void ggml_backend_load_all_from_path(const char * dir_path);
//
// Backend scheduler
//
// The backend scheduler allows for multiple backend devices to be used together
// Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
// The backends are selected based on:
// - the backend that supports the operation
// - the location of the pre-allocated tensors (e.g. the weights)
/*
Example usage:
// operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned
// preferably to run on the same backend as the buffer
ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false);
// initialize buffers from a max size graph (optional)
reserve_graph = build_graph(sched, max_batch_size);
// manually assign nodes to a backend (optional, should not be needed in most cases)
struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu);
ggml_backend_sched_reserve(sched, reserve_graph);
// compute
graph = build_graph(sched); // the graph and its tensors are single-use in terms of allocation, multi-use in terms of computation
for (int i = 0; i < 10; ++i) {
ggml_backend_sched_graph_compute(sched, graph); // on the first iteration the graph is allocated automatically
}
// if there are graph inputs:
graph = build_graph(sched); // get a new graph that is not allocated (the metadata for the old graph is freed once ggml_free is called)
ggml_backend_sched_reset(sched); // clear the allocation of the previous graph
ggml_backend_sched_alloc_graph(sched, graph); // explicitly allocate the new graph but do not execute it
ggml_backend_tensor_set(input_tensor, ...); // copy data to the newly allocated graph tensors
ggml_backend_sched_graph_compute(sched, graph); // execute the graph
// as an alternative to the above it is also possible to assign the inputs to a dedicated context and
// allocate them statically via ggml_backend_alloc_ctx_tensors
*/
// Handle to a backend scheduler; struct ggml_backend_sched is declared but not defined here.
typedef struct ggml_backend_sched * ggml_backend_sched_t;
// Evaluation callback for each node in the graph (set with ggml_backend_sched_set_eval_callback)
// when ask == true, the scheduler wants to know if the user wants to observe this node
// this allows the scheduler to batch nodes together in order to evaluate them in a single call
//
// when ask == false, the scheduler is passing the node tensor to the user for observation
// if the user returns false, the scheduler will cancel the graph compute
//
typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
// Initialize a backend scheduler, backends with low index are given priority over backends with high index
// (`backends` and `bufts` are arrays; the usage example in this header passes NULL for `bufts`)
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
// Initialize backend buffers from a measure graph
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph); // returns success
// Indexed access to the scheduler's backends (count + getter)
GGML_API int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched);
GGML_API ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i);
// Get the number of splits of the last graph
GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
// NOTE(review): what is being counted as a "copy" is not described in this header - confirm
GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
// Manual node-to-backend assignment (see the "manually assign nodes to a backend" step of the
// usage example in this header) and the matching getter
GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
// Allocate and compute graph on the backend scheduler
GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph); // returns success
GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
// Reset all assignments and allocators - must be called before changing the node backends or allocating a new graph.
// This in effect deallocates all tensors that were previously allocated and leaves them with dangling pointers.
// The correct way to use this API is to discard the deallocated tensors and create new ones.
GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
// Set a callback to be called for each resulting node during graph compute
GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
//
// Utils
//
// Value returned by ggml_backend_graph_copy() and later passed (by value) to ggml_backend_graph_copy_free().
// NOTE(review): field roles below are read off the names only — confirm against the implementation.
struct ggml_backend_graph_copy {
ggml_backend_buffer_t buffer; // presumably the backend buffer backing the copied tensors
struct ggml_context * ctx_allocated; // presumably the context of tensors that own data in `buffer`
struct ggml_context * ctx_unallocated; // presumably the context of tensors without their own allocation
struct ggml_cgraph * graph; // the copied graph
};
// Copy a graph to a different backend
// the result is returned by value; hand the same value to ggml_backend_graph_copy_free when it is no longer needed
GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
// Callback type accepted by ggml_backend_compare_graph_backend: receives a node index, two tensors and the caller's user_data
// NOTE(review): t1/t2 are presumably the same node as evaluated by backend1/backend2, and the bool result presumably controls whether the comparison continues — confirm
typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
// Compare the output of two backends
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
// Tensor initialization
// both functions report their outcome as enum ggml_status
GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor);
// CPU buffer types are always available
// NOTE(review): buffer_from_ptr presumably wraps caller-owned memory of `size` bytes rather than copying it — confirm ownership rules
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,25 @@
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#ifdef __cplusplus
extern "C" {
#endif
// backend API
// init takes no arguments and returns a ggml_backend_t; is_blas tests an arbitrary backend handle
GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void);
GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend);
// number of threads used for conversion to float
// for openblas and blis, this will also set the number of threads used for blas operations
GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
// returns the ggml_backend_reg_t for this backend
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,123 @@
/*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#pragma once
#include "ggml-backend.h"
#include "ggml.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
* @brief Maximum number of CANN devices supported.
*/
#define GGML_CANN_MAX_DEVICES 16
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cann_reg(void);
/**
* @brief Initializes the CANN backend for a specified device.
*
* This function initializes the CANN backend for the given device.
* It verifies the device index, allocates a context, and creates a backend
* instance.
*
* @param device The index of the device to initialize.
* @return A pointer to the initialized backend instance, or nullptr on failure.
*/
GGML_BACKEND_API ggml_backend_t ggml_backend_cann_init(int32_t device);
/**
* @brief Checks if a given backend is a CANN backend.
*
* This function verifies if the provided backend is a CANN backend by comparing
* its GUID with the CANN backend's GUID.
*
* @param backend The backend instance to check.
* @return True if the backend is a CANN backend, false otherwise.
*/
GGML_BACKEND_API bool ggml_backend_is_cann(ggml_backend_t backend);
/**
* @brief Retrieves the CANN buffer type for a specified device.
*
* This function initializes and returns the buffer type interface associated
* with the given device. It ensures thread-safe access using a mutex.
*
* @param device The device index for which to retrieve the buffer type.
* @return A pointer to the buffer type interface for the specified device, or
* nullptr if the device index is out of range.
*/
GGML_BACKEND_API ggml_backend_buffer_type_t
ggml_backend_cann_buffer_type(int32_t device);
/**
* @brief Retrieves the number of CANN devices available.
*
* This function returns the number of CANN devices available based on
* information obtained from `ggml_cann_info()`.
*
* @return The number of CANN devices available.
*/
GGML_BACKEND_API int32_t ggml_backend_cann_get_device_count(void);
/**
* @brief Retrieves the pinned host buffer type for use with the CPU backend for faster copies between CPU and NPU.
*
* @return A pointer to the host buffer type interface.
*/
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
/**
* @brief Retrieves the description of a specific CANN device.
*
* This function sets the specified device, retrieves the SoC name,
* and writes it into the provided description buffer.
*
* @param device The device index to retrieve the description for.
* @param description Pointer to a buffer where the description will be written.
* @param description_size Size of the description buffer.
*/
GGML_BACKEND_API void ggml_backend_cann_get_device_description(
int32_t device, char* description, size_t description_size);
/**
* @brief Retrieves the memory information of a specific CANN device.
*
* This function sets the specified device, retrieves the free and total
* memory information of the specified type (ACL_HBM_MEM), and stores them
* in the provided pointers.
*
* @param device The device index to retrieve memory information for.
* @param free Pointer to a variable where the free memory size will be stored.
* @param total Pointer to a variable where the total memory size will be
* stored.
*/
GGML_BACKEND_API void ggml_backend_cann_get_device_memory(int32_t device,
size_t* free,
size_t* total);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,39 @@
#pragma once
#ifndef __cplusplus
#error "This header is for C++ only"
#endif
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "gguf.h"
#include <memory>
// Smart pointers for ggml types
// ggml
struct ggml_context_deleter { void operator()(ggml_context * ctx) { ggml_free(ctx); } };
struct gguf_context_deleter { void operator()(gguf_context * ctx) { gguf_free(ctx); } };
typedef std::unique_ptr<ggml_context, ggml_context_deleter> ggml_context_ptr;
typedef std::unique_ptr<gguf_context, gguf_context_deleter> gguf_context_ptr;
// ggml-alloc
struct ggml_gallocr_deleter { void operator()(ggml_gallocr_t galloc) { ggml_gallocr_free(galloc); } };
typedef std::unique_ptr<ggml_gallocr_t, ggml_gallocr_deleter> ggml_gallocr_ptr;
// ggml-backend
struct ggml_backend_deleter { void operator()(ggml_backend_t backend) { ggml_backend_free(backend); } };
struct ggml_backend_buffer_deleter { void operator()(ggml_backend_buffer_t buffer) { ggml_backend_buffer_free(buffer); } };
struct ggml_backend_event_deleter { void operator()(ggml_backend_event_t event) { ggml_backend_event_free(event); } };
struct ggml_backend_sched_deleter { void operator()(ggml_backend_sched_t sched) { ggml_backend_sched_free(sched); } };
typedef std::unique_ptr<ggml_backend, ggml_backend_deleter> ggml_backend_ptr;
typedef std::unique_ptr<ggml_backend_buffer, ggml_backend_buffer_deleter> ggml_backend_buffer_ptr;
typedef std::unique_ptr<ggml_backend_event, ggml_backend_event_deleter> ggml_backend_event_ptr;
typedef std::unique_ptr<ggml_backend_sched, ggml_backend_sched_deleter> ggml_backend_sched_ptr;

View File

@ -0,0 +1,138 @@
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#ifdef __cplusplus
extern "C" {
#endif
// the compute plan that needs to be prepared for ggml_graph_compute()
// since https://github.com/ggml-org/ggml/issues/287
// a plan is returned by value from ggml_graph_plan() and handed to ggml_graph_compute() by pointer
struct ggml_cplan {
size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
// NOTE(review): n_threads and threadpool mirror the like-named ggml_graph_plan() arguments — presumably copied from them, confirm
int n_threads;
struct ggml_threadpool * threadpool;
// abort ggml_graph_compute when true
ggml_abort_callback abort_callback;
// NOTE(review): presumably the argument handed to abort_callback — confirm
void * abort_callback_data;
};
// numa strategies
// selected through ggml_numa_init(); the strategies carry explicit values 0..4
enum ggml_numa_strategy {
GGML_NUMA_STRATEGY_DISABLED = 0,
GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
GGML_NUMA_STRATEGY_ISOLATE = 2,
GGML_NUMA_STRATEGY_NUMACTL = 3,
GGML_NUMA_STRATEGY_MIRROR = 4,
GGML_NUMA_STRATEGY_COUNT // no explicit value: takes the next one (5), i.e. the number of strategies above
};
GGML_BACKEND_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
GGML_BACKEND_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
// scalar helpers: ggml_new_* take a context and a value and return a tensor pointer,
// ggml_set_* take an existing tensor and a value and return a tensor pointer
GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
// element accessors: _1d variants take a single index i, _nd variants take four indices i0..i3
// NOTE(review): the set_* accessors take a const tensor pointer although their names say they write — confirm the const is intentional
GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
GGML_BACKEND_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
GGML_BACKEND_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
GGML_BACKEND_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
GGML_BACKEND_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
GGML_BACKEND_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
GGML_BACKEND_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
// threadpool API: _new builds a pool from a params struct, the remaining functions take the returned pool pointer
GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
GGML_BACKEND_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
GGML_BACKEND_API int ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
GGML_BACKEND_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
GGML_BACKEND_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
// ggml_graph_plan() has to be called before ggml_graph_compute()
// when plan.work_size > 0, caller must allocate memory for plan.work_data
// the inline comments on the parameters name the values callers typically pass; C has no default arguments, so they must still be supplied
GGML_BACKEND_API struct ggml_cplan ggml_graph_plan(
const struct ggml_cgraph * cgraph,
int n_threads, /* = GGML_DEFAULT_N_THREADS */
struct ggml_threadpool * threadpool /* = NULL */ );
// runs the graph with a caller-prepared plan; the outcome is reported as enum ggml_status
GGML_BACKEND_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
// same as ggml_graph_compute() but the work data is allocated as a part of the context
// note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
GGML_BACKEND_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
//
// system info
//
// every probe below takes no arguments and returns int
// NOTE(review): presumably non-zero means the feature is available — confirm; ggml_cpu_get_sve_cnt is the exception and returns a length (see its comment)
// x86
GGML_BACKEND_API int ggml_cpu_has_sse3 (void);
GGML_BACKEND_API int ggml_cpu_has_ssse3 (void);
GGML_BACKEND_API int ggml_cpu_has_avx (void);
GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
GGML_BACKEND_API int ggml_cpu_has_bmi2 (void);
GGML_BACKEND_API int ggml_cpu_has_f16c (void);
GGML_BACKEND_API int ggml_cpu_has_fma (void);
GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void);
GGML_BACKEND_API int ggml_cpu_has_amx_int8 (void);
// ARM
GGML_BACKEND_API int ggml_cpu_has_neon (void);
GGML_BACKEND_API int ggml_cpu_has_arm_fma (void);
GGML_BACKEND_API int ggml_cpu_has_fp16_va (void);
GGML_BACKEND_API int ggml_cpu_has_dotprod (void);
GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
GGML_BACKEND_API int ggml_cpu_has_sve (void);
GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes
GGML_BACKEND_API int ggml_cpu_has_sme (void);
// other
GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
GGML_BACKEND_API int ggml_cpu_has_vsx (void);
GGML_BACKEND_API int ggml_cpu_has_vxe (void);
GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
// Internal types and functions exposed for tests and benchmarks
// dot-product kernel signature: writes through the float pointer s and reads the untyped operands x and y;
// all three pointers are GGML_RESTRICT-qualified
// NOTE(review): n is presumably the element count, bs/bx/by presumably strides and nrc presumably a row count — confirm against the kernels
typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
const void * GGML_RESTRICT y, size_t by, int nrc);
// per-type CPU traits, looked up with ggml_get_type_traits_cpu()
struct ggml_type_traits_cpu {
ggml_from_float_t from_float; // NOTE(review): presumably converts float data into this type — confirm
ggml_vec_dot_t vec_dot; // dot-product kernel for this type
enum ggml_type vec_dot_type; // NOTE(review): presumably the type vec_dot expects for its other operand — confirm
int64_t nrows; // number of rows to process simultaneously
};
// returns a pointer to const traits for the given type; callers cannot modify the entry through it
GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);
GGML_BACKEND_API void ggml_cpu_init(void);
//
// CPU backend
//
// init returns a ggml_backend_t; the set_* functions below take that handle as backend_cpu
GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void);
GGML_BACKEND_API bool ggml_backend_is_cpu (ggml_backend_t backend);
GGML_BACKEND_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
GGML_BACKEND_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
// returns the ggml_backend_reg_t for this backend
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,47 @@
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#ifdef __cplusplus
extern "C" {
#endif
// Display names for the backend and its BLAS library, chosen by which toolchain macro is defined:
// GGML_USE_HIP -> "ROCm"/"hipBLAS", GGML_USE_MUSA -> "MUSA"/"muBLAS", otherwise "CUDA"/"cuBLAS"
#ifdef GGML_USE_HIP
#define GGML_CUDA_NAME "ROCm"
#define GGML_CUBLAS_NAME "hipBLAS"
#elif defined(GGML_USE_MUSA)
#define GGML_CUDA_NAME "MUSA"
#define GGML_CUBLAS_NAME "muBLAS"
#else
#define GGML_CUDA_NAME "CUDA"
#define GGML_CUBLAS_NAME "cuBLAS"
#endif
// NOTE(review): presumably the upper bound on device indices accepted below — confirm; defined regardless of the toolchain branch above
#define GGML_CUDA_MAX_DEVICES 16
// backend API
// devices are addressed by an int index throughout this header
GGML_BACKEND_API ggml_backend_t ggml_backend_cuda_init(int device);
GGML_BACKEND_API bool ggml_backend_is_cuda(ggml_backend_t backend);
// device buffer
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
// split tensor buffer that splits matrices by rows across multiple devices
// NOTE(review): tensor_split is presumably an array with one entry per device (up to GGML_CUDA_MAX_DEVICES) — confirm the expected length
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(int main_device, const float * tensor_split);
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
// device queries: the description is written into a caller-provided buffer of description_size bytes,
// memory figures are written through the free/total out-pointers
GGML_BACKEND_API int ggml_backend_cuda_get_device_count(void);
GGML_BACKEND_API void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
GGML_BACKEND_API void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
// host buffer registration: register takes pointer + size and reports a bool, unregister takes the pointer only
GGML_BACKEND_API bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
GGML_BACKEND_API void ggml_backend_cuda_unregister_host_buffer(void * buffer);
// returns the ggml_backend_reg_t for this backend
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cuda_reg(void);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,50 @@
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#define GGML_KOMPUTE_MAX_DEVICES 16
// Description of one Vulkan device, as produced by the ggml_vk_* query functions declared after this struct
// NOTE(review): units and ownership below are not stated in this header — heapSize/bufferAlignment/maxAlloc are presumably byte counts,
// and the lifetime of the name/vendor strings should be confirmed against the implementation
struct ggml_vk_device {
int index;
int type; // same as VkPhysicalDeviceType
size_t heapSize;
const char * name;
const char * vendor;
int subgroupSize;
uint64_t bufferAlignment;
uint64_t maxAlloc;
};
// device queries; unlike the backend API below these are declared without GGML_BACKEND_API
// NOTE(review): ggml_vk_available_devices presumably returns an array whose length is written to *count — confirm who frees it
struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
// fills the caller-provided *device and reports a bool
bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name);
bool ggml_vk_has_vulkan(void);
bool ggml_vk_has_device(void);
// returns the device description by value
struct ggml_vk_device ggml_vk_current_device(void);
//
// backend API
//
// forward declaration
// NOTE(review): ggml-backend.h is already included at the top of this header, so this typedef is presumably redundant — confirm before removing
typedef struct ggml_backend * ggml_backend_t;
GGML_BACKEND_API ggml_backend_t ggml_backend_kompute_init(int device);
GGML_BACKEND_API bool ggml_backend_is_kompute(ggml_backend_t backend);
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
// returns the ggml_backend_reg_t for this backend
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_kompute_reg(void);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,66 @@
// Note: this description is outdated
//
// An interface allowing to compute ggml_cgraph with Metal
//
// This is a fully functional interface that extends ggml with GPU support for Apple devices.
// A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, etc.)
//
// How it works?
//
// As long as your program can create and evaluate a ggml_cgraph on the CPU, you can use this
// interface to evaluate the same graph on the GPU. Instead of using ggml_graph_compute(), you
// use ggml_metal_graph_compute() (or ggml_vulkan_graph_compute(), etc.)
//
// You only need to make sure that all memory buffers that you used during the graph creation
// are mapped to the device memory with the ggml_metal_add_buffer() function. This mapping is
// used during the graph evaluation to determine the arguments of the compute kernels.
//
// Synchronization between device and host memory (for example for input and output tensors)
// is done with the ggml_metal_set_tensor() and ggml_metal_get_tensor() functions.
//
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#include <stddef.h>
#include <stdbool.h>
struct ggml_tensor;
struct ggml_cgraph;
#ifdef __cplusplus
extern "C" {
#endif
//
// backend API
// user-code should use only these functions
//
// init takes no arguments and returns a ggml_backend_t; is_metal tests an arbitrary backend handle
GGML_BACKEND_API ggml_backend_t ggml_backend_metal_init(void);
GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend);
// deprecated: the GGML_DEPRECATED wrapper attaches the message below to this declaration
GGML_DEPRECATED(
GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
"obsoleted by the new device interface - https://github.com/ggml-org/llama.cpp/pull/9713");
// NOTE(review): user_data is presumably forwarded to abort_callback — confirm
GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
// helper to check if the device supports a specific family
// ideally, the user code should be doing these checks
// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
GGML_BACKEND_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
// capture all command buffers committed the next time `ggml_backend_graph_compute` is called
GGML_BACKEND_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
// returns the ggml_backend_reg_t for this backend
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_metal_reg(void);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,26 @@
#ifndef GGML_OPENCL_H
#define GGML_OPENCL_H
#include "ggml.h"
#include "ggml-backend.h"
#ifdef __cplusplus
extern "C" {
#endif
//
// backend API
//
// none of the functions below take a device index: init and both buffer-type getters are parameterless
GGML_BACKEND_API ggml_backend_t ggml_backend_opencl_init(void);
GGML_BACKEND_API bool ggml_backend_is_opencl(ggml_backend_t backend);
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void);
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void);
// returns the ggml_backend_reg_t for this backend
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_opencl_reg(void);
#ifdef __cplusplus
}
#endif
#endif // GGML_OPENCL_H

View File

@ -0,0 +1,216 @@
// This file contains functionality for training models using GGML.
// It is not strictly needed vs. just vanilla GGML but it provides a more high-level interface for common needs such as datasets.
// At the bottom of this file especially there are relatively high-level functions that are suitable for use or adaptation in user code.
//
// Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de)
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Forward declarations only: the three structs have no body in this header, so user code works with them through the pointer typedefs below.
struct ggml_opt_dataset;
struct ggml_opt_context;
struct ggml_opt_result;
// Handle types used by the rest of this API
typedef struct ggml_opt_dataset * ggml_opt_dataset_t;
typedef struct ggml_opt_context * ggml_opt_context_t;
typedef struct ggml_opt_result * ggml_opt_result_t;
// ====== Loss ======
// built-in loss types, i.e. the built-in quantities minimized by the optimizer
// custom loss types can be defined via mean or sum which simply reduce the outputs for all datapoints to a single value
// no explicit values are given, so the enumerators are numbered 0..3 in declaration order
enum ggml_opt_loss_type {
GGML_OPT_LOSS_TYPE_MEAN,
GGML_OPT_LOSS_TYPE_SUM,
GGML_OPT_LOSS_TYPE_CROSS_ENTROPY,
GGML_OPT_LOSS_TYPE_MEAN_SQUARED_ERROR,
};
// ====== Dataset ======
// creates a dataset handle; release it with ggml_opt_dataset_free
GGML_API ggml_opt_dataset_t ggml_opt_dataset_init(
int64_t ne_datapoint, // number of elements per datapoint
int64_t ne_label, // number of elements per label
int64_t ndata, // total number of datapoints/labels
int64_t ndata_shard); // number of datapoints/labels per shard (unit at which the dataset is shuffled/copied)
GGML_API void ggml_opt_dataset_free(ggml_opt_dataset_t dataset);
// get underlying tensors that store the data
GGML_API struct ggml_tensor * ggml_opt_dataset_data (ggml_opt_dataset_t dataset); // shape = [ne_datapoint, ndata]
GGML_API struct ggml_tensor * ggml_opt_dataset_labels(ggml_opt_dataset_t dataset); // shape = [ne_label, ndata]
// shuffle idata first datapoints from dataset with RNG from opt_ctx, shuffle all datapoints if idata is negative
GGML_API void ggml_opt_dataset_shuffle(ggml_opt_context_t opt_ctx, ggml_opt_dataset_t dataset, int64_t idata);
// get batch at position ibatch from dataset and copy the data to data_batch and labels_batch
GGML_API void ggml_opt_dataset_get_batch(
ggml_opt_dataset_t dataset,
struct ggml_tensor * data_batch, // shape = [ne_datapoint, ndata_batch]
struct ggml_tensor * labels_batch, // shape = [ne_label, ndata_batch]
int64_t ibatch);
// ====== Model / Context ======
// build type stored in ggml_opt_params.build_type
// NOTE(review): presumably selects how much of the forward / gradient / optimizer graph is constructed — confirm
// no explicit values are given, so the enumerators are numbered 0..2 in declaration order
enum ggml_opt_build_type {
GGML_OPT_BUILD_TYPE_FORWARD,
GGML_OPT_BUILD_TYPE_GRAD,
GGML_OPT_BUILD_TYPE_OPT,
};
// parameters that control which optimizer is used and how said optimizer tries to find the minimal loss
// currently the struct holds a single member group, `adamw`
struct ggml_opt_optimizer_params {
// AdamW optimizer parameters
struct {
float alpha; // learning rate
float beta1; // NOTE(review): presumably the first-moment decay rate — confirm
float beta2; // NOTE(review): presumably the second-moment decay rate — confirm
float eps; // epsilon for numerical stability
float wd; // weight decay for AdamW, use 0.0f to disable
} adamw;
};
// callback to calculate optimizer parameters prior to a backward pass
// userdata can be used to pass arbitrary data
// the parameters are returned by value
typedef struct ggml_opt_optimizer_params (*ggml_opt_get_optimizer_params)(void * userdata);
// returns the default optimizer params (constant)
// userdata is not used
// the signature matches ggml_opt_get_optimizer_params, so this function can be passed wherever that callback type is expected
GGML_API struct ggml_opt_optimizer_params ggml_opt_get_default_optimizer_params(void * userdata);
// parameters for initializing a new optimization context
// passed by value to ggml_opt_init()
struct ggml_opt_params {
ggml_backend_sched_t backend_sched; // defines which backends are used to construct the compute graphs
struct ggml_context * ctx_compute; // created in user code, holds non-static tensors
// the forward graph is defined by inputs and outputs
// those tensors and all tensors in between are not intended to be reusable between multiple optimization contexts
struct ggml_tensor * inputs;
struct ggml_tensor * outputs;
enum ggml_opt_loss_type loss_type;
enum ggml_opt_build_type build_type;
int32_t opt_period; // after how many gradient accumulation steps an optimizer step should be done
ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
void * get_opt_pars_ud; // userdata for calculating optimizer parameters
};
// get parameters for an optimization context with defaults set where possible
// parameters for which no sensible defaults exist are supplied as arguments to this function
// note: the return type is spelled with the struct keyword so that this extern "C" declaration
// also parses when the header is included from a C translation unit (a bare struct tag is only a type name in C++)
GGML_API struct ggml_opt_params ggml_opt_default_params(
        ggml_backend_sched_t backend_sched,
        struct ggml_context * ctx_compute,
        struct ggml_tensor * inputs,
        struct ggml_tensor * outputs,
        enum ggml_opt_loss_type loss_type);
// creates an optimization context from params (taken by value); release it with ggml_opt_free
GGML_API ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params);
GGML_API void ggml_opt_free(ggml_opt_context_t opt_ctx);
// set gradients to zero, initialize loss, and optionally reset the optimizer
GGML_API void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer);
// get underlying tensors that store data
GGML_API struct ggml_tensor * ggml_opt_inputs( ggml_opt_context_t opt_ctx); // forward graph input tensor
GGML_API struct ggml_tensor * ggml_opt_outputs( ggml_opt_context_t opt_ctx); // forward graph output tensor
GGML_API struct ggml_tensor * ggml_opt_labels( ggml_opt_context_t opt_ctx); // labels to compare outputs against
GGML_API struct ggml_tensor * ggml_opt_loss( ggml_opt_context_t opt_ctx); // scalar tensor that contains the loss
GGML_API struct ggml_tensor * ggml_opt_pred( ggml_opt_context_t opt_ctx); // predictions made by outputs
GGML_API struct ggml_tensor * ggml_opt_ncorrect(ggml_opt_context_t opt_ctx); // number of matching predictions between outputs and labels
// NOTE(review): presumably returns the gradient accumulator tensor associated with node — confirm, including what is returned for nodes without one
GGML_API struct ggml_tensor * ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, struct ggml_tensor * node);
// ====== Optimization Result ======
// creates a result handle; takes no arguments
// declared with (void) so that C translation units see a real prototype (an empty list leaves the arguments unspecified in C)
GGML_API ggml_opt_result_t ggml_opt_result_init(void);
// free / reset operate on a handle obtained from ggml_opt_result_init
GGML_API void ggml_opt_result_free(ggml_opt_result_t result);
GGML_API void ggml_opt_result_reset(ggml_opt_result_t result);
// get data from result, uncertainties are optional and can be ignored by passing NULL
// all getters return void and write through their out-pointers
GGML_API void ggml_opt_result_ndata( ggml_opt_result_t result, int64_t * ndata); // writes 1 value, number of datapoints
GGML_API void ggml_opt_result_loss( ggml_opt_result_t result, double * loss, double * unc); // writes 1 value
GGML_API void ggml_opt_result_pred( ggml_opt_result_t result, int32_t * pred); // writes ndata values
GGML_API void ggml_opt_result_accuracy(ggml_opt_result_t result, double * accuracy, double * unc); // writes 1 value
// ====== Computation ======
// both functions take the optimization context plus an optional result handle and return nothing
// do forward pass, increment result if not NULL
GGML_API void ggml_opt_forward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
// do forward pass, increment result if not NULL, do backward pass
GGML_API void ggml_opt_forward_backward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
// ############################################################################
// ## The high-level functions start here. They do not depend on any private ##
// ## functions or structs and can be copied to and adapted for user code. ##
// ############################################################################
// ====== Intended Usage ======
//
// 1. Select the appropriate loss for your problem.
// 2. Create a dataset and set the data for the "data" tensor. Also set the "labels" tensor if your loss needs them.
// Setting the shard size to 1 will be fine, it's the granularity with which data is shuffled/loaded (bigger values are faster).
// 3. Create a GGML graph for your model with no_alloc == true. Use two separate contexts for the tensors.
// The first context should contain the model parameters and inputs and be allocated statically in user code.
// The second context should contain all other tensors and will be (re)allocated automatically.
// Due to this automated allocation the data of the second context is not defined when accessed in user code.
// Note that the second dimension of the inputs/outputs are interpreted as the number of datapoints in those tensors.
// 4. Call ggml_opt_fit. If you need more control you can use ggml_opt_epoch instead.
// signature for a callback while evaluating opt_ctx on dataset, called after an evaluation
// ggml_opt_epoch takes two callbacks of this type (callback_train / callback_eval);
// ggml_opt_epoch_callback_progress_bar is declared with a matching parameter list
typedef void (*ggml_opt_epoch_callback)(
bool train, // true after training evaluation, false after validation evaluation
ggml_opt_context_t opt_ctx,
ggml_opt_dataset_t dataset,
ggml_opt_result_t result, // result associated with the dataset subsection
int64_t ibatch, // number of batches that have been evaluated so far
int64_t ibatch_max, // total number of batches in this dataset subsection
int64_t t_start_us); // time at which the evaluation on the dataset subsection was started
// do training on front of dataset, do evaluation only on back of dataset
// the front/back boundary is idata_split
// NOTE(review): whether callback_train / callback_eval may be NULL is not stated here — confirm before passing NULL
GGML_API void ggml_opt_epoch(
ggml_opt_context_t opt_ctx,
ggml_opt_dataset_t dataset,
ggml_opt_result_t result_train, // result to increment during training, ignored if NULL
ggml_opt_result_t result_eval, // result to increment during evaluation, ignored if NULL
int64_t idata_split, // data index at which to split training and evaluation
ggml_opt_epoch_callback callback_train,
ggml_opt_epoch_callback callback_eval);
// callback that prints a progress bar on stderr
// the parameter list matches ggml_opt_epoch_callback, so this function can be passed as callback_train / callback_eval to ggml_opt_epoch
GGML_API void ggml_opt_epoch_callback_progress_bar(
bool train,
ggml_opt_context_t opt_ctx,
ggml_opt_dataset_t dataset,
ggml_opt_result_t result,
int64_t ibatch,
int64_t ibatch_max,
int64_t t_start_us);
// fit model defined by inputs and outputs to dataset
// note: ggml_context and ggml_tensor are spelled with the struct keyword so that this extern "C" declaration
// also parses when the header is included from a C translation unit
GGML_API void ggml_opt_fit(
        ggml_backend_sched_t backend_sched, // backend scheduler for constructing the compute graphs
        struct ggml_context * ctx_compute, // context with temporarily allocated tensors to calculate the outputs
        struct ggml_tensor * inputs, // input tensor with shape [ne_datapoint, ndata_batch]
        struct ggml_tensor * outputs, // output tensor, must have shape [ne_label, ndata_batch] if labels are used
        ggml_opt_dataset_t dataset, // dataset with data and optionally also labels
        enum ggml_opt_loss_type loss_type, // loss to minimize
        ggml_opt_get_optimizer_params get_opt_pars, // callback to get optimizer params, userdata is pointer to epoch (of type int64_t)
        int64_t nepoch, // how many times the dataset should be iterated over
        int64_t nbatch_logical, // datapoints per optimizer step, must be a multiple of ndata_batch in inputs/outputs
        float val_split, // fraction of the dataset to use for validation, must be in [0.0f, 1.0f)
        bool silent); // whether or not info prints to stderr should be suppressed
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,30 @@
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#ifdef __cplusplus
extern "C" {
#endif
#define GGML_RPC_MAX_SERVERS 16
// backend API
// remote servers are identified by an endpoint string throughout this header
// NOTE(review): the expected endpoint format (e.g. host:port) is not stated here — confirm
GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint);
GGML_BACKEND_API bool ggml_backend_is_rpc(ggml_backend_t backend);
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint);
// memory figures are written through the free/total out-pointers
GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
// server side: takes a local backend, the endpoint, a cache directory and the memory figures
// NOTE(review): presumably blocks while serving and presumably accepts a NULL cache_dir — confirm both
GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint,
const char * cache_dir,
size_t free_mem, size_t total_mem);
// returns the ggml_backend_reg_t for this backend
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void);
// returns a ggml_backend_dev_t for the given endpoint
GGML_BACKEND_API ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,49 @@
//
// MIT license
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: MIT
//
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#define GGML_SYCL_NAME "SYCL"
#define GGML_SYCL_MAX_DEVICES 48
#ifdef __cplusplus
extern "C" {
#endif
// backend API
// Creates a backend for the SYCL device with index `device`.
GGML_BACKEND_API ggml_backend_t ggml_backend_sycl_init(int device);
// Returns whether `backend` is a SYCL backend.
GGML_BACKEND_API bool ggml_backend_is_sycl(ggml_backend_t backend);
// device buffer
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
// split tensor buffer that splits matrices by rows across multiple devices
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
GGML_BACKEND_API void ggml_backend_sycl_print_sycl_devices(void);
// Fills `id_list`, which has room for `max_len` entries.
GGML_BACKEND_API void ggml_backend_sycl_get_gpu_list(int *id_list, int max_len);
// Writes a description of `device` into `description` (capacity `description_size` bytes).
GGML_BACKEND_API void ggml_backend_sycl_get_device_description(int device,
                                                               char *description,
                                                               size_t description_size);
// `(void)` rather than `()`: this header is also consumed as C, where an empty
// parameter list declares a function without a prototype.
GGML_BACKEND_API int ggml_backend_sycl_get_device_count(void);
// Writes the queried values through the `free` / `total` out-parameters.
GGML_BACKEND_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
// SYCL doesn't support registering host memory, keep here for reference
// GGML_BACKEND_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
// GGML_BACKEND_API void ggml_backend_sycl_unregister_host_buffer(void * buffer);
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_sycl_reg(void);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,29 @@
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#ifdef __cplusplus
extern "C" {
#endif
// Backend name string and device limit for the Vulkan backend.
#define GGML_VK_NAME "Vulkan"
#define GGML_VK_MAX_DEVICES 16
// backend API
// NOTE(review): the device index is `size_t dev_num` in init/buffer_type but `int device`
// in the description/memory queries below.
GGML_BACKEND_API ggml_backend_t ggml_backend_vk_init(size_t dev_num);
GGML_BACKEND_API bool ggml_backend_is_vk(ggml_backend_t backend);
GGML_BACKEND_API int ggml_backend_vk_get_device_count(void);
// Writes a description of `device` into `description` (capacity `description_size` bytes).
GGML_BACKEND_API void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size);
// Writes the queried values through the `free` / `total` out-parameters.
GGML_BACKEND_API void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total);
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num);
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_vk_reg(void);
#ifdef __cplusplus
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,30 @@
#pragma once
#ifndef __cplusplus
#error "This header is for C++ only"
#endif
#include <memory>
#include "llama.h"
// Deleter functor for std::unique_ptr: releases a llama_model via llama_model_free().
struct llama_model_deleter {
    void operator()(llama_model * ptr) {
        llama_model_free(ptr);
    }
};
// Deleter functor for std::unique_ptr: releases a llama_context via llama_free().
struct llama_context_deleter {
    void operator()(llama_context * ptr) {
        llama_free(ptr);
    }
};
// Deleter functor for std::unique_ptr: releases a llama_sampler via llama_sampler_free().
struct llama_sampler_deleter {
    void operator()(llama_sampler * ptr) {
        llama_sampler_free(ptr);
    }
};
// Deleter functor for std::unique_ptr: releases a llama_adapter_lora via llama_adapter_lora_free().
struct llama_adapter_lora_deleter {
    void operator()(llama_adapter_lora * ptr) {
        llama_adapter_lora_free(ptr);
    }
};
// Owning smart-pointer aliases: each pairs a llama handle type with its matching deleter.
using llama_model_ptr        = std::unique_ptr<llama_model, llama_model_deleter>;
using llama_context_ptr      = std::unique_ptr<llama_context, llama_context_deleter>;
using llama_sampler_ptr      = std::unique_ptr<llama_sampler, llama_sampler_deleter>;
using llama_adapter_lora_ptr = std::unique_ptr<llama_adapter_lora, llama_adapter_lora_deleter>;

View File

@ -0,0 +1,61 @@
#pragma once
#include "ggml.h" // for ggml_log_level
#include <string>
#include <vector>
// LLAMA_ATTRIBUTE_FORMAT(fmt_index, first_arg_index): marks a function as printf-like so the
// compiler can check format strings. Expands to nothing when __GNUC__ is not defined.
#ifdef __GNUC__
// MinGW without clang uses the gnu_printf archetype instead of printf
# if defined(__MINGW32__) && !defined(__clang__)
# define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
# else
# define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
# endif
#else
# define LLAMA_ATTRIBUTE_FORMAT(...)
#endif
//
// logging
//
// printf-style logging entry point: argument 2 is the format string, variadic arguments start at 3.
LLAMA_ATTRIBUTE_FORMAT(2, 3)
void llama_log_internal (ggml_log_level level, const char * format, ...);
// Default log callback; receives an already formatted `text` plus opaque `user_data`.
void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
// Convenience wrappers that forward to llama_log_internal() with a fixed log level.
#define LLAMA_LOG(...) llama_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
#define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
#define LLAMA_LOG_WARN(...) llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
#define LLAMA_LOG_DEBUG(...) llama_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
#define LLAMA_LOG_CONT(...) llama_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)
//
// helpers
//
// Wrapper whose default constructor deliberately leaves `value` uninitialized.
// The constructor is user-provided and empty (not `= default`), so value-initializing a
// no_init<T> - e.g. when a container creates elements - does not zero `value`.
template <typename T>
struct no_init {
T value;
no_init() { /* do nothing */ }
};
// Timing helper constructed with a reference to an accumulator and an optional `disable` flag.
// NOTE(review): presumably adds the elapsed time (microseconds, judging by `t_start_us`) to
// `t_acc` on destruction - the definitions are not in this header, confirm there.
struct time_meas {
time_meas(int64_t & t_acc, bool disable = false);
~time_meas();
const int64_t t_start_us; // set once at construction (const member)
int64_t & t_acc; // reference member: the referenced accumulator must outlive this object
};
// Operates on `s` in place (non-const reference).
// NOTE(review): presumably replaces every occurrence of `search` with `replace` - confirm in the definition.
void replace_all(std::string & s, const std::string & search, const std::string & replace);
// TODO: rename to llama_format ?
// printf-style formatting into a std::string: argument 1 is the format string, variadic arguments start at 2.
LLAMA_ATTRIBUTE_FORMAT(1, 2)
std::string format(const char * fmt, ...);
// String renderings of a tensor shape, from a dimension list or from a tensor.
std::string llama_format_tensor_shape(const std::vector<int64_t> & ne);
std::string llama_format_tensor_shape(const struct ggml_tensor * t);
// String rendering of entry `i` of a gguf context (exact output format not visible here).
std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i);

View File

@ -0,0 +1,125 @@
#pragma once
#include "llama.h"
#include <string>
#include <vector>
#include <memory>
struct LLM_KV;
struct llama_model_loader;
// Vocabulary / tokenizer interface. All state lives behind the private `pimpl` pointer,
// so this header exposes only declarations; behaviour is defined in the implementation file.
struct llama_vocab {
// per-token record: text, score and attribute flags
struct token_data {
std::string text;
float score;
llama_token_attr attr;
};
llama_vocab();
~llama_vocab();
void load(llama_model_loader & ml, const LLM_KV & kv);
enum llama_vocab_type get_type() const;
enum llama_vocab_pre_type get_pre_type() const;
uint32_t n_tokens() const;
uint32_t n_token_types() const;
std::string type_name() const;
// token classification predicates
bool is_normal (llama_token id) const;
bool is_unknown (llama_token id) const;
bool is_control (llama_token id) const;
bool is_byte (llama_token id) const;
bool is_user_defined(llama_token id) const;
bool is_unused (llama_token id) const;
bool is_eog (llama_token id) const;
// conversions between tokens, bytes and text
uint8_t token_to_byte(llama_token id) const;
llama_token byte_to_token(uint8_t ch) const;
llama_token text_to_token(const std::string & text) const;
// per-token accessors
const token_data & get_token_data(llama_token id) const;
const char * token_get_text (llama_token id) const;
float token_get_score(llama_token id) const;
llama_token_attr token_get_attr (llama_token id) const;
// special token ids
llama_token token_bos() const;
llama_token token_eos() const;
llama_token token_eot() const;
llama_token token_eom() const;
llama_token token_unk() const;
llama_token token_sep() const;
llama_token token_nl () const;
llama_token token_pad() const;
llama_token token_prefix() const;
llama_token token_middle() const;
llama_token token_suffix() const;
// fill-in-the-middle (FIM) token ids
llama_token token_fim_pre() const;
llama_token token_fim_suf() const;
llama_token token_fim_mid() const;
llama_token token_fim_pad() const;
llama_token token_fim_rep() const;
llama_token token_fim_sep() const;
// tokenizer flags
bool get_add_space_prefix () const;
bool get_add_bos () const;
bool get_add_eos () const;
bool get_ignore_merges () const;
bool get_clean_spaces () const;
bool get_remove_extra_whitespaces () const;
bool get_escape_whitespaces () const;
bool get_treat_whitespace_as_suffix() const;
int max_token_len() const;
int find_bpe_rank(const std::string & token_left, const std::string & token_right) const;
// C-style tokenize: writes into `tokens`, which has room for `n_tokens_max` entries
int32_t tokenize(
const char * text,
int32_t text_len,
llama_token * tokens,
int32_t n_tokens_max,
bool add_special,
bool parse_special) const;
// convenience overload returning the tokens as a vector
std::vector<llama_token> tokenize(
const std::string & raw_text,
bool add_special,
bool parse_special = false) const;
// does not write null-terminator to buf
int32_t token_to_piece(
llama_token token,
char * buf,
int32_t length,
int32_t lstrip,
bool special) const;
// use cached data
const std::string & token_to_piece(llama_token token) const;
// C-style detokenize: writes into `text`, which has room for `text_len_max` bytes
int32_t detokenize(
const llama_token * tokens,
int32_t n_tokens,
char * text,
int32_t text_len_max,
bool remove_special,
bool unparse_special) const;
// convenience overload returning the text as a string
std::string detokenize(
const std::vector<llama_token> & tokens,
bool special) const;
void print_info() const;
private:
// implementation details are hidden behind this pointer
struct impl;
std::unique_ptr<impl> pimpl;
};

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,358 @@
#pragma once
#include "llama-vocab.h"
#include <array>
/* Begin cringe so I can access the model's tok_embd */
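// The declarations below are copied from llama.cpp's internals; their members must stay in the same order and keep the same types so that the struct layout is exactly as it is under llama.cpp.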
// Array bounds used by the mirrored structs below.
#define LLAMA_MAX_LAYERS 512
#define LLAMA_MAX_EXPERTS 160 // DeepSeekV2
// Stub enums: only the UNKNOWN enumerator is declared here.
// NOTE(review): presumably placeholders so `llama_model` has members of the right size -
// confirm the underlying type matches the full enums in llama.cpp.
enum llm_type {
LLM_TYPE_UNKNOWN,
};
enum llm_arch {
LLM_ARCH_UNKNOWN,
};
// Gating function selector; `llama_hparams::expert_gating_func` defaults to NONE.
enum llama_expert_gating_func_type {
LLAMA_EXPERT_GATING_FUNC_TYPE_NONE = 0,
LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX = 1,
LLAMA_EXPERT_GATING_FUNC_TYPE_SIGMOID = 2,
};
// Embedded in llama_hparams as `posnet` (listed there under "for WavTokenizer").
struct llama_hparams_posnet {
uint32_t n_embd;
uint32_t n_layer;
};
// Embedded in llama_hparams as `convnext` (listed there under "for WavTokenizer").
struct llama_hparams_convnext {
uint32_t n_embd;
uint32_t n_layer;
};
// Local copy of llama.cpp's hyper-parameter struct. It is embedded by value in `llama_model`,
// so its size and member order determine the offsets of the tensor pointers that follow it:
// do not add, remove or reorder members.
// NOTE(review): the member functions at the end are only declared here; calling them relies
// on definitions provided elsewhere (presumably by llama.cpp) - confirm before use.
struct llama_hparams {
bool vocab_only;
bool rope_finetuned;
bool use_par_res;
bool swin_norm;
uint32_t n_ctx_train; // context size the model was trained on
uint32_t n_embd;
uint32_t n_embd_features = 0;
uint32_t n_layer;
uint32_t n_rot;
uint32_t n_swa = 0; // sliding window attention (SWA)
uint32_t n_swa_pattern = 1; // by default, all layers use non-sliding-window attention
uint32_t n_embd_head_k; // dimension of keys (d_k). d_q is assumed to be the same, but there are n_head q heads, and only n_head_kv k-v heads
uint32_t n_embd_head_v; // dimension of values (d_v) aka n_embd_head
uint32_t n_expert = 0;
uint32_t n_expert_used = 0;
uint32_t n_rel_attn_bkts = 0;
// for WavTokenizer
struct llama_hparams_posnet posnet;
struct llama_hparams_convnext convnext;
// per-layer arrays, sized by LLAMA_MAX_LAYERS
std::array<uint32_t, LLAMA_MAX_LAYERS> n_head_arr;
std::array<uint32_t, LLAMA_MAX_LAYERS> n_head_kv_arr;
std::array<uint32_t, LLAMA_MAX_LAYERS> n_ff_arr;
uint32_t n_layer_dense_lead = 0;
uint32_t n_lora_q = 0;
uint32_t n_lora_kv = 0;
uint32_t n_ff_exp = 0;
uint32_t n_ff_shexp = 0;
uint32_t n_expert_shared = 0;
uint32_t n_norm_groups = 0;
float expert_weights_scale = 0.0;
bool expert_weights_norm = false;
uint32_t expert_gating_func = LLAMA_EXPERT_GATING_FUNC_TYPE_NONE;
float f_norm_eps;
float f_norm_rms_eps;
float f_norm_group_eps;
float f_attn_logit_softcapping = 50.0f;
float f_final_logit_softcapping = 30.0f;
// for RWKV
uint32_t rescale_every_n_layers = 0;
uint32_t time_mix_extra_dim = 0;
uint32_t time_decay_extra_dim = 0;
uint32_t wkv_head_size = 0;
uint32_t token_shift_count = 2;
uint32_t n_lora_decay = 0;
uint32_t n_lora_iclr = 0;
uint32_t n_lora_value_res_mix = 0;
uint32_t n_lora_gate = 0;
float rope_attn_factor = 1.0f;
float rope_freq_base_train;
float rope_freq_base_train_swa;
float rope_freq_scale_train;
float rope_freq_scale_train_swa;
uint32_t n_ctx_orig_yarn;
float rope_yarn_log_mul;
std::array<int, 4> rope_sections;
// for State Space Models
uint32_t ssm_d_conv = 0;
uint32_t ssm_d_inner = 0;
uint32_t ssm_d_state = 0;
uint32_t ssm_dt_rank = 0;
bool ssm_dt_b_c_rms = false;
float f_clamp_kqv = 0.0f;
float f_max_alibi_bias = 0.0f;
float f_logit_scale = 0.0f;
// Additional scale factors (Granite/Granite MoE)
float f_residual_scale = 0.0f;
float f_embedding_scale = 0.0f;
float f_attention_scale = 0.0f;
bool causal_attn = true;
bool use_alibi = false;
bool attn_soft_cap = false;
// needed by encoder-decoder models (e.g. T5, FLAN-T5)
// ref: https://github.com/ggerganov/llama.cpp/pull/8141
llama_token dec_start_token_id = LLAMA_TOKEN_NULL;
enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_NONE;
enum llama_rope_type rope_type = LLAMA_ROPE_TYPE_NONE;
enum llama_rope_scaling_type rope_scaling_type_train = LLAMA_ROPE_SCALING_TYPE_NONE;
// per-layer accessors; `il` is the layer index
uint32_t n_head(uint32_t il = 0) const;
uint32_t n_head_kv(uint32_t il = 0) const;
uint32_t n_ff(uint32_t il = 0) const;
uint32_t n_gqa(uint32_t il = 0) const;
// dimension of key embeddings across all k-v heads
uint32_t n_embd_k_gqa(uint32_t il = 0) const;
// dimension of value embeddings across all k-v heads
uint32_t n_embd_v_gqa(uint32_t il = 0) const;
// dimension of the rolling state embeddings
// corresponds to Mamba's conv_states size or RWKV's token_shift states size
uint32_t n_embd_k_s() const;
// dimension of the recurrent state embeddings
uint32_t n_embd_v_s() const;
bool is_swa(uint32_t il) const;
};
// Local definition of `llama_model` that lets the helpers further down read `tok_embd`
// and read/write `output` directly.
// NOTE(review): presumably only the leading members of llama.cpp's real struct are
// reproduced; this is only valid while their order and types match the llama.cpp build
// being linked - confirm whenever llama.cpp is updated.
struct llama_model {
llm_type type = LLM_TYPE_UNKNOWN;
llm_arch arch = LLM_ARCH_UNKNOWN;
std::string name = "n/a";
llama_hparams hparams = {};
llama_vocab vocab;
struct ggml_tensor * tok_embd = nullptr; // returned by llama_get_embedding_weights()
struct ggml_tensor * type_embd = nullptr;
struct ggml_tensor * pos_embd = nullptr;
struct ggml_tensor * tok_norm = nullptr;
struct ggml_tensor * tok_norm_b = nullptr;
struct ggml_tensor * output_norm = nullptr;
struct ggml_tensor * output_norm_b = nullptr;
struct ggml_tensor * output = nullptr; // read / replaced by the output-head helpers
struct ggml_tensor * output_b = nullptr;
struct ggml_tensor * output_norm_enc = nullptr;
// classifier
struct ggml_tensor * cls = nullptr;
struct ggml_tensor * cls_b = nullptr;
struct ggml_tensor * cls_out = nullptr;
struct ggml_tensor * cls_out_b = nullptr;
struct ggml_tensor * conv1d = nullptr;
struct ggml_tensor * conv1d_b = nullptr;
};
// Same declarations as `llama_vocab`, but with `impl` defined and `pimpl` publicly
// accessible (there is no `private:` section). llama_set_output_head() casts a
// `llama_vocab *` to this type to reach `pimpl->id_to_token`.
// NOTE(review): `impl` here must match the leading members of llama.cpp's real
// llama_vocab::impl up to `id_to_token` - confirm against the llama.cpp version in use.
// NOTE(review): std::unordered_map is used below, but <unordered_map> is not among the
// includes visible in this header - presumably it arrives transitively; confirm.
struct llama_vocab_hack {
// per-token record: text, score and attribute flags
struct token_data {
std::string text;
float score;
llama_token_attr attr;
};
llama_vocab_hack();
~llama_vocab_hack();
void load(llama_model_loader & ml, const LLM_KV & kv);
enum llama_vocab_type get_type() const;
enum llama_vocab_pre_type get_pre_type() const;
uint32_t n_tokens() const;
uint32_t n_token_types() const;
std::string type_name() const;
// token classification predicates
bool is_normal (llama_token id) const;
bool is_unknown (llama_token id) const;
bool is_control (llama_token id) const;
bool is_byte (llama_token id) const;
bool is_user_defined(llama_token id) const;
bool is_unused (llama_token id) const;
bool is_eog (llama_token id) const;
// conversions between tokens, bytes and text
uint8_t token_to_byte(llama_token id) const;
llama_token byte_to_token(uint8_t ch) const;
llama_token text_to_token(const std::string & text) const;
// per-token accessors
const token_data & get_token_data(llama_token id) const;
const char * token_get_text (llama_token id) const;
float token_get_score(llama_token id) const;
llama_token_attr token_get_attr (llama_token id) const;
// special token ids
llama_token token_bos() const;
llama_token token_eos() const;
llama_token token_eot() const;
llama_token token_eom() const;
llama_token token_unk() const;
llama_token token_sep() const;
llama_token token_nl () const;
llama_token token_pad() const;
llama_token token_prefix() const;
llama_token token_middle() const;
llama_token token_suffix() const;
// fill-in-the-middle (FIM) token ids
llama_token token_fim_pre() const;
llama_token token_fim_suf() const;
llama_token token_fim_mid() const;
llama_token token_fim_pad() const;
llama_token token_fim_rep() const;
llama_token token_fim_sep() const;
// tokenizer flags
bool get_add_space_prefix () const;
bool get_add_bos () const;
bool get_add_eos () const;
bool get_ignore_merges () const;
bool get_clean_spaces () const;
bool get_remove_extra_whitespaces () const;
bool get_escape_whitespaces () const;
bool get_treat_whitespace_as_suffix() const;
int max_token_len() const;
int find_bpe_rank(const std::string & token_left, const std::string & token_right) const;
// C-style tokenize: writes into `tokens`, which has room for `n_tokens_max` entries
int32_t tokenize(
const char * text,
int32_t text_len,
llama_token * tokens,
int32_t n_tokens_max,
bool add_special,
bool parse_special) const;
// convenience overload returning the tokens as a vector
std::vector<llama_token> tokenize(
const std::string & raw_text,
bool add_special,
bool parse_special = false) const;
// does not write null-terminator to buf
int32_t token_to_piece(
llama_token token,
char * buf,
int32_t length,
int32_t lstrip,
bool special) const;
// use cached data
const std::string & token_to_piece(llama_token token) const;
// C-style detokenize: writes into `text`, which has room for `text_len_max` bytes
int32_t detokenize(
const llama_token * tokens,
int32_t n_tokens,
char * text,
int32_t text_len_max,
bool remove_special,
bool unparse_special) const;
// convenience overload returning the text as a string
std::string detokenize(
const std::vector<llama_token> & tokens,
bool special) const;
void print_info() const;
// exposed implementation state (hidden behind a forward declaration in llama_vocab)
struct impl {
uint32_t n_token_types = 0; // for BERT-style token types
enum llama_vocab_type type = LLAMA_VOCAB_TYPE_SPM;
enum llama_vocab_pre_type pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
int max_token_len = 0; // used for optimizing longest token search
// default LLaMA special tokens
// TODO: should we set all of these to LLAMA_TOKEN_NULL?
llama_token special_bos_id = 1;
llama_token special_eos_id = 2;
llama_token special_eot_id = LLAMA_TOKEN_NULL;
llama_token special_eom_id = LLAMA_TOKEN_NULL;
llama_token special_unk_id = 0;
llama_token special_sep_id = LLAMA_TOKEN_NULL;
llama_token special_pad_id = LLAMA_TOKEN_NULL;
llama_token special_mask_id = LLAMA_TOKEN_NULL;
llama_token linefeed_id = 13;
// fim tokens
llama_token special_fim_pre_id = LLAMA_TOKEN_NULL;
llama_token special_fim_suf_id = LLAMA_TOKEN_NULL;
llama_token special_fim_mid_id = LLAMA_TOKEN_NULL;
llama_token special_fim_pad_id = LLAMA_TOKEN_NULL;
llama_token special_fim_rep_id = LLAMA_TOKEN_NULL; // repo
llama_token special_fim_sep_id = LLAMA_TOKEN_NULL; // file separator
// tokenizer flags
bool add_space_prefix = false;
bool add_bos = false;
bool add_eos = false;
bool ignore_merges = false;
bool clean_spaces = false; // clean_up_tokenization_spaces
bool remove_extra_whitespaces = false;
bool escape_whitespaces = true;
bool treat_whitespace_as_suffix = false;
std::unordered_map<std::string, llama_token> token_to_id;
std::vector<token_data> id_to_token; // resized by llama_set_output_head()
};
std::unique_ptr<impl> pimpl;
};
/* BEGIN VALL-E SPECIFIC HELPERS */
// Returns the `tok_embd` tensor stored in `model`.
struct ggml_tensor * llama_get_embedding_weights(struct llama_model * model) {
    struct ggml_tensor * embeddings = model->tok_embd;
    return embeddings;
}
// Returns the `output` tensor currently stored in `model`.
struct ggml_tensor * llama_get_output_head_tensor(struct llama_model * model ) {
    struct ggml_tensor * head = model->output;
    return head;
}
void llama_set_output_head(struct llama_model * model, struct ggml_tensor* tensor ) {
// set the output tensor
model->output = tensor;
// required to properly output logits
llama_vocab_hack* vocab = (llama_vocab_hack*) const_cast<llama_vocab*>(llama_model_get_vocab( model ));
vocab->pimpl->id_to_token.resize( tensor->ne[1] );
// *const_cast<uint32_t*>(&model->hparams.n_vocab) = tensor->ne[1];
}
/* END VALL-E SPECIFIC HELPERS */
/* End cringe code */

View File

@ -0,0 +1,78 @@
#pragma once
#include "ggml.h"
#include "ggml-alloc.h"
#include "ops.h"
// Weight tensors for an LSTM with two layers (l0, l1). For each layer:
// ih_w / hh_w are the input->hidden and hidden->hidden weights, ih_b / hh_b the matching biases.
// Members are raw pointers with no initializers.
struct encodec_lstm {
struct ggml_tensor *l0_ih_w;
struct ggml_tensor *l0_hh_w;
struct ggml_tensor *l0_ih_b;
struct ggml_tensor *l0_hh_b;
struct ggml_tensor *l1_ih_w;
struct ggml_tensor *l1_hh_w;
struct ggml_tensor *l1_ih_b;
struct ggml_tensor *l1_hh_b;
};
// Adds the graph nodes for one LSTM layer, unrolled over every time step of `inp`.
//
//   ctx0      - context in which all new tensors are created
//   inp       - input sequence; ne[0] is read as the sequence length, ne[1] as the input size
//   weight_ih - input->hidden weights; ne[1] is read as 4 * hidden_dim (one slice per gate)
//   weight_hh - hidden->hidden weights
//   bias_ih,
//   bias_hh   - biases added to the input / hidden gate pre-activations
//   prefix    - name prefix; the initial cell / hidden state tensors are named
//               "<prefix>_ct" and "<prefix>_ht"
//
// Returns the per-step hidden states, transposed and made contiguous.
// The initial c_t / h_t and the `hs` buffer are flagged as graph inputs; presumably the
// caller fills them before the graph is computed - confirm at the call site.
struct ggml_tensor *forward_pass_lstm_unilayer(struct ggml_context *ctx0,
                                               struct ggml_tensor *inp,
                                               struct ggml_tensor *weight_ih,
                                               struct ggml_tensor *weight_hh,
                                               struct ggml_tensor *bias_ih,
                                               struct ggml_tensor *bias_hh,
                                               char *prefix) {
    const int seq_length = inp->ne[0];
    const int input_dim = inp->ne[1];
    const int hidden_dim = weight_ih->ne[1] / 4;

    // The buffers used to be 10 bytes, which only fits prefixes of up to 6 characters.
    // Longer prefixes were silently truncated, and "<prefix>_ct" / "<prefix>_ht" could
    // collapse into the same name. 64 bytes leaves ample room; sizeof() keeps the
    // snprintf bound tied to the buffer.
    char ct_name[64];
    char ht_name[64];
    snprintf(ct_name, sizeof(ct_name), "%s_ct", prefix);
    snprintf(ht_name, sizeof(ht_name), "%s_ht", prefix);

    // collects h_t for every time step
    struct ggml_tensor *hs = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, hidden_dim, seq_length);
    ggml_set_input(hs);

    // initial cell state
    struct ggml_tensor *c_t = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hidden_dim);
    ggml_set_input(c_t);
    ggml_set_name(c_t, ct_name);

    // initial hidden state
    struct ggml_tensor *h_t = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hidden_dim);
    ggml_set_input(h_t);
    ggml_set_name(h_t, ht_name);

    // transpose so that each time step is one contiguous row
    struct ggml_tensor *current = ggml_cont(ctx0, ggml_transpose(ctx0, inp));

    for (int t = 0; t < seq_length; t++) {
        struct ggml_tensor *x_t = ggml_view_1d(ctx0, current, input_dim, t * current->nb[1]);

        struct ggml_tensor *inp_gates = ggml_mul_mat(ctx0, weight_ih, x_t);
        inp_gates = ggml_add(ctx0, inp_gates, bias_ih);

        struct ggml_tensor *hid_gates = ggml_mul_mat(ctx0, weight_hh, h_t);
        hid_gates = ggml_add(ctx0, hid_gates, bias_hh);

        struct ggml_tensor *out_gates = ggml_add(ctx0, inp_gates, hid_gates);

        // the gate pre-activations are stored back to back in the order i, f, g, o
        struct ggml_tensor *i_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 0 * sizeof(float) * hidden_dim));
        struct ggml_tensor *f_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 1 * sizeof(float) * hidden_dim));
        struct ggml_tensor *g_t = ggml_tanh(ctx0   , ggml_view_1d(ctx0, out_gates, hidden_dim, 2 * sizeof(float) * hidden_dim));
        struct ggml_tensor *o_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 3 * sizeof(float) * hidden_dim));

        // c_t = f * c_{t-1} + i * g ;  h_t = o * tanh(c_t)
        c_t = ggml_add(ctx0, ggml_mul(ctx0, f_t, c_t), ggml_mul(ctx0, i_t, g_t));
        h_t = ggml_mul(ctx0, o_t, ggml_tanh(ctx0, c_t));

        hs = ggml_set_1d(ctx0, hs, h_t, t * hs->nb[1]);
    }

    hs = ggml_cont(ctx0, ggml_transpose(ctx0, hs));
    return hs;
}

View File

@ -0,0 +1,17 @@
#pragma once
#include "ggml.h"
// Graph-building helpers for 1-D signals. Each takes the context that owns new tensors and an
// input tensor, and returns the resulting tensor. Definitions are not in this header.
// NOTE(review): padding mode and which axis is padded are not visible here - confirm in ops.cpp.
struct ggml_tensor *pad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp,
int padding_left, int padding_right);
// Counterpart of pad_1d taking the same left/right amounts.
struct ggml_tensor *unpad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp,
int padding_left, int padding_right);
// 1-D convolution of `inp` with kernel `conv_w`, bias `conv_b` and the given `stride`.
struct ggml_tensor *strided_conv_1d(struct ggml_context *ctx0, struct ggml_tensor *inp,
struct ggml_tensor *conv_w, struct ggml_tensor *conv_b,
int stride);
// Transposed 1-D convolution with the same parameter layout as strided_conv_1d.
struct ggml_tensor *strided_conv_transpose_1d(struct ggml_context *ctx0, struct ggml_tensor *inp,
struct ggml_tensor *conv_w, struct ggml_tensor *conv_b,
int stride);

View File

@ -0,0 +1,111 @@
#pragma once
#include <cassert>
#include <vector>
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "utils.h"
// One quantizer stage: `embed` is its codebook (embedding table) tensor.
struct encodec_quant_block {
struct ggml_tensor *embed;
};
// Ordered list of quantizer stages; the encode/decode routines use the first n_q of them.
struct encodec_quantizer {
std::vector<encodec_quant_block> blocks;
};
struct ggml_tensor *encodec_forward_quantizer_encode(
const struct encodec_quantizer *quantizer, struct ggml_context *ctx0,
struct ggml_tensor *encoded_inp, const int n_bins, const int sr, const int bandwidth,
const int hop_length) {
if (!encoded_inp) {
fprintf(stderr, "%s: null input tensor\n", __func__);
return NULL;
}
const int frame_rate = (int)ceilf(sr / hop_length);
const int n_q = get_num_quantizers_for_bandwidth(n_bins, frame_rate, bandwidth);
const int seq_length = encoded_inp->ne[0];
struct ggml_tensor *codes = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, seq_length, n_q);
ggml_set_input(codes);
struct ggml_tensor *inpL = ggml_cont(ctx0, ggml_transpose(ctx0, encoded_inp));
struct ggml_tensor *residual = inpL;
struct ggml_tensor *indices;
for (int i = 0; i < n_q; i++) {
encodec_quant_block block = quantizer->blocks[i];
// compute distance
// [seq_length, n_bins]
struct ggml_tensor *dp = ggml_scale(
ctx0, ggml_mul_mat(ctx0, block.embed, residual), -2.0f);
// [n_bins]
struct ggml_tensor *sqr_embed = ggml_sqr(ctx0, block.embed);
struct ggml_tensor *sqr_embed_nrm = ggml_sum_rows(ctx0, sqr_embed);
// [seq_length]
struct ggml_tensor *sqr_inp = ggml_sqr(ctx0, residual);
struct ggml_tensor *sqr_inp_nrm = ggml_sum_rows(ctx0, sqr_inp);
// [seq_length, n_bins]
struct ggml_tensor *dist = ggml_add(ctx0, ggml_repeat(ctx0, sqr_inp_nrm, dp), dp);
dist = ggml_add(ctx0, ggml_repeat(ctx0, ggml_transpose(ctx0, sqr_embed_nrm), dist), dist);
dist = ggml_neg(ctx0, dist);
// take the argmax over the column dimension
// [seq_length]
indices = ggml_argmax(ctx0, dist);
// look up in embedding table
struct ggml_tensor *quantized = ggml_get_rows(ctx0, block.embed, indices);
residual = ggml_sub(ctx0, residual, quantized);
codes = ggml_set_1d(ctx0, codes, indices, i * codes->nb[1]);
}
return codes;
}
struct ggml_tensor *encodec_forward_quantizer_decode(
const struct encodec_quantizer *quantizer, struct ggml_context *ctx0,
struct ggml_tensor *codes, const int hidden_dim, const int n_bins, const int sr, const int bandwidth,
const int hop_length) {
if (!codes) {
fprintf(stderr, "%s: null input tensor\n", __func__);
return NULL;
}
const int seq_length = codes->ne[0];
const int frame_rate = (int)ceilf(sr / hop_length);
const int n_q = get_num_quantizers_for_bandwidth(n_bins, frame_rate, bandwidth);
assert(n_q == codes->ne[1]);
struct ggml_tensor *quantized_out = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, hidden_dim, seq_length);
ggml_set_input(quantized_out);
ggml_set_name(quantized_out, "quantized_out");
for (int i = 0; i < n_q; i++) {
encodec_quant_block block = quantizer->blocks[i];
struct ggml_tensor *indices = ggml_view_1d(ctx0, codes, seq_length, i * codes->nb[1]);
struct ggml_tensor *quantized = ggml_get_rows(ctx0, block.embed, indices);
quantized_out = ggml_add(ctx0, quantized_out, quantized);
}
quantized_out = ggml_cont(ctx0, ggml_transpose(ctx0, quantized_out));
return quantized_out;
}

View File

@ -0,0 +1,30 @@
#pragma once
#include <cstddef>
// Function-like min/max macros. Each argument is evaluated twice, so do not pass
// expressions with side effects (e.g. MAX(i++, j)).
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
// Number of bytes in one mebibyte.
const size_t MB = 1024 * 1024;
// Reads sizeof(T) raw bytes from `infile` straight into the storage of `dest`.
// The stream state is not checked here; callers that care must inspect `infile` afterwards.
template <typename T>
void read_safe(std::ifstream &infile, T &dest) {
    char *raw_bytes = reinterpret_cast<char *>(&dest);
    infile.read(raw_bytes, sizeof(T));
}
// Number of codebooks needed for a target bandwidth (given in kbps).
// Supported bandwidths are 1.5kbps (n_q = 2), 3 kbps (n_q = 4), 6 kbps (n_q = 8),
// 12 kbps (n_q = 16) and 24kbps (n_q = 32).
int32_t get_num_codebooks(float bandwidth, int hop_length, float sample_rate) {
    // frames per second, rounded up
    const float frame_rate = ceilf(sample_rate / hop_length);
    // kbps -> bits per second
    const float bits_per_second = 1000 * bandwidth;
    // each codebook accounts for 10 bits per frame
    return (int32_t)ceilf(bits_per_second / (frame_rate * 10));
}
// Bits per second carried by one quantizer: log2(bins) bits per frame times the frame rate,
// truncated to an integer.
// Returns 0 when `bins` <= 1 or `frame_rate` <= 0. Previously bins <= 0 fed -inf/NaN into
// the float -> int conversion, which is undefined behaviour.
int32_t get_bandwidth_per_quantizer(int bins, float frame_rate) {
    if (bins <= 1 || !(frame_rate > 0.0f)) {
        return 0;
    }
    return log2f((float)bins) * frame_rate;
}

// Number of quantizers that fit into `bandwidth` (kbps); always at least 1.
// Falls back to 1 when the per-quantizer bandwidth is not positive or the result is not a
// number. Previously this divided by zero and converted inf/NaN to int32_t, which is
// undefined behaviour.
int32_t get_num_quantizers_for_bandwidth(int bins, float frame_rate, float bandwidth) {
    const float bw_per_q = get_bandwidth_per_quantizer(bins, frame_rate);
    if (!(bw_per_q > 0.0f)) {
        return 1;
    }
    const float n_q = floorf(bandwidth * 1000 / bw_per_q);
    // written as a negated >= so that NaN also falls back to 1
    if (!(n_q >= 1.0f)) {
        return 1;
    }
    return (int32_t)n_q;
}

View File

@ -0,0 +1,178 @@
#pragma once
// C++ deps
#include <string>
#include <vector>
#include <unordered_map>
#include <llama.h>
// handles defining platform specific macros and import/export decorators (copied from my engine's uf/config.h)
// Platform detection. Each branch defines:
//   VALL_E_ENV          - platform name as a string
//   VALL_E_ENV_<NAME>   - flag macro set to 1 for the detected platform
//   VALL_E_ENV_HEADER   - name of a platform header
//   VALL_E_IO_ROOT      - root directory string for data files
#if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
// Windows
#define VALL_E_ENV "Windows"
#define VALL_E_ENV_WINDOWS 1
#define VALL_E_ENV_HEADER "windows.h"
#if defined(__CYGWIN__)
// NOTE(review): this function-like macro rewrites every later `to_string(x)` in the
// translation unit - including `std::to_string(x)` - into `string(x)`.
#define to_string(var) string(var)
#endif
// request at least the 0x0600 Windows API level unless the build already chose one
#ifndef _WIN32_WINNT
#define _WIN32_WINNT 0x0600
#endif
#ifndef WINVER
#define WINVER 0x0600
#endif
#define VALL_E_IO_ROOT "./data/"
#elif defined(linux) || defined(__linux)
// Linux
#define VALL_E_ENV "Linux"
#define VALL_E_ENV_LINUX 1
#define VALL_E_ENV_HEADER "linux.h"
#define VALL_E_IO_ROOT "./data/"
#elif defined(__APPLE__) || defined(MACOSX) || defined(macintosh) || defined(Macintosh)
// MacOS
#define VALL_E_ENV "OSX"
#define VALL_E_ENV_OSX 1
#define VALL_E_ENV_HEADER "osx.h"
#define VALL_E_IO_ROOT "./data/"
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
// FreeBSD
#define VALL_E_ENV "FreeBSD"
#define VALL_E_ENV_FREEBSD 1
#define VALL_E_ENV_HEADER "freebsd.h"
#define VALL_E_IO_ROOT "./data/"
#elif defined(__sh__)
// Dreamcast
#define VALL_E_ENV "Dreamcast"
#define VALL_E_ENV_DREAMCAST 1
#define VALL_E_ENV_HEADER "dreamcast.h"
// the only branch that actually includes its platform header
#include VALL_E_ENV_HEADER
#define _arch_dreamcast
#define VALL_E_IO_ROOT "/cd/"
#else
// Unsupported system
#define VALL_E_ENV "Unknown"
#define VALL_E_ENV_UNKNOWN 1
#define VALL_E_ENV_HEADER "unknown.h"
// compilation stops at the #error below
#warning Using "unknown"
#error No support
#endif
// Symbol import/export decorators. VALL_E_API resolves to the export form when
// VALL_E_EXPORTS is defined and to the import form otherwise.
#if !defined(VALL_E_STATIC)
#if defined(VALL_E_ENV_WINDOWS)
// Windows compilers need specific (and different) keywords for export and import
#define VALL_E_API_EXPORT __declspec(dllexport)
#define VALL_E_API_IMPORT __declspec(dllimport)
// For Visual C++ compilers, we also need to turn off this annoying C4251 warning
#ifdef _MSC_VER
#pragma warning(disable : 4251)
#endif
#else // Linux, FreeBSD, Mac OS X
#if __GNUC__ >= 4
// GCC 4 has special keywords for showing/hiding symbols,
// the same keyword is used for both importing and exporting
#define VALL_E_API_EXPORT __attribute__ ((__visibility__ ("default")))
#define VALL_E_API_IMPORT __attribute__ ((__visibility__ ("default")))
#else
// GCC < 4 has no mechanism to explicitly hide symbols, everything's exported
#define VALL_E_API_EXPORT
#define VALL_E_API_IMPORT
#endif
#endif
#else
// Static build doesn't need import/export macros
#define VALL_E_API_EXPORT
#define VALL_E_API_IMPORT
#endif
#ifdef VALL_E_EXPORTS
#define VALL_E_API VALL_E_API_EXPORT
#else
#define VALL_E_API VALL_E_API_IMPORT
#endif
// a single token id, shared with llama.cpp
using token_t = llama_token;
// audio codes stored as a vector of token vectors
using vall_e_audio_codes_t = std::vector<std::vector<token_t>>;
// audio frame rate and the default generation limit derived from it (12 seconds of frames)
constexpr int ENCODEC_FRAMES_PER_SECOND = 75;
constexpr int MAX_DURATION              = ENCODEC_FRAMES_PER_SECOND * 12;
// defaults copied into vall_e_context_params_t
constexpr int CTX_SIZE     = 2048;
constexpr int N_THREADS    = 8;
constexpr int N_GPU_LAYERS = 99;
// values accepted for the `modality` arguments
constexpr int MODALITY_AR_NAR  = 0;
constexpr int MODALITY_NAR_LEN = 1;
// forward declarations
struct io_map_t;
struct llama_model;
struct llama_context;
struct encodec_context;
// model-specific parameters
struct vall_e_context_params_t {
std::string model_path = "./data/vall_e.gguf";
std::string encodec_path = "./data/encodec.bin";
int32_t gpu_layers = N_GPU_LAYERS;
int32_t n_threads = N_THREADS;
int32_t ctx_size = CTX_SIZE;
bool verbose = false;
};
// inference-specific arguments
struct vall_e_args_t {
std::string text = "Hello world.";
std::string prompt_path = "./data/prom.wav";
std::string output_path = "./data/resp.wav";
std::string language = "en";
std::string task = "tts";
int modality = MODALITY_NAR_LEN;
int max_steps = 30;
int max_duration = MAX_DURATION;
};
// stores everything needed for vall_e.cpp at runtime
// Every handle starts out NULL so that callers can test for a successfully loaded context
// even when the object was default-initialized (e.g. `vall_e_context_t c;` or
// `new vall_e_context_t`).
struct vall_e_context_t {
    vall_e_context_params_t params;
    io_map_t* io_map = NULL; // pointer for reasons
    struct {
        llama_model* model = NULL;
        llama_context* ctx = NULL;
    } llama;
    struct {
        // previously had no initializer, unlike its siblings, leaving an indeterminate
        // pointer after default-initialization
        encodec_context* ctx = NULL;
    } encodec;
};
// Raw inputs handed to generation.
struct vall_e_inputs_t {
    std::string task{ "tts" };
    std::string lang{ "en" };
    token_t rvq_l{ 0 };
    // token sequence plus the prompt / response audio codes; all start out empty
    std::vector<token_t> phn{};
    vall_e_audio_codes_t prom{};
    vall_e_audio_codes_t resp{};
};
// encodec helpers
// Waveforms are passed as std::vector<float>; codes as a vector of per-level token vectors.
// NOTE(review): sample rate / channel layout expected by these helpers is not visible here - confirm in vall_e.cpp.
VALL_E_API std::vector<float> read_audio_from_disk( const std::string& path );
VALL_E_API void write_audio_to_disk( const std::vector<float>& waveform, const std::string& path );
// NOTE(review): returns std::vector<std::vector<int32_t>> while decode_audio takes vall_e_audio_codes_t;
// these are the same type only if llama_token is int32_t.
VALL_E_API std::vector<std::vector<int32_t>> encode_audio( struct encodec_context* ectx, const std::vector<float>& waveform );
VALL_E_API std::vector<float> decode_audio( struct encodec_context* ectx, const vall_e_audio_codes_t& codes_2d );
// context management
VALL_E_API void vall_e_print_usage( char** argv, const vall_e_context_params_t& params, const vall_e_args_t& args );
// Parses argc/argv into `params` and `args`; the meaning of the bool result is defined by the implementation.
VALL_E_API bool vall_e_args_parse( int argc, char** argv, vall_e_context_params_t& params, vall_e_args_t& args );
// Returns a context pointer that is later passed to vall_e_free().
VALL_E_API vall_e_context_t* vall_e_load( const vall_e_context_params_t& params );
// Note the defaults: `lang` defaults to "auto" here, whereas vall_e_args_t::language defaults to "en".
VALL_E_API vall_e_inputs_t vall_e_prepare_inputs( vall_e_context_t* ctx, const std::string& text, const std::string& prompt_path, const std::string& lang = "auto", const std::string& task = "tts" );
// `inputs` is taken by non-const reference, so generation may modify it.
VALL_E_API vall_e_audio_codes_t vall_e_generate( vall_e_context_t* ctx, vall_e_inputs_t& inputs, int max_steps, int max_duration, int modality = MODALITY_NAR_LEN );
VALL_E_API void vall_e_free( vall_e_context_t* ctx );

View File

@ -27,9 +27,12 @@ To be filled.
* *very* loosely integrated
* basic shapes and triangulated mesh collision and some form of ray queries
* OpenAL for audio
* Currently only loads from ogg (vorbis) files
* Currently only loads from `.ogg` (vorbis) files
* Supports loading in full and streaming
* *very* loosely integrated
* Speech synthesis using [vall_e.cpp](https://github.com/e-c-k-e-r/vall-e/)
* `win64.gcc.vulkan` binaries can be found [here](https://github.com/e-c-k-e-r/vall-e/releases/tag/vall_e.cpp), if compiled.
* currently only generates `.wav` files
## Supported Systems

View File

@ -0,0 +1,16 @@
#pragma once
#include <uf/config.h>
#if UF_USE_VALL_E
#include <vall_e.cpp/vall_e.h>
// Engine-side wrapper around vall_e.cpp (only compiled when UF_USE_VALL_E is set).
namespace ext {
namespace vall_e {
// Loads the models. An empty path selects the implementation's default location.
void UF_API initialize( const std::string& model_path = "", const std::string& encodec_path = "" );
// Synthesizes `text` using `prom` as the prompt and `lang` as the language.
// NOTE(review): the meaning of the returned string (presumably the path of the generated
// audio file) is not visible in this header - confirm in the implementation.
std::string UF_API generate( const std::string& text, const std::string& prom, const std::string& lang = "en" );
void UF_API terminate();
}
}
#endif

View File

@ -7,4 +7,4 @@
#include "universal.h"
// defines which implementation to use
#include UF_ENV_HEADER
//
//

View File

@ -7,4 +7,4 @@
#include "universal.h"
// defines which implementation to use
#include UF_ENV_HEADER
// this line is required
//

View File

@ -7,4 +7,4 @@
#include "universal.h"
// defines which implementation to use
#include UF_ENV_HEADER
//
//

View File

@ -12,6 +12,7 @@ namespace spec {
protected:
public:
spec::uni::Time::time_t UF_API_CALL unixTime();
spec::uni::Time::time_t UF_API_CALL getTime();
};
};

View File

@ -7,4 +7,4 @@
#include "universal.h"
// defines which implementation to use
#include UF_ENV_HEADER
//
//

View File

@ -18,8 +18,9 @@
namespace uf {
namespace thread {
extern UF_API uf::stl::string workerThreadName;
extern UF_API uf::stl::string mainThreadName;
extern UF_API uf::stl::string workerThreadName;
extern UF_API uf::stl::string asyncThreadName;
}
}

View File

@ -90,6 +90,8 @@ namespace uf {
extern UF_API double previous;
extern UF_API float delta;
extern UF_API float clamp;
size_t UF_API time();
}
}

View File

@ -118,7 +118,26 @@ namespace {
this->scroll.bottom = true;
reclaimFocus = true;
uf::console::execute( command );
// to-do: add a way to either asynchronously invoke commands or not
uf::thread::queue( uf::thread::asyncThreadName, [=](){
uf::console::execute( command );
});
/*
// this blocks
uf::thread::queue( uf::thread::fetchWorker(), [=](){
uf::console::execute( command );
});
*/
/*
// this still blocks
auto tasks = uf::thread::schedule(true);
tasks.queue([=](){
uf::console::execute( command );
});
uf::thread::execute( tasks );
*/
}
ImGui::SetItemDefaultFocus();

View File

@ -0,0 +1,52 @@
#include <uf/ext/vall_e/vall_e.h>
#include <uf/utils/time/time.h>
#if UF_USE_VALL_E
namespace {
	// The single synthesis context for this translation unit:
	// assigned by ext::vall_e::initialize(), released by ext::vall_e::terminate().
	vall_e_context_t* ctx = nullptr;
}
void ext::vall_e::initialize( const std::string& model_path, const std::string& encodec_path ) {
vall_e_context_params_t params;
params.model_path = model_path == "" ? "./data/llm/vall_e.gguf" : model_path;
params.encodec_path = encodec_path == "" ? "./data/llm/encodec.bin" : encodec_path;
params.gpu_layers = N_GPU_LAYERS;
params.n_threads = N_THREADS;
params.ctx_size = CTX_SIZE;
params.verbose = false;
::ctx = vall_e_load( params );
if ( !::ctx || !::ctx->llama.model || !::ctx->llama.ctx || !::ctx->encodec.ctx ) {
UF_MSG_ERROR("failed to initialize vall_e.cpp");
return;
}
}
// Synthesizes speech for `text` and writes it to a timestamp-named .wav under ./data/tmp/.
//   text: the text to speak
//   prom: path to the prompt passed to vall_e_prepare_inputs
//   lang: language code; an empty string falls back to "en"
// Returns the path of the written file, or "" when no usable context is loaded.
std::string ext::vall_e::generate( const std::string& text, const std::string& prom, const std::string& lang ) {
	// Mirror the checks initialize() uses to decide a load failed: a non-null context
	// with missing members must not reach the generation/decoding calls below.
	if ( !::ctx || !::ctx->llama.model || !::ctx->llama.ctx || !::ctx->encodec.ctx ) return "";

	std::string path = "./data/tmp/" + std::to_string(uf::time::time()) + ".wav";

	vall_e_args_t args;
	args.text = text;
	args.prompt_path = prom;
	args.output_path = path;
	args.language = lang.empty() ? "en" : lang;
	args.task = "tts";
	args.modality = MODALITY_NAR_LEN;
	args.max_steps = 30;
	args.max_duration = MAX_DURATION;

	// pass the task explicitly so the value configured above is the one actually used
	auto inputs = vall_e_prepare_inputs( ::ctx, args.text, args.prompt_path, args.language, args.task );
	auto output_audio_codes = vall_e_generate( ::ctx, inputs, args.max_steps, args.max_duration, args.modality );
	auto waveform = decode_audio( ::ctx->encodec.ctx, output_audio_codes );
	write_audio_to_disk( waveform, args.output_path );
	//UF_MSG_DEBUG("Generated to {}", path);
	return path;
}
// Releases the context created by initialize(). Safe to call when nothing is loaded
// and safe to call more than once.
void ext::vall_e::terminate() {
	if ( !::ctx ) return;
	vall_e_free( ::ctx );
	// clear the pointer so generate() sees "not loaded" and a repeated terminate() cannot double free
	::ctx = nullptr;
}
#endif

View File

@ -12,6 +12,9 @@ namespace {
chrono_time_t start = getTimePoint();
}
// Microseconds elapsed since the epoch of the clock behind getTimePoint().
// NOTE(review): this is Unix time only if that clock's epoch is the Unix epoch -- confirm.
spec::uni::Time::time_t spec::uni::Time::unixTime() {
	const auto sinceEpoch = getTimePoint().time_since_epoch();
	return std::chrono::duration_cast<std::chrono::microseconds>(sinceEpoch).count();
}
spec::uni::Time::time_t spec::uni::Time::getTime() {
std::chrono::duration<double> elapsed = getTimePoint() - start;
return elapsed.count() * 1000000;

View File

@ -38,18 +38,29 @@ void uf::console::initialize() {
});
// callHook <name> [json]: invokes the named hook, optionally passing a JSON payload,
// and reports back what each handler returned.
uf::console::registerCommand("callHook", "Calls a hook, passing the arguments as a JSON object", [&]( const uf::stl::string& arguments )->uf::stl::string{
// match[1] is the hook name, match[2] (when present) is the payload text
auto match = uf::string::match( arguments, "/^(.+?)(?: (.+?))?$/" );
auto match = uf::string::match( arguments, "/^\"?(.+?)\"?(?: (.+?))?$/" );
if ( match.empty() ) return "invalid invocation";
// one entry per value returned from the hook call
uf::stl::vector<pod::Hook::userdata_t> results;
if ( match.size() > 2 ) {
// a payload was supplied: decode it as JSON and hand it to the hook
ext::json::Value json;
ext::json::decode( json, match[2] );
uf::hooks.call( match[1], json );
results = uf::hooks.call( match[1], json );
} else {
uf::hooks.call( match[1] );
results = uf::hooks.call( match[1] );
}
return "Hook executed: " + match[1];
// this could probably be its own function
// append one "[index] => value" line per result: strings verbatim, JSON re-encoded,
// anything else as a raw pointer
uf::stl::string s_result = "";
for ( auto i = 0; i < results.size(); ++i ) {
auto& res = results[i];
if ( res.is<uf::stl::string>() ) s_result += ::fmt::format("\n[{}] => {}", i, res.as<uf::stl::string>());
else if ( res.is<ext::json::Value>() ) s_result += ::fmt::format("\n[{}] => {}", i, ext::json::encode( res.as<ext::json::Value>() ));
else s_result += ::fmt::format("\n[{}] => Userdata: {}", i, (void*) res);
}
return "Hook executed: " + match[1] + s_result;
});
uf::console::registerCommand("json", "Modifies the gamestate by setting a JSON value", [&]( const uf::stl::string& arguments )->uf::stl::string{

View File

@ -8,8 +8,9 @@ float uf::thread::limiter = 1.0f / 120.0f;
uint uf::thread::workers = 1;
std::thread::id uf::thread::mainThreadId = std::this_thread::get_id();
bool uf::thread::async = false;
uf::stl::string uf::thread::workerThreadName = "Worker";
uf::stl::string uf::thread::mainThreadName = "Main";
uf::stl::string uf::thread::workerThreadName = "Worker";
uf::stl::string uf::thread::asyncThreadName = "Async";
#define UF_THREAD_ANNOUNCE(...) UF_MSG_DEBUG(__VA_ARGS__)

View File

@ -10,4 +10,8 @@ size_t uf::time::frame = 0;
double uf::time::current = 0;
double uf::time::previous = 0;
float uf::time::delta = 0;
float uf::time::clamp = 0;
float uf::time::clamp = 0;
// Current timestamp as reported by spec::time.unixTime(), converted to size_t.
size_t uf::time::time() {
	const auto stamp = spec::time.unixTime();
	return stamp;
}

View File

@ -1,10 +1,10 @@
#include "main.h"
#include "ext.h"
#include <uf/ext/ext.h>
#include <uf/ext/oal/oal.h>
#include <uf/spec/terminal/terminal.h>
#include <uf/spec/controller/controller.h>
#include <fstream>
#include <iostream>
#include <regex>
#include <sys/stat.h>
#include <uf/utils/time/time.h>
#include <uf/utils/audio/audio.h>
@ -22,32 +22,27 @@
#include <uf/utils/graphic/graphic.h>
#include <uf/utils/camera/camera.h>
#include <uf/utils/http/http.h>
#include <uf/utils/renderer/renderer.h>
#include <uf/utils/io/console.h>
#include <uf/utils/io/inputs.h>
#include <uf/spec/terminal/terminal.h>
#include <uf/spec/controller/controller.h>
#include <uf/utils/memory/string.h>
#include <uf/engine/entity/entity.h>
#include <uf/engine/graph/graph.h>
#include <uf/utils/io/inputs.h>
#include <sys/stat.h>
#include <uf/utils/memory/string.h>
#include <fstream>
#include <iostream>
#include <regex>
#include "ext.h"
#include <uf/engine/scene/scene.h>
#include <uf/engine/asset/asset.h>
#include <uf/utils/renderer/renderer.h>
#include <uf/utils/io/console.h>
#include <uf/ext/ext.h>
#include <uf/ext/oal/oal.h>
#include <uf/ext/discord/discord.h>
#include <uf/ext/openvr/openvr.h>
#include <uf/ext/lua/lua.h>
#include <uf/ext/ultralight/ultralight.h>
#include <uf/ext/imgui/imgui.h>
#include <uf/ext/ffx/fsr.h>
#include <uf/ext/imgui/imgui.h>
#include <uf/ext/vall_e/vall_e.h>
bool ext::ready = false;
uf::stl::vector<uf::stl::string> ext::arguments;
@ -88,6 +83,11 @@ namespace {
struct {
bool enabled;
} ultralight, discord, imgui;
struct {
bool enabled;
std::string model_path = "";
std::string encodec_path = "";
} vall_e;
} ext;
struct {
@ -118,6 +118,10 @@ void EXT_API ext::load( ext::json::Value& json ) {
::config.engine.ext.ultralight.enabled = json["engine"]["ext"]["ultralight"]["enabled"].as(::config.engine.ext.ultralight.enabled);
::config.engine.ext.discord.enabled = json["engine"]["ext"]["discord"]["enabled"].as(::config.engine.ext.discord.enabled);
::config.engine.ext.imgui.enabled = json["engine"]["ext"]["imgui"]["enabled"].as(::config.engine.ext.imgui.enabled);
::config.engine.ext.vall_e.enabled = json["engine"]["ext"]["vall_e"]["enabled"].as(::config.engine.ext.vall_e.enabled);
::config.engine.ext.vall_e.model_path = json["engine"]["ext"]["vall_e"]["model_path"].as(::config.engine.ext.vall_e.model_path);
::config.engine.ext.vall_e.encodec_path = json["engine"]["ext"]["vall_e"]["encodec_path"].as(::config.engine.ext.vall_e.encodec_path);
::config.engine.limiter.print = json["engine"]["debug"]["framerate"]["print"].as(::config.engine.limiter.print);
@ -305,6 +309,9 @@ void EXT_API ext::initialize() {
/* Setup deferred Main thread */ {
uf::thread::get(uf::thread::mainThreadName);
}
/* Setup non-blocking, asynchronous thread */ {
uf::thread::get(uf::thread::asyncThreadName);
}
/* set JSON implicit preferences */ {
ext::json::PREFERRED_ENCODING = ::json["engine"]["ext"]["json"]["encoding"].as(ext::json::PREFERRED_ENCODING);
ext::json::PREFERRED_COMPRESSION = ::json["engine"]["ext"]["json"]["compression"].as(ext::json::PREFERRED_COMPRESSION);
@ -717,6 +724,23 @@ void EXT_API ext::initialize() {
if ( ::config.engine.ext.imgui.enabled ) {
// ext::imgui::initialize();
}
#endif
#if UF_USE_VALL_E
if ( ::config.engine.ext.vall_e.enabled ) {
ext::vall_e::initialize( ::config.engine.ext.vall_e.model_path, ::config.engine.ext.vall_e.encodec_path );
// bind the hook
uf::hooks.addHook( "llm:VALL-E.synthesize", [&](ext::json::Value& json){
auto text = json["text"].as<uf::stl::string>();
auto prom = json["prom"].as<uf::stl::string>();
auto path = ext::vall_e::generate( text, prom );
UF_MSG_DEBUG("Called {} {}: {}", text, prom, path);
return path;
});
}
#endif
/* Add hooks */ {
@ -1138,6 +1162,11 @@ void EXT_API ext::terminate() {
/* Terminate controllers */ {
spec::controller::terminate();
}
#if UF_USE_VALL_E
if ( ::config.engine.ext.vall_e.enabled ) {
ext::vall_e::terminate();
}
#endif
#if UF_USE_IMGUI
if ( ::config.engine.ext.imgui.enabled ) {
ext::imgui::terminate();