crammed in vall_e.cpp support to finally justify creating it (and a bunch of other things)
This commit is contained in:
parent
4f1ce314a5
commit
73ca9bb168
2
.gitignore
vendored
2
.gitignore
vendored
@ -55,3 +55,5 @@
|
||||
*.otf
|
||||
*.bin
|
||||
models/
|
||||
llm/
|
||||
tmp/
|
||||
11
Makefile
11
Makefile
@ -62,10 +62,10 @@ LIBS += -L$(ENGINE_LIB_DIR) -L$(LIB_DIR)/$(PREFIX_PATH) -L$(LIB_DIR)/$(ARCH
|
||||
|
||||
LINKS += $(UF_LIBS) $(EXT_LIBS) $(DEPS)
|
||||
DEPS +=
|
||||
FLAGS +=
|
||||
FLAGS += # -DUF_DEBUG
|
||||
|
||||
ifneq (,$(findstring -DUF_DEBUG,$(FLAGS)))
|
||||
REQ_DEPS += meshoptimizer toml xatlas curl ffx:fsr cpptrace # ncurses openvr draco discord bullet ultralight-ux
|
||||
REQ_DEPS += meshoptimizer toml xatlas curl ffx:fsr cpptrace vall_e # ncurses openvr draco discord bullet ultralight-ux
|
||||
FLAGS += -g
|
||||
endif
|
||||
ifneq (,$(findstring win64,$(ARCH)))
|
||||
@ -215,7 +215,7 @@ ifneq (,$(findstring bullet,$(REQ_DEPS)))
|
||||
DEPS += -lbulletdynamics -lbulletcollision -lbulletlinearmath
|
||||
else
|
||||
DEPS += -lBulletDynamics -lBulletCollision -lLinearMath
|
||||
INCS += -I./dep/bullet/
|
||||
INCS += -I./dep/include/bullet/
|
||||
endif
|
||||
endif
|
||||
ifneq (,$(findstring reactphysics,$(REQ_DEPS)))
|
||||
@ -248,6 +248,11 @@ endif
|
||||
ifneq (,$(findstring toml,$(REQ_DEPS)))
|
||||
FLAGS += -DUF_USE_TOML
|
||||
endif
|
||||
ifneq (,$(findstring vall_e,$(REQ_DEPS)))
|
||||
FLAGS += -DUF_USE_VALL_E
|
||||
INCS += -I./dep/include/vall_e.cpp/
|
||||
DEPS += -lvall_e
|
||||
endif
|
||||
|
||||
# SRCS_DLL += $(wildcard $(ENGINE_SRC_DIR)/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*/*/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*/*/*/*.cpp)
|
||||
#SRCS_DLL += $(wildcard $(ENGINE_SRC_DIR)/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*/*/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*/*/*/*.cpp) $(wildcard $(EXT_SRC_DIR)/*.cpp) $(wildcard $(EXT_SRC_DIR)/*/*.cpp) $(wildcard $(EXT_SRC_DIR)/*/*/*.cpp) $(wildcard $(EXT_SRC_DIR)/*/*/*/*.cpp) $(wildcard $(EXT_SRC_DIR)/*/*/*/*/*.cpp)
|
||||
|
||||
@ -18,7 +18,7 @@ To compile, run `make`. The outputted libraries and executables will be placed i
|
||||
|
||||
## Run
|
||||
|
||||
Currently, assets are not provided due to size (but mostly due to being test assets).
|
||||
Currently, a barebones setup is provided via the [`.zip` bundle](https://github.com/e-c-k-e-r/engine/releases/tag/bundle).
|
||||
|
||||
*If* adequate assets are provided, run `./program.sh` or `make run`. This ensures the path to the required libraries is added to the PATH.
|
||||
|
||||
|
||||
@ -288,6 +288,9 @@
|
||||
"encoding": "msgpack",
|
||||
"compression": "gz"
|
||||
},
|
||||
"vall_e": {
|
||||
"enabled": true
|
||||
},
|
||||
"imgui": {
|
||||
"enabled": true
|
||||
},
|
||||
|
||||
113
dep/include/vall_e.cpp/decoder.h
Normal file
113
dep/include/vall_e.cpp/decoder.h
Normal file
@ -0,0 +1,113 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-alloc.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#include "lstm.h"
|
||||
#include "utils.h"
|
||||
|
||||
|
||||
// Tensors for one decoder stage, as read by encodec_forward_decoder().
// Each *_w / *_b pair is passed together to a single convolution helper call.
// Every pointer defaults to nullptr so a default-initialized block never
// carries indeterminate pointer values.
struct encodec_decoder_block {
    // upsampling layers (handed to strided_conv_transpose_1d)
    struct ggml_tensor *us_conv_w = nullptr;
    struct ggml_tensor *us_conv_b = nullptr;

    // conv1 (first convolution of the residual branch)
    struct ggml_tensor *conv_1_w = nullptr;
    struct ggml_tensor *conv_1_b = nullptr;

    // conv2 (second convolution of the residual branch)
    struct ggml_tensor *conv_2_w = nullptr;
    struct ggml_tensor *conv_2_b = nullptr;

    // shortcut (convolution applied on the skip path before the residual add)
    struct ggml_tensor *conv_sc_w = nullptr;
    struct ggml_tensor *conv_sc_b = nullptr;
};
|
||||
|
||||
struct encodec_decoder {
|
||||
struct ggml_tensor *init_conv_w;
|
||||
struct ggml_tensor *init_conv_b;
|
||||
|
||||
encodec_lstm lstm;
|
||||
|
||||
struct ggml_tensor *final_conv_w;
|
||||
struct ggml_tensor *final_conv_b;
|
||||
|
||||
std::vector<encodec_decoder_block> blocks;
|
||||
};
|
||||
|
||||
struct ggml_tensor *encodec_forward_decoder(
|
||||
const struct encodec_decoder *decoder, struct ggml_context *ctx0,
|
||||
struct ggml_tensor *quantized_out, const int *ratios, const int kernel_size, const int res_kernel_size,
|
||||
const int stride) {
|
||||
|
||||
if (!quantized_out) {
|
||||
fprintf(stderr, "%s: null input tensor\n", __func__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct ggml_tensor *inpL = strided_conv_1d(
|
||||
ctx0, quantized_out, decoder->init_conv_w, decoder->init_conv_b, stride);
|
||||
|
||||
// lstm
|
||||
{
|
||||
struct ggml_tensor *cur = inpL;
|
||||
|
||||
const encodec_lstm lstm = decoder->lstm;
|
||||
|
||||
// first lstm layer
|
||||
char l0_prefix[7] = "dec_l0";
|
||||
struct ggml_tensor *hs1 = forward_pass_lstm_unilayer(
|
||||
ctx0, cur, lstm.l0_ih_w, lstm.l0_hh_w, lstm.l0_ih_b, lstm.l0_hh_b, l0_prefix);
|
||||
|
||||
// second lstm layer
|
||||
char l1_prefix[7] = "dec_l1";
|
||||
struct ggml_tensor *out = forward_pass_lstm_unilayer(
|
||||
ctx0, hs1, lstm.l1_ih_w, lstm.l1_hh_w, lstm.l1_ih_b, lstm.l1_hh_b, l1_prefix);
|
||||
|
||||
inpL = ggml_add(ctx0, inpL, out);
|
||||
}
|
||||
|
||||
for (int layer_ix = 0; layer_ix < 4; layer_ix++) {
|
||||
encodec_decoder_block block = decoder->blocks[layer_ix];
|
||||
|
||||
// upsampling layers
|
||||
inpL = ggml_elu(ctx0, inpL);
|
||||
|
||||
inpL = strided_conv_transpose_1d(
|
||||
ctx0, inpL, block.us_conv_w, block.us_conv_b, ratios[layer_ix]);
|
||||
|
||||
struct ggml_tensor *current = inpL;
|
||||
|
||||
// shortcut
|
||||
struct ggml_tensor *shortcut = strided_conv_1d(
|
||||
ctx0, inpL, block.conv_sc_w, block.conv_sc_b, stride);
|
||||
|
||||
// conv1
|
||||
current = ggml_elu(ctx0, current);
|
||||
|
||||
current = strided_conv_1d(
|
||||
ctx0, current, block.conv_1_w, block.conv_1_b, stride);
|
||||
|
||||
// conv2
|
||||
current = ggml_elu(ctx0, current);
|
||||
|
||||
current = strided_conv_1d(
|
||||
ctx0, current, block.conv_2_w, block.conv_2_b, stride);
|
||||
|
||||
// residual connection
|
||||
inpL = ggml_add(ctx0, current, shortcut);
|
||||
}
|
||||
|
||||
// final conv
|
||||
inpL = ggml_elu(ctx0, inpL);
|
||||
|
||||
struct ggml_tensor *decoded_inp = strided_conv_1d(
|
||||
ctx0, inpL, decoder->final_conv_w, decoder->final_conv_b, stride);
|
||||
|
||||
return decoded_inp;
|
||||
}
|
||||
6434
dep/include/vall_e.cpp/dr_wav.h
Normal file
6434
dep/include/vall_e.cpp/dr_wav.h
Normal file
File diff suppressed because it is too large
Load Diff
184
dep/include/vall_e.cpp/encodec.h
Normal file
184
dep/include/vall_e.cpp/encodec.h
Normal file
@ -0,0 +1,184 @@
|
||||
/*
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2024 Pierre-Antoine Bannier │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
/*
|
||||
* This file contains the declarations of the structs and functions used in the encodec library.
|
||||
* The library provides functionality for audio compression and decompression using a custom model.
|
||||
* The model consists of an encoder, a quantizer and a decoder, each with their own set of parameters.
|
||||
* The library also provides functions for loading and freeing the model, as well as compressing and decompressing audio data.
|
||||
*
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "ggml-alloc.h"
|
||||
#include "ggml-backend.h"
|
||||
#include "ggml.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
struct encodec_context;
|
||||
|
||||
// Timing counters exposed through encodec_get_statistics() and cleared by
// encodec_reset_statistics(). The `_us` suffix suggests microseconds
// (NOTE(review): unit not stated in this header - confirm).
struct encodec_statistics {
    int64_t t_load_us;     // time taken to load the model
    int64_t t_compute_us;  // time taken to compute the model
};
|
||||
|
||||
/**
|
||||
* Loads an encodec model from the specified file path.
|
||||
*
|
||||
* @param model_path The file path to the encodec model.
|
||||
* @param offset The offset (in bytes) to the start of the model in the file.
|
||||
* @param n_gpu_layers The number of GPU layers to use.
|
||||
* @return A pointer to the encodec context struct.
|
||||
*/
|
||||
struct encodec_context *encodec_load_model(
|
||||
const char *model_path,
|
||||
const int offset,
|
||||
int n_gpu_layers);
|
||||
|
||||
/**
|
||||
* Sets the target bandwidth for the given encodec context.
|
||||
*
|
||||
* @param ectx The encodec context to set the target bandwidth for.
|
||||
* @param bandwidth The target bandwidth to set, in bits per second.
|
||||
*/
|
||||
void encodec_set_target_bandwidth(
|
||||
struct encodec_context *ectx,
|
||||
int bandwidth);
|
||||
|
||||
/**
|
||||
* Sets the sample rate for the given encodec context.
|
||||
*
|
||||
* @param ectx The encodec context to set the sample rate for.
|
||||
* @param sample_rate The sample rate to set.
|
||||
*/
|
||||
void encodec_set_sample_rate(
|
||||
struct encodec_context *ectx,
|
||||
int sample_rate);
|
||||
|
||||
/**
|
||||
* Reconstructs audio from raw audio data using the specified encodec context.
|
||||
*
|
||||
* @param ectx The encodec context to use for reconstruction.
|
||||
* @param raw_audio The raw audio data to reconstruct.
|
||||
* @param n_samples The number of samples in the raw audio buffer.
|
||||
* @param n_threads The number of threads to use for reconstruction.
|
||||
* @return True if the reconstruction was successful, false otherwise.
|
||||
*/
|
||||
bool encodec_reconstruct_audio(
|
||||
struct encodec_context *ectx,
|
||||
const float *raw_audio,
|
||||
const int n_samples,
|
||||
int n_threads);
|
||||
|
||||
/**
|
||||
* Compresses audio data using the specified encodec context.
|
||||
*
|
||||
* @param ectx The encodec context to use for compression.
|
||||
* @param raw_audio The raw audio data to compress.
|
||||
* @param n_samples The number of samples in the raw audio buffer.
|
||||
* @param n_threads The number of threads to use for compression.
|
||||
* @return True if the compression was successful, false otherwise.
|
||||
*/
|
||||
bool encodec_compress_audio(
|
||||
struct encodec_context *ectx,
|
||||
const float *raw_audio,
|
||||
const int n_samples,
|
||||
int n_threads);
|
||||
|
||||
/**
|
||||
* Decompresses audio data using the specified encodec context.
|
||||
*
|
||||
* @param ectx The encodec context to use for decompression.
|
||||
* @param codes The compressed audio data to decompress.
|
||||
* @param n_codes The number of codes in the codes buffer.
|
||||
* @param n_threads The number of threads to use for decompression.
|
||||
* @return True if the audio data was successfully decompressed, false otherwise.
|
||||
*/
|
||||
bool encodec_decompress_audio(
|
||||
struct encodec_context *ectx,
|
||||
const int32_t *codes,
|
||||
const int n_codes,
|
||||
int n_threads);
|
||||
|
||||
/**
|
||||
* Gets the audio data from the given encodec context.
|
||||
*
|
||||
* @param ectx The encodec context to get the audio data from.
|
||||
* @return A pointer to the audio data.
|
||||
*/
|
||||
float * encodec_get_audio(
|
||||
struct encodec_context *ectx);
|
||||
|
||||
/**
|
||||
* Gets the size of the audio data from the given encodec context.
|
||||
*
|
||||
* @param ectx The encodec context to get the audio size from.
|
||||
* @return The size of the audio data.
|
||||
*/
|
||||
int encodec_get_audio_size(
|
||||
struct encodec_context *ectx);
|
||||
|
||||
/**
|
||||
* Gets the code data from the given encodec context.
|
||||
*
|
||||
* @param ectx The encodec context to get the code data from.
|
||||
* @return A pointer to the code data.
|
||||
*/
|
||||
int32_t * encodec_get_codes(
|
||||
struct encodec_context *ectx);
|
||||
|
||||
/**
|
||||
* Gets the size of the code data from the given encodec context.
|
||||
*
|
||||
* @param ectx The encodec context to get the code size from.
|
||||
* @return The size of the code data.
|
||||
*/
|
||||
int encodec_get_codes_size(
|
||||
struct encodec_context *ectx);
|
||||
|
||||
/**
|
||||
* Gets the statistics for the given encodec context.
|
||||
*
|
||||
* @param ectx The encodec context to get the statistics for.
|
||||
* @return A pointer to the statistics struct.
|
||||
*/
|
||||
const struct encodec_statistics* encodec_get_statistics(
|
||||
struct encodec_context *ectx);
|
||||
|
||||
/**
|
||||
* Reset the statistics for the given encodec context.
|
||||
*
|
||||
* @param ectx The encodec context to reset the statistics for.
|
||||
*/
|
||||
void encodec_reset_statistics(
|
||||
struct encodec_context *ectx);
|
||||
|
||||
/**
|
||||
* @brief Frees the memory allocated for an encodec context.
|
||||
*
|
||||
* @param ectx The encodec context to free.
|
||||
*/
|
||||
void encodec_free(
|
||||
struct encodec_context *ectx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
109
dep/include/vall_e.cpp/encoder.h
Normal file
109
dep/include/vall_e.cpp/encoder.h
Normal file
@ -0,0 +1,109 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ggml.h"
|
||||
#include "lstm.h"
|
||||
|
||||
// res + downsample block at some ratio
|
||||
// res + downsample block at some ratio.
// Tensors for one encoder stage, as read by encodec_forward_encoder().
// Every pointer defaults to nullptr so a default-initialized block never
// carries indeterminate pointer values.
struct encodec_encoder_block {
    // conv1 (first convolution of the residual branch)
    struct ggml_tensor *conv_1_w = nullptr;
    struct ggml_tensor *conv_1_b = nullptr;

    // conv2 (second convolution of the residual branch)
    struct ggml_tensor *conv_2_w = nullptr;
    struct ggml_tensor *conv_2_b = nullptr;

    // shortcut (convolution applied on the skip path before the residual add)
    struct ggml_tensor *conv_sc_w = nullptr;
    struct ggml_tensor *conv_sc_b = nullptr;

    // downsampling layers (strided convolution run after the residual add)
    struct ggml_tensor *ds_conv_w = nullptr;
    struct ggml_tensor *ds_conv_b = nullptr;
};
|
||||
|
||||
struct encodec_encoder {
|
||||
struct ggml_tensor *init_conv_w;
|
||||
struct ggml_tensor *init_conv_b;
|
||||
|
||||
encodec_lstm lstm;
|
||||
|
||||
struct ggml_tensor *final_conv_w;
|
||||
struct ggml_tensor *final_conv_b;
|
||||
|
||||
std::vector<encodec_encoder_block> blocks;
|
||||
};
|
||||
|
||||
struct ggml_tensor *encodec_forward_encoder(
|
||||
const struct encodec_encoder *encoder, struct ggml_context *ctx0,
|
||||
struct ggml_tensor *inp, const int * ratios, const int kernel_size, const int res_kernel_size,
|
||||
const int stride) {
|
||||
|
||||
if (!inp) {
|
||||
fprintf(stderr, "%s: null input tensor\n", __func__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct ggml_tensor *inpL = strided_conv_1d(
|
||||
ctx0, inp, encoder->init_conv_w, encoder->init_conv_b, stride);
|
||||
|
||||
for (int layer_ix = 0; layer_ix < 4; layer_ix++) {
|
||||
encodec_encoder_block block = encoder->blocks[layer_ix];
|
||||
|
||||
struct ggml_tensor *current = inpL;
|
||||
|
||||
// shortcut
|
||||
struct ggml_tensor *shortcut = strided_conv_1d(
|
||||
ctx0, inpL, block.conv_sc_w, block.conv_sc_b, stride);
|
||||
|
||||
// conv1
|
||||
current = ggml_elu(ctx0, current);
|
||||
|
||||
current = strided_conv_1d(
|
||||
ctx0, current, block.conv_1_w, block.conv_1_b, stride);
|
||||
|
||||
// conv2
|
||||
current = ggml_elu(ctx0, current);
|
||||
|
||||
current = strided_conv_1d(
|
||||
ctx0, current, block.conv_2_w, block.conv_2_b, stride);
|
||||
|
||||
// residual connection
|
||||
inpL = ggml_add(ctx0, current, shortcut);
|
||||
|
||||
// downsampling layers
|
||||
inpL = ggml_elu(ctx0, inpL);
|
||||
|
||||
inpL = strided_conv_1d(
|
||||
ctx0, inpL, block.ds_conv_w, block.ds_conv_b, ratios[3 - layer_ix]);
|
||||
}
|
||||
|
||||
// lstm
|
||||
{
|
||||
struct ggml_tensor *cur = inpL;
|
||||
|
||||
const encodec_lstm lstm = encoder->lstm;
|
||||
|
||||
// first lstm layer
|
||||
char l0_prefix[7] = "enc_l0";
|
||||
struct ggml_tensor *hs1 = forward_pass_lstm_unilayer(
|
||||
ctx0, cur, lstm.l0_ih_w, lstm.l0_hh_w, lstm.l0_ih_b, lstm.l0_hh_b, l0_prefix);
|
||||
|
||||
// second lstm layer
|
||||
char l1_prefix[7] = "enc_l1";
|
||||
struct ggml_tensor *out = forward_pass_lstm_unilayer(
|
||||
ctx0, hs1, lstm.l1_ih_w, lstm.l1_hh_w, lstm.l1_ih_b, lstm.l1_hh_b, l1_prefix);
|
||||
|
||||
inpL = ggml_add(ctx0, inpL, out);
|
||||
}
|
||||
|
||||
// final conv
|
||||
inpL = ggml_elu(ctx0, inpL);
|
||||
|
||||
struct ggml_tensor *encoded_inp = strided_conv_1d(
|
||||
ctx0, inpL, encoder->final_conv_w, encoder->final_conv_b, stride);
|
||||
|
||||
return encoded_inp;
|
||||
}
|
||||
103
dep/include/vall_e.cpp/espeak-ng/encoding.h
Normal file
103
dep/include/vall_e.cpp/espeak-ng/encoding.h
Normal file
@ -0,0 +1,103 @@
|
||||
/*
|
||||
* Copyright (C) 2017 Reece H. Dunn
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, see: <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef ESPEAK_NG_ENCODING_H
|
||||
#define ESPEAK_NG_ENCODING_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/* Identifiers for the text encodings accepted by the text decoder functions
 * declared below. Numbering is sequential from 0; the values are spelled out
 * so that the gap left by the missing ISO-8859-12 is easy to see. */
typedef enum
{
    ESPEAKNG_ENCODING_UNKNOWN         = 0,
    ESPEAKNG_ENCODING_US_ASCII        = 1,
    ESPEAKNG_ENCODING_ISO_8859_1      = 2,
    ESPEAKNG_ENCODING_ISO_8859_2      = 3,
    ESPEAKNG_ENCODING_ISO_8859_3      = 4,
    ESPEAKNG_ENCODING_ISO_8859_4      = 5,
    ESPEAKNG_ENCODING_ISO_8859_5      = 6,
    ESPEAKNG_ENCODING_ISO_8859_6      = 7,
    ESPEAKNG_ENCODING_ISO_8859_7      = 8,
    ESPEAKNG_ENCODING_ISO_8859_8      = 9,
    ESPEAKNG_ENCODING_ISO_8859_9      = 10,
    ESPEAKNG_ENCODING_ISO_8859_10     = 11,
    ESPEAKNG_ENCODING_ISO_8859_11     = 12,
    // ISO-8859-12 is not a valid encoding.
    ESPEAKNG_ENCODING_ISO_8859_13     = 13,
    ESPEAKNG_ENCODING_ISO_8859_14     = 14,
    ESPEAKNG_ENCODING_ISO_8859_15     = 15,
    ESPEAKNG_ENCODING_ISO_8859_16     = 16,
    ESPEAKNG_ENCODING_KOI8_R          = 17,
    ESPEAKNG_ENCODING_ISCII           = 18,
    ESPEAKNG_ENCODING_UTF_8           = 19,
    ESPEAKNG_ENCODING_ISO_10646_UCS_2 = 20,
} espeak_ng_ENCODING;
|
||||
|
||||
ESPEAK_NG_API espeak_ng_ENCODING
|
||||
espeak_ng_EncodingFromName(const char *encoding);
|
||||
|
||||
typedef struct espeak_ng_TEXT_DECODER_ espeak_ng_TEXT_DECODER;
|
||||
|
||||
ESPEAK_NG_API espeak_ng_TEXT_DECODER *
|
||||
create_text_decoder(void);
|
||||
|
||||
ESPEAK_NG_API void
|
||||
destroy_text_decoder(espeak_ng_TEXT_DECODER *decoder);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
text_decoder_decode_string(espeak_ng_TEXT_DECODER *decoder,
|
||||
const char *string,
|
||||
int length,
|
||||
espeak_ng_ENCODING encoding);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
text_decoder_decode_string_auto(espeak_ng_TEXT_DECODER *decoder,
|
||||
const char *string,
|
||||
int length,
|
||||
espeak_ng_ENCODING encoding);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
text_decoder_decode_wstring(espeak_ng_TEXT_DECODER *decoder,
|
||||
const wchar_t *string,
|
||||
int length);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
text_decoder_decode_string_multibyte(espeak_ng_TEXT_DECODER *decoder,
|
||||
const void *input,
|
||||
espeak_ng_ENCODING encoding,
|
||||
int flags);
|
||||
|
||||
ESPEAK_NG_API int
|
||||
text_decoder_eof(espeak_ng_TEXT_DECODER *decoder);
|
||||
|
||||
ESPEAK_NG_API uint32_t
|
||||
text_decoder_getc(espeak_ng_TEXT_DECODER *decoder);
|
||||
|
||||
ESPEAK_NG_API uint32_t
|
||||
text_decoder_peekc(espeak_ng_TEXT_DECODER *decoder);
|
||||
|
||||
ESPEAK_NG_API const void *
|
||||
text_decoder_get_buffer(espeak_ng_TEXT_DECODER *decoder);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
223
dep/include/vall_e.cpp/espeak-ng/espeak_ng.h
Normal file
223
dep/include/vall_e.cpp/espeak-ng/espeak_ng.h
Normal file
@ -0,0 +1,223 @@
|
||||
/* eSpeak NG API.
|
||||
*
|
||||
* Copyright (C) 2015-2017 Reece H. Dunn
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef ESPEAK_NG_H
|
||||
#define ESPEAK_NG_H
|
||||
|
||||
#include <espeak-ng/speak_lib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#ifdef LIBESPEAK_NG_EXPORT
|
||||
#define ESPEAK_NG_API __declspec(dllexport)
|
||||
#else
|
||||
#define ESPEAK_NG_API __declspec(dllimport)
|
||||
#endif
|
||||
#else
|
||||
#define ESPEAK_NG_API
|
||||
#endif
|
||||
|
||||
#define ESPEAKNG_DEFAULT_VOICE "en"
|
||||
|
||||
/* Status codes returned by the espeak_ng_* functions.
 * The bits selected by ENS_GROUP_MASK name the group a code belongs to. */
typedef enum {
    /* Group selector bits. */
    ENS_GROUP_MASK               = 0x70000000,
    /* Values 0-255 map to errno error codes. */
    ENS_GROUP_ERRNO              = 0x00000000,
    /* eSpeak NG error codes. */
    ENS_GROUP_ESPEAK_NG          = 0x10000000,

    /* eSpeak NG 1.49.0 */
    ENS_OK                       = 0,
    ENS_COMPILE_ERROR            = 0x100001FF,
    ENS_VERSION_MISMATCH         = 0x100002FF,
    ENS_FIFO_BUFFER_FULL         = 0x100003FF,
    ENS_NOT_INITIALIZED          = 0x100004FF,
    ENS_AUDIO_ERROR              = 0x100005FF,
    ENS_VOICE_NOT_FOUND          = 0x100006FF,
    ENS_MBROLA_NOT_FOUND         = 0x100007FF,
    ENS_MBROLA_VOICE_NOT_FOUND   = 0x100008FF,
    ENS_EVENT_BUFFER_FULL        = 0x100009FF,
    ENS_NOT_SUPPORTED            = 0x10000AFF,
    ENS_UNSUPPORTED_PHON_FORMAT  = 0x10000BFF,
    ENS_NO_SPECT_FRAMES          = 0x10000CFF,
    ENS_EMPTY_PHONEME_MANIFEST   = 0x10000DFF,
    ENS_SPEECH_STOPPED           = 0x10000EFF,

    /* eSpeak NG 1.49.2 */
    ENS_UNKNOWN_PHONEME_FEATURE  = 0x10000FFF,
    ENS_UNKNOWN_TEXT_ENCODING    = 0x100010FF,
} espeak_ng_STATUS;
|
||||
|
||||
/* Output mode values taken by espeak_ng_InitializeOutput().
 * The two values occupy distinct bits. */
typedef enum {
    ENOUTPUT_MODE_SYNCHRONOUS = 0x0001,
    ENOUTPUT_MODE_SPEAK_AUDIO = 0x0002,
} espeak_ng_OUTPUT_MODE;
|
||||
|
||||
/* Voice gender identifiers; numbered 0 through 3. */
typedef enum {
    ENGENDER_UNKNOWN = 0,
    ENGENDER_MALE    = 1,
    ENGENDER_FEMALE  = 2,
    ENGENDER_NEUTRAL = 3,
} espeak_ng_VOICE_GENDER;
|
||||
|
||||
/* Set of four callback pointers handed to espeak_ng_SetOutputHooks(). */
typedef struct
{
    /* receives a phoneme code string and a phoneme type */
    void (*outputPhoSymbol)(char *pho_code, int pho_type);
    /* receives an echo tail value */
    void (*outputSilence)(short echo_tail);
    /* receives one sample value */
    void (*outputVoiced)(short sample);
    /* receives one sample value */
    void (*outputUnvoiced)(short sample);
} espeak_ng_OUTPUT_HOOKS;
|
||||
|
||||
/* eSpeak NG 1.49.0 */
|
||||
|
||||
typedef struct espeak_ng_ERROR_CONTEXT_ *espeak_ng_ERROR_CONTEXT;
|
||||
|
||||
ESPEAK_NG_API void
|
||||
espeak_ng_ClearErrorContext(espeak_ng_ERROR_CONTEXT *context);
|
||||
|
||||
ESPEAK_NG_API void
|
||||
espeak_ng_GetStatusCodeMessage(espeak_ng_STATUS status,
|
||||
char *buffer,
|
||||
size_t length);
|
||||
|
||||
ESPEAK_NG_API void
|
||||
espeak_ng_PrintStatusCodeMessage(espeak_ng_STATUS status,
|
||||
FILE *out,
|
||||
espeak_ng_ERROR_CONTEXT context);
|
||||
|
||||
ESPEAK_NG_API void
|
||||
espeak_ng_InitializePath(const char *path);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_Initialize(espeak_ng_ERROR_CONTEXT *context);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_InitializeOutput(espeak_ng_OUTPUT_MODE output_mode,
|
||||
int buffer_length,
|
||||
const char *device);
|
||||
|
||||
ESPEAK_NG_API int
|
||||
espeak_ng_GetSampleRate(void);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_SetParameter(espeak_PARAMETER parameter,
|
||||
int value,
|
||||
int relative);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_SetPhonemeEvents(int enable, int ipa);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_SetPunctuationList(const wchar_t *punctlist);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_SetVoiceByName(const char *name);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_SetVoiceByFile(const char *filename);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_SetVoiceByProperties(espeak_VOICE *voice_selector);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_Synthesize(const void *text,
|
||||
size_t size,
|
||||
unsigned int position,
|
||||
espeak_POSITION_TYPE position_type,
|
||||
unsigned int end_position,
|
||||
unsigned int flags,
|
||||
unsigned int *unique_identifier,
|
||||
void *user_data);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_SynthesizeMark(const void *text,
|
||||
size_t size,
|
||||
const char *index_mark,
|
||||
unsigned int end_position,
|
||||
unsigned int flags,
|
||||
unsigned int *unique_identifier,
|
||||
void *user_data);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_SpeakKeyName(const char *key_name);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_SpeakCharacter(wchar_t character);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_Cancel(void);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_Synchronize(void);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_Terminate(void);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_CompileDictionary(const char *dsource,
|
||||
const char *dict_name,
|
||||
FILE *log,
|
||||
int flags,
|
||||
espeak_ng_ERROR_CONTEXT *context);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_CompileMbrolaVoice(const char *path,
|
||||
FILE *log,
|
||||
espeak_ng_ERROR_CONTEXT *context);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_CompilePhonemeData(long rate,
|
||||
FILE *log,
|
||||
espeak_ng_ERROR_CONTEXT *context);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_CompileIntonation(FILE *log,
|
||||
espeak_ng_ERROR_CONTEXT *context);
|
||||
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_CompileIntonationPath(const char *source_path,
|
||||
const char *destination_path,
|
||||
FILE *log,
|
||||
espeak_ng_ERROR_CONTEXT *context);
|
||||
|
||||
/* eSpeak NG 1.49.1 */
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_CompilePhonemeDataPath(long rate,
|
||||
const char *source_path,
|
||||
const char *destination_path,
|
||||
FILE *log,
|
||||
espeak_ng_ERROR_CONTEXT *context);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_SetOutputHooks(espeak_ng_OUTPUT_HOOKS* hooks);
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_SetConstF0(int f0);
|
||||
|
||||
ESPEAK_NG_API espeak_ng_STATUS
|
||||
espeak_ng_SetRandSeed(long seed);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
709
dep/include/vall_e.cpp/espeak-ng/speak_lib.h
Normal file
709
dep/include/vall_e.cpp/espeak-ng/speak_lib.h
Normal file
@ -0,0 +1,709 @@
|
||||
#ifndef SPEAK_LIB_H
|
||||
#define SPEAK_LIB_H
|
||||
/***************************************************************************
|
||||
* Copyright (C) 2005 to 2012 by Jonathan Duddington *
|
||||
* email: jonsd@users.sourceforge.net *
|
||||
* *
|
||||
* This program is free software; you can redistribute it and/or modify *
|
||||
* it under the terms of the GNU General Public License as published by *
|
||||
* the Free Software Foundation; either version 3 of the License, or *
|
||||
* (at your option) any later version. *
|
||||
* *
|
||||
* This program is distributed in the hope that it will be useful, *
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
||||
* GNU General Public License for more details. *
|
||||
* *
|
||||
* You should have received a copy of the GNU General Public License *
|
||||
* along with this program; if not, see: *
|
||||
* <http://www.gnu.org/licenses/>. *
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
/*************************************************************/
|
||||
/* This is the header file for the library version of espeak */
|
||||
/* */
|
||||
/*************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#ifdef LIBESPEAK_NG_EXPORT
|
||||
#define ESPEAK_API __declspec(dllexport)
|
||||
#else
|
||||
#define ESPEAK_API __declspec(dllimport)
|
||||
#endif
|
||||
#else
|
||||
#define ESPEAK_API
|
||||
#endif
|
||||
|
||||
#define ESPEAK_API_REVISION 12
|
||||
/*
|
||||
Revision 2
|
||||
Added parameter "options" to eSpeakInitialize()
|
||||
|
||||
Revision 3
|
||||
Added espeakWORDGAP to espeak_PARAMETER
|
||||
|
||||
Revision 4
|
||||
Added flags parameter to espeak_CompileDictionary()
|
||||
|
||||
Revision 5
|
||||
Added espeakCHARS_16BIT
|
||||
|
||||
Revision 6
|
||||
Added macros: espeakRATE_MINIMUM, espeakRATE_MAXIMUM, espeakRATE_NORMAL
|
||||
|
||||
Revision 7 24.Dec.2011
|
||||
Changed espeak_EVENT structure to add id.string[] for phoneme mnemonics.
|
||||
Added espeakINITIALIZE_PHONEME_IPA option for espeak_Initialize() to report phonemes as IPA names.
|
||||
|
||||
Revision 8 26.Apr.2013
|
||||
Added function espeak_TextToPhonemes().
|
||||
|
||||
Revision 9 30.May.2013
|
||||
Changed function espeak_TextToPhonemes().
|
||||
|
||||
Revision 10 29.Aug.2014
|
||||
Changed phonememode parameter to espeak_TextToPhonemes() and espeak_SetPhonemeTrace
|
||||
|
||||
Revision 11 (espeak-ng)
|
||||
Made ESPEAK_API import/export symbols correctly on Windows.
|
||||
|
||||
Revision 12 (espeak-ng)
|
||||
Exposed espeak_SetPhonemeCallback. This is available in eSpeak, but was not exposed in this header.
|
||||
|
||||
*/
|
||||
/********************/
|
||||
/* Initialization */
|
||||
/********************/
|
||||
|
||||
// values for 'value' in espeak_SetParameter(espeakRATE, value, 0), nominally in words-per-minute
|
||||
#define espeakRATE_MINIMUM 80
|
||||
#define espeakRATE_MAXIMUM 450
|
||||
#define espeakRATE_NORMAL 175
|
||||
|
||||
|
||||
/* Kinds of event reported through the `type` field of espeak_EVENT. */
typedef enum {
    /* Retrieval mode: terminates the event list. */
    espeakEVENT_LIST_TERMINATED = 0,
    /* Start of word */
    espeakEVENT_WORD = 1,
    /* Start of sentence */
    espeakEVENT_SENTENCE = 2,
    /* Mark */
    espeakEVENT_MARK = 3,
    /* Audio element */
    espeakEVENT_PLAY = 4,
    /* End of sentence or clause */
    espeakEVENT_END = 5,
    /* End of message */
    espeakEVENT_MSG_TERMINATED = 6,
    /* Phoneme, if enabled in espeak_Initialize() */
    espeakEVENT_PHONEME = 7,
    /* Set sample rate */
    espeakEVENT_SAMPLERATE = 8
} espeak_EVENT_TYPE;
|
||||
|
||||
|
||||
|
||||
typedef struct {
|
||||
espeak_EVENT_TYPE type;
|
||||
unsigned int unique_identifier; // message identifier (or 0 for key or character)
|
||||
int text_position; // the number of characters from the start of the text
|
||||
int length; // word length, in characters (for espeakEVENT_WORD)
|
||||
int audio_position; // the time in mS within the generated speech output data
|
||||
int sample; // sample id (internal use)
|
||||
void* user_data; // pointer supplied by the calling program
|
||||
union {
|
||||
int number; // used for WORD and SENTENCE events.
|
||||
const char *name; // used for MARK and PLAY events. UTF8 string
|
||||
char string[8]; // used for phoneme names (UTF8). Terminated by a zero byte unless the name needs the full 8 bytes.
|
||||
} id;
|
||||
} espeak_EVENT;
|
||||
/*
|
||||
When a message is supplied to espeak_Synth, the request is buffered and espeak_Synth returns. When the message is really processed, the callback function will be repeatedly called.
|
||||
|
||||
|
||||
In RETRIEVAL mode, the callback function supplies to the calling program the audio data and an event list terminated by 0 (LIST_TERMINATED).
|
||||
|
||||
In PLAYBACK mode, the callback function is called as soon as an event happens.
|
||||
|
||||
For example suppose that the following message is supplied to espeak_Synth:
|
||||
"hello, hello."
|
||||
|
||||
|
||||
* Once processed in RETRIEVAL mode, it could lead to 3 calls of the callback function :
|
||||
|
||||
** Block 1:
|
||||
<audio data> +
|
||||
List of events: SENTENCE + WORD + LIST_TERMINATED
|
||||
|
||||
** Block 2:
|
||||
<audio data> +
|
||||
List of events: WORD + END + LIST_TERMINATED
|
||||
|
||||
** Block 3:
|
||||
no audio data
|
||||
List of events: MSG_TERMINATED + LIST_TERMINATED
|
||||
|
||||
|
||||
* Once processed in PLAYBACK mode, it could lead to 5 calls of the callback function:
|
||||
|
||||
** SENTENCE
|
||||
** WORD (call when the sounds are actually played)
|
||||
** WORD
|
||||
** END (call when the end of sentence is actually played.)
|
||||
** MSG_TERMINATED
|
||||
|
||||
|
||||
The MSG_TERMINATED event is the last event. It can inform the calling program to clear the user data related to the message.
|
||||
So if the synthesis must be stopped, the callback function is called for each pending message with the MSG_TERMINATED event.
|
||||
|
||||
A MARK event indicates a <mark> element in the text.
|
||||
A PLAY event indicates an <audio> element in the text, for which the calling program should play the named sound file.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/* Unit in which the 'position' argument of espeak_Synth() is counted.
   Values are spelled out explicitly so the numbering stays fixed if entries are reordered. */
typedef enum {
    POS_CHARACTER = 1,
    POS_WORD = 2,
    POS_SENTENCE = 3
} espeak_POSITION_TYPE;
|
||||
|
||||
|
||||
/* Audio output mode selected through the 'output' argument of espeak_Initialize().
   Values are spelled out explicitly; they match the implicit numbering of the original list. */
typedef enum {
    /* PLAYBACK mode: plays the audio data, supplies events to the calling program*/
    AUDIO_OUTPUT_PLAYBACK = 0,

    /* RETRIEVAL mode: supplies audio data and events to the calling program */
    AUDIO_OUTPUT_RETRIEVAL = 1,

    /* SYNCHRONOUS mode: as RETRIEVAL but doesn't return until synthesis is completed */
    AUDIO_OUTPUT_SYNCHRONOUS = 2,

    /* Synchronous playback */
    AUDIO_OUTPUT_SYNCH_PLAYBACK = 3

} espeak_AUDIO_OUTPUT;
|
||||
|
||||
|
||||
/* Status codes returned by the espeak_* functions declared as returning espeak_ERROR. */
typedef enum {
    EE_OK = 0,
    EE_INTERNAL_ERROR = -1,
    EE_BUFFER_FULL = 1,
    EE_NOT_FOUND = 2
} espeak_ERROR;
|
||||
|
||||
/* Bit flags for the 'options' argument of espeak_Initialize(): bit 0, bit 1 and bit 15. */
#define espeakINITIALIZE_PHONEME_EVENTS 0x0001
#define espeakINITIALIZE_PHONEME_IPA    0x0002
#define espeakINITIALIZE_DONT_EXIT      0x8000
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API int espeak_Initialize(espeak_AUDIO_OUTPUT output, int buflength, const char *path, int options);
|
||||
/* Must be called before any synthesis functions are called.
|
||||
output: the audio data can either be played by eSpeak or passed back by the SynthCallback function.
|
||||
|
||||
buflength: The length in mS of sound buffers passed to the SynthCallback function.
|
||||
Value=0 gives a default of 60mS.
|
||||
This parameter is only used for AUDIO_OUTPUT_RETRIEVAL and AUDIO_OUTPUT_SYNCHRONOUS modes.
|
||||
|
||||
path: The directory which contains the espeak-ng-data directory, or NULL for the default location.
|
||||
|
||||
options: bit 0: 1=allow espeakEVENT_PHONEME events.
|
||||
bit 1: 1= espeakEVENT_PHONEME events give IPA phoneme names, not eSpeak phoneme names
|
||||
bit 15: 1=don't exit if espeak_data is not found (used for --help)
|
||||
|
||||
Returns: sample rate in Hz, or -1 (EE_INTERNAL_ERROR).
|
||||
*/
|
||||
|
||||
typedef int (t_espeak_callback)(short*, int, espeak_EVENT*);
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API void espeak_SetSynthCallback(t_espeak_callback* SynthCallback);
|
||||
/* Must be called before any synthesis functions are called.
|
||||
This specifies a function in the calling program which is called when a buffer of
|
||||
speech sound data has been produced.
|
||||
|
||||
|
||||
The callback function is of the form:
|
||||
|
||||
int SynthCallback(short *wav, int numsamples, espeak_EVENT *events);
|
||||
|
||||
wav: is the speech sound data which has been produced.
|
||||
NULL indicates that the synthesis has been completed.
|
||||
|
||||
numsamples: is the number of entries in wav. This number may vary, may be less than
|
||||
the value implied by the buflength parameter given in espeak_Initialize, and may
|
||||
sometimes be zero (which does NOT indicate end of synthesis).
|
||||
|
||||
events: an array of espeak_EVENT items which indicate word and sentence events, and
|
||||
also the occurrence if <mark> and <audio> elements within the text. The list of
|
||||
events is terminated by an event of type = 0.
|
||||
|
||||
|
||||
Callback returns: 0=continue synthesis, 1=abort synthesis.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API void espeak_SetUriCallback(int (*UriCallback)(int, const char*, const char*));
|
||||
/* This function may be called before synthesis functions are used, in order to deal with
|
||||
<audio> tags. It specifies a callback function which is called when an <audio> element is
|
||||
encountered and allows the calling program to indicate whether the sound file which
|
||||
is specified in the <audio> element is available and is to be played.
|
||||
|
||||
The callback function is of the form:
|
||||
|
||||
int UriCallback(int type, const char *uri, const char *base);
|
||||
|
||||
type: type of callback event. Currently only 1= <audio> element
|
||||
|
||||
uri: the "src" attribute from the <audio> element
|
||||
|
||||
base: the "xml:base" attribute (if any) from the <speak> element
|
||||
|
||||
Return: 1=don't play the sound, but speak the text alternative.
|
||||
0=place a PLAY event in the event list at the point where the <audio> element
|
||||
occurs. The calling program can then play the sound at that point.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API void espeak_SetPhonemeCallback(int (*PhonemeCallback)(const char *));
|
||||
|
||||
|
||||
/********************/
|
||||
/* Synthesis */
|
||||
/********************/
|
||||
|
||||
|
||||
/* Character-encoding selector: exactly one of these goes in the low bits of the
   'flags' argument of espeak_Synth() (or is passed as 'textmode' to espeak_TextToPhonemes()). */
#define espeakCHARS_AUTO   0
#define espeakCHARS_UTF8   1
#define espeakCHARS_8BIT   2
#define espeakCHARS_WCHAR  3
#define espeakCHARS_16BIT  4

/* Option bits that may be OR'd into the 'flags' argument of espeak_Synth(). */
#define espeakSSML          0x10
#define espeakPHONEMES      0x100
#define espeakENDPAUSE      0x1000
#define espeakKEEP_NAMEDATA 0x2000
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API espeak_ERROR espeak_Synth(const void *text,
|
||||
size_t size,
|
||||
unsigned int position,
|
||||
espeak_POSITION_TYPE position_type,
|
||||
unsigned int end_position,
|
||||
unsigned int flags,
|
||||
unsigned int* unique_identifier,
|
||||
void* user_data);
|
||||
/* Synthesize speech for the specified text. The speech sound data is passed to the calling
|
||||
program in buffers by means of the callback function specified by espeak_SetSynthCallback(). The command is asynchronous: it is internally buffered and returns as soon as possible. If espeak_Initialize was previously called with AUDIO_OUTPUT_PLAYBACK as argument, the sound data are played by eSpeak.
|
||||
|
||||
text: The text to be spoken, terminated by a zero character. It may be either 8-bit characters,
|
||||
wide characters (wchar_t), or UTF8 encoding. Which of these is determined by the "flags"
|
||||
parameter.
|
||||
|
||||
size: Equal to (or greatrer than) the size of the text data, in bytes. This is used in order
|
||||
to allocate internal storage space for the text. This value is not used for
|
||||
AUDIO_OUTPUT_SYNCHRONOUS mode.
|
||||
|
||||
position: The position in the text where speaking starts. Zero indicates speak from the
|
||||
start of the text.
|
||||
|
||||
position_type: Determines whether "position" is a number of characters, words, or sentences.
|
||||
Values:
|
||||
|
||||
end_position: If set, this gives a character position at which speaking will stop. A value
|
||||
of zero indicates no end position.
|
||||
|
||||
flags: These may be OR'd together:
|
||||
Type of character codes, one of:
|
||||
espeakCHARS_UTF8 UTF8 encoding
|
||||
espeakCHARS_8BIT The 8 bit ISO-8859 character set for the particular language.
|
||||
espeakCHARS_AUTO 8 bit or UTF8 (this is the default)
|
||||
espeakCHARS_WCHAR Wide characters (wchar_t)
|
||||
espeakCHARS_16BIT 16 bit characters.
|
||||
|
||||
espeakSSML Elements within < > are treated as SSML elements, or if not recognised are ignored.
|
||||
|
||||
espeakPHONEMES Text within [[ ]] is treated as phonemes codes (in espeak's Kirshenbaum encoding).
|
||||
|
||||
espeakENDPAUSE If set then a sentence pause is added at the end of the text. If not set then
|
||||
this pause is suppressed.
|
||||
|
||||
unique_identifier: This must be either NULL, or point to an integer variable to
|
||||
which eSpeak writes a message identifier number.
|
||||
eSpeak includes this number in espeak_EVENT messages which are the result of
|
||||
this call of espeak_Synth().
|
||||
|
||||
user_data: a pointer (or NULL) which will be passed to the callback function in
|
||||
espeak_EVENT messages.
|
||||
|
||||
Return: EE_OK: operation achieved
|
||||
EE_BUFFER_FULL: the command can not be buffered;
|
||||
you may try after a while to call the function again.
|
||||
EE_INTERNAL_ERROR.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API espeak_ERROR espeak_Synth_Mark(const void *text,
|
||||
size_t size,
|
||||
const char *index_mark,
|
||||
unsigned int end_position,
|
||||
unsigned int flags,
|
||||
unsigned int* unique_identifier,
|
||||
void* user_data);
|
||||
/* Synthesize speech for the specified text. Similar to espeak_Synth() but the start position is
|
||||
specified by the name of a <mark> element in the text.
|
||||
|
||||
index_mark: The "name" attribute of a <mark> element within the text which specified the
|
||||
point at which synthesis starts. UTF8 string.
|
||||
|
||||
For the other parameters, see espeak_Synth()
|
||||
|
||||
Return: EE_OK: operation achieved
|
||||
EE_BUFFER_FULL: the command can not be buffered;
|
||||
you may try after a while to call the function again.
|
||||
EE_INTERNAL_ERROR.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API espeak_ERROR espeak_Key(const char *key_name);
|
||||
/* Speak the name of a keyboard key.
|
||||
If key_name is a single character, it speaks the name of the character.
|
||||
Otherwise, it speaks key_name as a text string.
|
||||
|
||||
Return: EE_OK: operation achieved
|
||||
EE_BUFFER_FULL: the command can not be buffered;
|
||||
you may try after a while to call the function again.
|
||||
EE_INTERNAL_ERROR.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API espeak_ERROR espeak_Char(wchar_t character);
|
||||
/* Speak the name of the given character
|
||||
|
||||
Return: EE_OK: operation achieved
|
||||
EE_BUFFER_FULL: the command can not be buffered;
|
||||
you may try after a while to call the function again.
|
||||
EE_INTERNAL_ERROR.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
/***********************/
|
||||
/* Speech Parameters */
|
||||
/***********************/
|
||||
|
||||
/* Speech parameters addressable through espeak_SetParameter() / espeak_GetParameter().
   Every value is spelled out explicitly; the tail entries keep the numbers they previously
   received implicitly (12..15). */
typedef enum {
    espeakSILENCE = 0,         /* internal use */
    espeakRATE = 1,
    espeakVOLUME = 2,
    espeakPITCH = 3,
    espeakRANGE = 4,
    espeakPUNCTUATION = 5,
    espeakCAPITALS = 6,
    espeakWORDGAP = 7,
    espeakOPTIONS = 8,         // reserved for misc. options.  not yet used
    espeakINTONATION = 9,
    espeakSSML_BREAK_MUL = 10,

    espeakRESERVED2 = 11,
    espeakEMPHASIS = 12,       /* internal use */
    espeakLINELENGTH = 13,     /* internal use */
    espeakVOICETYPE = 14,      // internal, 1=mbrola
    N_SPEECH_PARAM = 15        /* last enum */
} espeak_PARAMETER;
|
||||
|
||||
/* Values for the espeakPUNCTUATION parameter: announce no, all, or some punctuation characters. */
typedef enum {
    espeakPUNCT_NONE = 0,
    espeakPUNCT_ALL = 1,
    espeakPUNCT_SOME = 2
} espeak_PUNCT_TYPE;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API espeak_ERROR espeak_SetParameter(espeak_PARAMETER parameter, int value, int relative);
|
||||
/* Sets the value of the specified parameter.
|
||||
relative=0 Sets the absolute value of the parameter.
|
||||
relative=1 Sets a relative value of the parameter.
|
||||
|
||||
parameter:
|
||||
espeakRATE: speaking speed in word per minute. Values 80 to 450.
|
||||
|
||||
espeakVOLUME: volume in range 0-200 or more.
|
||||
0=silence, 100=normal full volume, greater values may produce amplitude compression or distortion
|
||||
|
||||
espeakPITCH: base pitch, range 0-100. 50=normal
|
||||
|
||||
espeakRANGE: pitch range, range 0-100. 0-monotone, 50=normal
|
||||
|
||||
espeakPUNCTUATION: which punctuation characters to announce:
|
||||
value in espeak_PUNCT_TYPE (none, all, some),
|
||||
see espeak_GetParameter() to specify which characters are announced.
|
||||
|
||||
espeakCAPITALS: announce capital letters by:
|
||||
0=none,
|
||||
1=sound icon,
|
||||
2=spelling,
|
||||
3 or higher, by raising pitch. This values gives the amount in Hz by which the pitch
|
||||
of a word raised to indicate it has a capital letter.
|
||||
|
||||
espeakWORDGAP: pause between words, units of 10mS (at the default speed)
|
||||
|
||||
Return: EE_OK: operation achieved
|
||||
EE_BUFFER_FULL: the command can not be buffered;
|
||||
you may try after a while to call the function again.
|
||||
EE_INTERNAL_ERROR.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API int espeak_GetParameter(espeak_PARAMETER parameter, int current);
|
||||
/* current=0 Returns the default value of the specified parameter.
|
||||
current=1 Returns the current value of the specified parameter, as set by SetParameter()
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API espeak_ERROR espeak_SetPunctuationList(const wchar_t *punctlist);
|
||||
/* Specified a list of punctuation characters whose names are to be spoken when the
|
||||
value of the Punctuation parameter is set to "some".
|
||||
|
||||
punctlist: A list of character codes, terminated by a zero character.
|
||||
|
||||
Return: EE_OK: operation achieved
|
||||
EE_BUFFER_FULL: the command can not be buffered;
|
||||
you may try after a while to call the function again.
|
||||
EE_INTERNAL_ERROR.
|
||||
*/
|
||||
|
||||
/* Bit values for the 'phonememode' argument of espeak_SetPhonemeTrace() and espeak_TextToPhonemes(). */
#define espeakPHONEMES_SHOW    0x01
#define espeakPHONEMES_IPA     0x02
#define espeakPHONEMES_TRACE   0x08
#define espeakPHONEMES_MBROLA  0x10
#define espeakPHONEMES_TIE     0x80
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API void espeak_SetPhonemeTrace(int phonememode, FILE *stream);
|
||||
/* phonememode: Controls the output of phoneme symbols for the text
|
||||
bits 0-2:
|
||||
value=0 No phoneme output (default)
|
||||
value=1 Output the translated phoneme symbols for the text
|
||||
value=2 as (1), but produces IPA phoneme names rather than ascii
|
||||
bit 3: output a trace of how the translation was done (showing the matching rules and list entries)
|
||||
bit 4: produce pho data for mbrola
|
||||
bit 7: use (bits 8-23) as a tie within multi-letter phonemes names
|
||||
bits 8-23: separator character, between phoneme names
|
||||
|
||||
stream output stream for the phoneme symbols (and trace). If stream=NULL then it uses stdout.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API const char *espeak_TextToPhonemes(const void **textptr, int textmode, int phonememode);
|
||||
/* Translates text into phonemes. Call espeak_SetVoiceByName() first, to select a language.
|
||||
|
||||
It returns a pointer to a character string which contains the phonemes for the text up to
|
||||
end of a sentence, or comma, semicolon, colon, or similar punctuation.
|
||||
|
||||
textptr: The address of a pointer to the input text which is terminated by a zero character.
|
||||
On return, the pointer has been advanced past the text which has been translated, or else set
|
||||
to NULL to indicate that the end of the text has been reached.
|
||||
|
||||
textmode: Type of character codes, one of:
|
||||
espeakCHARS_UTF8 UTF8 encoding
|
||||
espeakCHARS_8BIT The 8 bit ISO-8859 character set for the particular language.
|
||||
espeakCHARS_AUTO 8 bit or UTF8 (this is the default)
|
||||
espeakCHARS_WCHAR Wide characters (wchar_t)
|
||||
espeakCHARS_16BIT 16 bit characters.
|
||||
|
||||
phoneme_mode
|
||||
bit 1: 0=eSpeak's ascii phoneme names, 1= International Phonetic Alphabet (as UTF-8 characters).
|
||||
bit 7: use (bits 8-23) as a tie within multi-letter phonemes names
|
||||
bits 8-23: separator character, between phoneme names
|
||||
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API void espeak_CompileDictionary(const char *path, FILE *log, int flags);
|
||||
/* Compile pronunciation dictionary for a language which corresponds to the currently
|
||||
selected voice. The required voice should be selected before calling this function.
|
||||
|
||||
path: The directory which contains the language's '_rules' and '_list' files.
|
||||
'path' should end with a path separator character ('/').
|
||||
log: Stream for error reports and statistics information. If log=NULL then stderr will be used.
|
||||
|
||||
flags: Bit 0: include source line information for debug purposes (This is displayed with the
|
||||
-X command line option).
|
||||
*/
|
||||
/***********************/
|
||||
/* Voice Selection */
|
||||
/***********************/
|
||||
|
||||
|
||||
// voice table
// Describes one voice; also passed to espeak_SetVoiceByProperties() / espeak_ListVoices()
// as a set of selection criteria.
typedef struct {
    const char *name;       // a given name for this voice. UTF8 string.
    const char *languages;  // list of pairs of (byte) priority + (string) language (and dialect qualifier)
    const char *identifier; // the filename for this voice within espeak-ng-data/voices
    unsigned char gender;   // 0=none 1=male, 2=female,
    unsigned char age;      // 0=not specified, or age in years
    unsigned char variant;  // only used when passed as a parameter to espeak_SetVoiceByProperties
    unsigned char xx1;      // for internal use
    int score;              // for internal use
    void *spare;            // for internal use
} espeak_VOICE;
|
||||
|
||||
/* Note: The espeak_VOICE structure is used for two purposes:
|
||||
1. To return the details of the available voices.
|
||||
2. As a parameter to espeak_SetVoiceByProperties() in order to specify selection criteria.
|
||||
|
||||
In (1), the "languages" field consists of a list of (UTF8) language names for which this voice
|
||||
may be used, each language name in the list is terminated by a zero byte and is also preceded by
|
||||
a single byte which gives a "priority" number. The list of languages is terminated by an
|
||||
additional zero byte.
|
||||
|
||||
A language name consists of a language code, optionally followed by one or more qualifier (dialect)
|
||||
names separated by hyphens (eg. "en-uk"). A voice might, for example, have languages "en-uk" and
|
||||
"en". Even without "en" listed, voice would still be selected for the "en" language (because
|
||||
"en-uk" is related) but at a lower priority.
|
||||
|
||||
The priority byte indicates how the voice is preferred for the language. A low number indicates a
|
||||
more preferred voice, a higher number indicates a less preferred voice.
|
||||
|
||||
In (2), the "languages" field consists simply of a single (UTF8) language name, with no preceding
|
||||
priority byte.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API const espeak_VOICE **espeak_ListVoices(espeak_VOICE *voice_spec);
|
||||
/* Reads the voice files from espeak-ng-data/voices and creates an array of espeak_VOICE pointers.
|
||||
The list is terminated by a NULL pointer
|
||||
|
||||
If voice_spec is NULL then all voices are listed.
|
||||
If voice spec is given, then only the voices which are compatible with the voice_spec
|
||||
are listed, and they are listed in preference order.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API espeak_ERROR espeak_SetVoiceByFile(const char *filename);
|
||||
/* Loads a voice given the file path. Language is not considered.
|
||||
"filename" is a UTF8 string.
|
||||
|
||||
Return: EE_OK: operation achieved
|
||||
EE_BUFFER_FULL: the command can not be buffered;
|
||||
you may try after a while to call the function again.
|
||||
EE_INTERNAL_ERROR.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API espeak_ERROR espeak_SetVoiceByName(const char *name);
|
||||
/* Searches for a voice with a matching "name" field. Language is not considered.
|
||||
"name" is a UTF8 string.
|
||||
|
||||
Return: EE_OK: operation achieved
|
||||
EE_BUFFER_FULL: the command can not be buffered;
|
||||
you may try after a while to call the function again.
|
||||
EE_INTERNAL_ERROR.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API espeak_ERROR espeak_SetVoiceByProperties(espeak_VOICE *voice_spec);
|
||||
/* An espeak_VOICE structure is used to pass criteria to select a voice. Any of the following
|
||||
fields may be set:
|
||||
|
||||
name NULL, or a voice name
|
||||
|
||||
languages NULL, or a single language string (with optional dialect), eg. "en-uk", or "en"
|
||||
|
||||
gender 0=not specified, 1=male, 2=female
|
||||
|
||||
age 0=not specified, or an age in years
|
||||
|
||||
variant After a list of candidates is produced, scored and sorted, "variant" is used to index
|
||||
that list and choose a voice.
|
||||
variant=0 takes the top voice (i.e. best match). variant=1 takes the next voice, etc
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API espeak_VOICE *espeak_GetCurrentVoice(void);
|
||||
/* Returns the espeak_VOICE data for the currently selected voice.
|
||||
This is not affected by temporary voice changes caused by SSML elements such as <voice> and <s>
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API espeak_ERROR espeak_Cancel(void);
|
||||
/* Stop immediately synthesis and audio output of the current text. When this
|
||||
function returns, the audio output is fully stopped and the synthesizer is ready to
|
||||
synthesize a new message.
|
||||
|
||||
Return: EE_OK: operation achieved
|
||||
EE_INTERNAL_ERROR.
|
||||
*/
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API int espeak_IsPlaying(void);
|
||||
/* Returns 1 if audio is played, 0 otherwise.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API espeak_ERROR espeak_Synchronize(void);
|
||||
/* This function returns when all data have been spoken.
|
||||
Return: EE_OK: operation achieved
|
||||
EE_INTERNAL_ERROR.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API espeak_ERROR espeak_Terminate(void);
|
||||
/* last function to be called.
|
||||
Return: EE_OK: operation achieved
|
||||
EE_INTERNAL_ERROR.
|
||||
*/
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
ESPEAK_API const char *espeak_Info(const char **path_data);
|
||||
/* Returns the version number string.
|
||||
path_data returns the path to espeak_data
|
||||
*/
|
||||
#endif
|
||||
76
dep/include/vall_e.cpp/ggml-alloc.h
Normal file
76
dep/include/vall_e.cpp/ggml-alloc.h
Normal file
@ -0,0 +1,76 @@
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Opaque backend handle types used by the allocator declarations in this header.
typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
typedef struct      ggml_backend_buffer * ggml_backend_buffer_t;
typedef struct             ggml_backend * ggml_backend_t;
|
||||
|
||||
// Tensor allocator
|
||||
struct ggml_tallocr {
|
||||
ggml_backend_buffer_t buffer;
|
||||
void * base;
|
||||
size_t alignment;
|
||||
size_t offset;
|
||||
};
|
||||
|
||||
// Creates tensor-allocator state for 'buffer'; ggml_tallocr_alloc() takes that state plus a
// tensor and reports the outcome as a ggml_status.
GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
GGML_API enum ggml_status    ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
|
||||
|
||||
// Graph allocator
|
||||
/*
|
||||
Example usage:
|
||||
ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
|
||||
|
||||
// optional: create a worst-case graph and reserve the buffers to avoid reallocations
|
||||
ggml_gallocr_reserve(galloc, build_graph(max_batch));
|
||||
|
||||
// allocate the graph
|
||||
struct ggml_cgraph * graph = build_graph(batch);
|
||||
ggml_gallocr_alloc_graph(galloc, graph);
|
||||
|
||||
printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0));
|
||||
|
||||
// evaluate the graph
|
||||
ggml_backend_graph_compute(backend, graph);
|
||||
*/
|
||||
|
||||
// special tensor flags for use with the graph allocator:
|
||||
// ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses
|
||||
// ggml_set_output(): output tensors are never freed and never overwritten
|
||||
|
||||
typedef struct ggml_gallocr * ggml_gallocr_t;
|
||||
|
||||
GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft);
|
||||
GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs);
|
||||
GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc);
|
||||
|
||||
// pre-allocate buffers from a measure graph - does not allocate or modify the graph
|
||||
// call with a worst-case graph to avoid buffer reallocations
|
||||
// not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
|
||||
// returns false if the buffer allocation failed
|
||||
GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
|
||||
GGML_API bool ggml_gallocr_reserve_n(
|
||||
ggml_gallocr_t galloc,
|
||||
struct ggml_cgraph * graph,
|
||||
const int * node_buffer_ids,
|
||||
const int * leaf_buffer_ids);
|
||||
|
||||
// automatic reallocation if the topology changes when using a single buffer
|
||||
// returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers)
|
||||
GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
|
||||
|
||||
GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id);
|
||||
|
||||
// Utils
|
||||
// Create a buffer and allocate all the tensors in a ggml_context
|
||||
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
|
||||
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
354
dep/include/vall_e.cpp/ggml-backend.h
Normal file
354
dep/include/vall_e.cpp/ggml-backend.h
Normal file
@ -0,0 +1,354 @@
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-alloc.h"
|
||||
|
||||
// GGML_BACKEND_API: linkage decoration for backend entry points.
//   GGML_BACKEND_SHARED not defined                  -> plain `extern`
//   GGML_BACKEND_SHARED, _WIN32 without __MINGW32__  -> dllexport if GGML_BACKEND_BUILD is defined, else dllimport
//   GGML_BACKEND_SHARED, any other target            -> default symbol visibility
#ifdef GGML_BACKEND_SHARED
#    if defined(_WIN32) && !defined(__MINGW32__)
#        ifdef GGML_BACKEND_BUILD
#            define GGML_BACKEND_API __declspec(dllexport) extern
#        else
#            define GGML_BACKEND_API __declspec(dllimport) extern
#        endif
#    else
#        define GGML_BACKEND_API __attribute__ ((visibility ("default"))) extern
#    endif
#else
#    define GGML_BACKEND_API extern
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Opaque handle types used throughout the backend API.
typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
typedef struct ggml_backend_event * ggml_backend_event_t;
typedef struct ggml_backend * ggml_backend_t;
typedef void * ggml_backend_graph_plan_t;
typedef struct ggml_backend_reg * ggml_backend_reg_t;
typedef struct ggml_backend_device * ggml_backend_dev_t;
|
||||
|
||||
|
||||
//
|
||||
// Backend buffer type
|
||||
//
|
||||
|
||||
// Queries and allocation entry point for a buffer type handle.
GGML_API const char *          ggml_backend_buft_name          (ggml_backend_buffer_type_t buft);
GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer  (ggml_backend_buffer_type_t buft, size_t size);
GGML_API size_t                ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
GGML_API size_t                ggml_backend_buft_get_max_size  (ggml_backend_buffer_type_t buft);
GGML_API size_t                ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
GGML_API bool                  ggml_backend_buft_is_host       (ggml_backend_buffer_type_t buft);
GGML_API ggml_backend_dev_t    ggml_backend_buft_get_device    (ggml_backend_buffer_type_t buft);
|
||||
|
||||
//
|
||||
// Backend buffer
|
||||
//
|
||||
|
||||
// Usage tag of a backend buffer; set and read through
// ggml_backend_buffer_set_usage() / ggml_backend_buffer_get_usage().
enum ggml_backend_buffer_usage {
    GGML_BACKEND_BUFFER_USAGE_ANY     = 0,
    GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
    GGML_BACKEND_BUFFER_USAGE_COMPUTE = 2,
};
|
||||
|
||||
// Operations on an individual backend buffer handle.
GGML_API const char *                   ggml_backend_buffer_name          (ggml_backend_buffer_t buffer);
GGML_API void                           ggml_backend_buffer_free          (ggml_backend_buffer_t buffer);
GGML_API void *                         ggml_backend_buffer_get_base      (ggml_backend_buffer_t buffer);
GGML_API size_t                         ggml_backend_buffer_get_size      (ggml_backend_buffer_t buffer);
GGML_API enum ggml_status               ggml_backend_buffer_init_tensor   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
GGML_API size_t                         ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
GGML_API size_t                         ggml_backend_buffer_get_max_size  (ggml_backend_buffer_t buffer);
GGML_API size_t                         ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
GGML_API void                           ggml_backend_buffer_clear         (ggml_backend_buffer_t buffer, uint8_t value);
GGML_API bool                           ggml_backend_buffer_is_host       (ggml_backend_buffer_t buffer);
GGML_API void                           ggml_backend_buffer_set_usage     (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
GGML_API enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage     (ggml_backend_buffer_t buffer);
GGML_API ggml_backend_buffer_type_t     ggml_backend_buffer_get_type      (ggml_backend_buffer_t buffer);
GGML_API void                           ggml_backend_buffer_reset         (ggml_backend_buffer_t buffer);
|
||||
|
||||
// tensor copy between different backends
|
||||
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||
|
||||
//
|
||||
// Backend (stream)
|
||||
//
|
||||
|
||||
// Identity and lifetime of a backend handle.
GGML_API ggml_guid_t  ggml_backend_guid(ggml_backend_t backend);
GGML_API const char * ggml_backend_name(ggml_backend_t backend);
GGML_API void         ggml_backend_free(ggml_backend_t backend);

// Buffer type, buffer allocation and allocation limits exposed directly on a backend handle.
GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
GGML_API ggml_backend_buffer_t      ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
GGML_API size_t                     ggml_backend_get_alignment(ggml_backend_t backend);
GGML_API size_t                     ggml_backend_get_max_size(ggml_backend_t backend);
|
||||
|
||||
// Asynchronous variants: same data/offset/size arguments as the calls below, plus the backend to issue them on.
GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend,       struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);

// "offset" refers to the offset in tensor->data for setting/getting data
GGML_API void ggml_backend_tensor_set(      struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
GGML_API void ggml_backend_tensor_memset(   struct ggml_tensor * tensor,     uint8_t value, size_t offset, size_t size);
|
||||
|
||||
GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
|
||||
|
||||
GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
|
||||
GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
|
||||
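// NOTE: the three backend-level queries below will be removed; use the device-level versions instead (ggml_backend_dev_supports_op, ggml_backend_dev_supports_buft, ggml_backend_dev_offload_op)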
|
||||
GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||
GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
|
||||
GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||
|
||||
// asynchronous copy
|
||||
// the copy is performed after all the currently queued operations in backend_src
|
||||
// backend_dst will wait for the copy to complete before performing other operations
|
||||
// automatic fallback to sync copy if async is not supported
|
||||
GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||
|
||||
GGML_API ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend);
|
||||
|
||||
//
|
||||
// Events
|
||||
//
|
||||
|
||||
GGML_API ggml_backend_event_t ggml_backend_event_new(ggml_backend_dev_t device);
|
||||
GGML_API void ggml_backend_event_free(ggml_backend_event_t event);
|
||||
GGML_API void ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend);
|
||||
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
|
||||
GGML_API void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event);
|
||||
|
||||
//
|
||||
// Backend device
|
||||
//
|
||||
|
||||
// Broad class of a backend device.
// Returned by ggml_backend_dev_type(), stored in the `type` field of
// struct ggml_backend_dev_props, and used as the lookup key by
// ggml_backend_dev_by_type() / ggml_backend_init_by_type().
// No explicit values are assigned, so the enumerators are numbered 0, 1, 2 in declaration order.
enum ggml_backend_dev_type {
|
||||
// CPU device using system memory
|
||||
GGML_BACKEND_DEVICE_TYPE_CPU,
|
||||
// GPU device using dedicated memory
|
||||
GGML_BACKEND_DEVICE_TYPE_GPU,
|
||||
// accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
|
||||
GGML_BACKEND_DEVICE_TYPE_ACCEL
|
||||
};
|
||||
|
||||
// functionality supported by the device
|
||||
// Optional capabilities a device can advertise, one bool per feature.
// Embedded by value as the `caps` member of struct ggml_backend_dev_props.
struct ggml_backend_dev_caps {
|
||||
// asynchronous operations
// NOTE(review): presumably relates to the *_async entry points declared in this header
// (e.g. ggml_backend_tensor_set_async) - confirm against the backend implementations
|
||||
bool async;
|
||||
// pinned host buffer
// NOTE(review): presumably indicates ggml_backend_dev_host_buffer_type() is usable - confirm
|
||||
bool host_buffer;
|
||||
// creating buffers from host ptr
// NOTE(review): presumably indicates ggml_backend_dev_buffer_from_host_ptr() is usable - confirm
|
||||
bool buffer_from_host_ptr;
|
||||
// event synchronization
// NOTE(review): presumably indicates the ggml_backend_event_* API is usable - confirm
|
||||
bool events;
|
||||
};
|
||||
|
||||
// all the device properties
|
||||
// Aggregate description of one device.
// ggml_backend_dev_get_props() takes a pointer to this struct as its output argument.
// The fields parallel the individual accessors declared after this struct:
// ggml_backend_dev_name(), ggml_backend_dev_description(), ggml_backend_dev_memory()
// and ggml_backend_dev_type().
struct ggml_backend_dev_props {
|
||||
// device name string (ownership/lifetime not visible here - TODO confirm before storing the pointer)
const char * name;
|
||||
// device description string (same lifetime caveat as `name`)
const char * description;
|
||||
// free device memory - presumably in bytes, TODO confirm
size_t memory_free;
|
||||
// total device memory - presumably in bytes, TODO confirm
size_t memory_total;
|
||||
// CPU / GPU / ACCEL classification
enum ggml_backend_dev_type type;
|
||||
// optional capability flags, held by value
struct ggml_backend_dev_caps caps;
|
||||
};
|
||||
|
||||
GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device);
|
||||
GGML_API const char * ggml_backend_dev_description(ggml_backend_dev_t device);
|
||||
GGML_API void ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total);
|
||||
GGML_API enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device);
|
||||
GGML_API void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props);
|
||||
GGML_API ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device);
|
||||
GGML_API ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * params);
|
||||
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t device);
|
||||
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device);
|
||||
GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
|
||||
|
||||
GGML_API bool ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
|
||||
GGML_API bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft);
|
||||
GGML_API bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
|
||||
|
||||
//
|
||||
// Backend (reg)
|
||||
//
|
||||
|
||||
GGML_API const char * ggml_backend_reg_name(ggml_backend_reg_t reg);
|
||||
GGML_API size_t ggml_backend_reg_dev_count(ggml_backend_reg_t reg);
|
||||
GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index);
|
||||
GGML_API void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name);
|
||||
|
||||
// Common functions that may be obtained using ggml_backend_reg_get_proc_address
|
||||
|
||||
// Split buffer type for tensor parallelism
|
||||
typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(int main_device, const float * tensor_split);
|
||||
// Set the number of threads for the backend
|
||||
typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads);
|
||||
// Get additional buffer types provided by the device (returns a NULL-terminated array)
|
||||
typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device);
|
||||
// Set the abort callback for the backend
|
||||
typedef void (*ggml_backend_set_abort_callback_t)(ggml_backend_t backend, ggml_abort_callback abort_callback, void * abort_callback_data);
|
||||
// Get a list of feature flags supported by the backend (returns a NULL-terminated array)
|
||||
// One feature-flag entry, expressed as a name/value pair of C strings.
// A function of type ggml_backend_get_features_t returns a pointer to an array of these.
// NOTE(review): the array is described as NULL-terminated; presumably the final
// element has a NULL `name` - confirm against a backend implementation.
struct ggml_backend_feature {
|
||||
// feature identifier
const char * name;
|
||||
// feature value, as text
const char * value;
|
||||
};
|
||||
typedef struct ggml_backend_feature * (*ggml_backend_get_features_t)(ggml_backend_reg_t reg);
|
||||
|
||||
//
|
||||
// Backend registry
|
||||
//
|
||||
|
||||
GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
|
||||
|
||||
// Backend (reg) enumeration
|
||||
GGML_API size_t ggml_backend_reg_count(void);
|
||||
GGML_API ggml_backend_reg_t ggml_backend_reg_get(size_t index);
|
||||
GGML_API ggml_backend_reg_t ggml_backend_reg_by_name(const char * name);
|
||||
|
||||
// Device enumeration
|
||||
GGML_API size_t ggml_backend_dev_count(void);
|
||||
GGML_API ggml_backend_dev_t ggml_backend_dev_get(size_t index);
|
||||
GGML_API ggml_backend_dev_t ggml_backend_dev_by_name(const char * name);
|
||||
GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type);
|
||||
|
||||
// Direct backend (stream) initialization
|
||||
// = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params)
|
||||
GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params);
|
||||
// = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params)
|
||||
GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params);
|
||||
// = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL)
|
||||
GGML_API ggml_backend_t ggml_backend_init_best(void);
|
||||
|
||||
// Load a backend from a dynamic library and register it
|
||||
GGML_API ggml_backend_reg_t ggml_backend_load(const char * path);
|
||||
// Unload a backend if loaded dynamically and unregister it
|
||||
GGML_API void ggml_backend_unload(ggml_backend_reg_t reg);
|
||||
// Load all known backends from dynamic libraries
|
||||
GGML_API void ggml_backend_load_all(void);
|
||||
GGML_API void ggml_backend_load_all_from_path(const char * dir_path);
|
||||
|
||||
//
|
||||
// Backend scheduler
|
||||
//
|
||||
|
||||
// The backend scheduler allows for multiple backend devices to be used together
|
||||
// Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
|
||||
// The backends are selected based on:
|
||||
// - the backend that supports the operation
|
||||
// - the location of the pre-allocated tensors (e.g. the weights)
|
||||
/*
|
||||
Example usage:
|
||||
|
||||
// operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned
|
||||
// preferably to run on the same backend as the buffer
|
||||
ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
|
||||
|
||||
sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false);
|
||||
|
||||
// initialize buffers from a max size graph (optional)
|
||||
reserve_graph = build_graph(sched, max_batch_size);
|
||||
|
||||
// manually assign nodes to a backend (optional, should not be needed in most cases)
|
||||
struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
|
||||
ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu);
|
||||
|
||||
ggml_backend_sched_reserve(sched, reserve_graph);
|
||||
|
||||
// compute
|
||||
graph = build_graph(sched); // the graph and its tensors are single-use in terms of allocation, multi-use in terms of computation
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
ggml_backend_sched_graph_compute(sched, graph); // on the first iteration the graph is allocated automatically
|
||||
}
|
||||
|
||||
// if there are graph inputs:
|
||||
graph = build_graph(sched); // get a new graph that is not allocated (the metadata for the old graph is freed once ggml_free is called)
|
||||
ggml_backend_sched_reset(sched); // clear the allocation of the previous graph
|
||||
ggml_backend_sched_alloc_graph(sched, graph); // explicitly allocate the new graph but do not execute it
|
||||
ggml_backend_tensor_set(input_tensor, ...); // copy data to the newly allocated graph tensors
|
||||
ggml_backend_sched_graph_compute(sched, graph); // execute the graph
|
||||
|
||||
// as an alternative to the above it is also possible to assign the inputs to a dedicated context and
|
||||
// allocate them statically via ggml_backend_alloc_ctx_tensors
|
||||
}
|
||||
*/
|
||||
|
||||
typedef struct ggml_backend_sched * ggml_backend_sched_t;
|
||||
|
||||
// Evaluation callback for each node in the graph (set with ggml_backend_sched_set_eval_callback)
|
||||
// when ask == true, the scheduler wants to know if the user wants to observe this node
|
||||
// this allows the scheduler to batch nodes together in order to evaluate them in a single call
|
||||
//
|
||||
// when ask == false, the scheduler is passing the node tensor to the user for observation
|
||||
// if the user returns false, the scheduler will cancel the graph compute
|
||||
//
|
||||
typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
|
||||
|
||||
// Initialize a backend scheduler, backends with low index are given priority over backends with high index
|
||||
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
|
||||
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
|
||||
|
||||
// Initialize backend buffers from a measure graph
|
||||
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph); // returns success
|
||||
|
||||
GGML_API int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched);
|
||||
GGML_API ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i);
|
||||
|
||||
// Get the number of splits of the last graph
|
||||
GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
|
||||
GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
|
||||
|
||||
GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
|
||||
|
||||
GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
|
||||
GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
|
||||
|
||||
// Allocate and compute graph on the backend scheduler
|
||||
GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph); // returns success
|
||||
GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||
GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||
GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
|
||||
|
||||
// Reset all assignments and allocators - must be called before changing the node backends or allocating a new graph.
|
||||
// This in effect deallocates all tensors that were previously allocated and leaves them with dangling pointers.
|
||||
// The correct way to use this API is to discard the deallocated tensors and create new ones.
|
||||
GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
|
||||
|
||||
// Set a callback to be called for each resulting node during graph compute
|
||||
GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
|
||||
|
||||
//
|
||||
// Utils
|
||||
//
|
||||
|
||||
// Result bundle of ggml_backend_graph_copy(), which returns this struct by value;
// ggml_backend_graph_copy_free() takes it by value again.
// The struct tag and the function share the name `ggml_backend_graph_copy`, so the
// type must always be written with the `struct` keyword.
struct ggml_backend_graph_copy {
|
||||
// backend buffer associated with the copy
// NOTE(review): presumably holds the copied tensor data - confirm in the implementation
ggml_backend_buffer_t buffer;
|
||||
// ggml context for the "allocated" part of the copy (exact contents not visible here - TODO confirm)
struct ggml_context * ctx_allocated;
|
||||
// ggml context for the "unallocated" part of the copy (exact contents not visible here - TODO confirm)
struct ggml_context * ctx_unallocated;
|
||||
// the copied compute graph
struct ggml_cgraph * graph;
|
||||
};
|
||||
|
||||
// Copy a graph to a different backend
|
||||
GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
|
||||
GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
|
||||
|
||||
typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
|
||||
|
||||
// Compare the output of two backends
|
||||
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
|
||||
|
||||
// Tensor initialization
|
||||
GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
|
||||
GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor);
|
||||
|
||||
// CPU buffer types are always available
|
||||
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
|
||||
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
25
dep/include/vall_e.cpp/ggml-blas.h
Normal file
25
dep/include/vall_e.cpp/ggml-blas.h
Normal file
@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// backend API
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void);
|
||||
|
||||
GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend);
|
||||
|
||||
// number of threads used for conversion to float
|
||||
// for openblas and blis, this will also set the number of threads used for blas operations
|
||||
GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
123
dep/include/vall_e.cpp/ggml-cann.h
Normal file
123
dep/include/vall_e.cpp/ggml-cann.h
Normal file
@ -0,0 +1,123 @@
|
||||
/*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ggml-backend.h"
|
||||
#include "ggml.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Maximum number of CANN devices supported.
|
||||
*/
|
||||
#define GGML_CANN_MAX_DEVICES 16
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cann_reg(void);
|
||||
|
||||
/**
|
||||
* @brief Initializes the CANN backend for a specified device.
|
||||
*
|
||||
* This function initializes the CANN backend for the given device.
|
||||
* It verifies the device index, allocates a context, and creates a backend
|
||||
* instance.
|
||||
*
|
||||
* @param device The index of the device to initialize.
|
||||
* @return A pointer to the initialized backend instance, or nullptr on failure.
|
||||
*/
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_cann_init(int32_t device);
|
||||
|
||||
/**
|
||||
* @brief Checks if a given backend is a CANN backend.
|
||||
*
|
||||
* This function verifies if the provided backend is a CANN backend by comparing
|
||||
* its GUID with the CANN backend's GUID.
|
||||
*
|
||||
* @param backend The backend instance to check.
|
||||
* @return True if the backend is a CANN backend, false otherwise.
|
||||
*/
|
||||
GGML_BACKEND_API bool ggml_backend_is_cann(ggml_backend_t backend);
|
||||
|
||||
/**
|
||||
* @brief Retrieves the CANN buffer type for a specified device.
|
||||
*
|
||||
* This function initializes and returns the buffer type interface associated
|
||||
* with the given device. It ensures thread-safe access using a mutex.
|
||||
*
|
||||
* @param device The device index for which to retrieve the buffer type.
|
||||
* @return A pointer to the buffer type interface for the specified device, or
|
||||
* nullptr if the device index is out of range.
|
||||
*/
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t
|
||||
ggml_backend_cann_buffer_type(int32_t device);
|
||||
|
||||
/**
|
||||
* @brief Retrieves the number of CANN devices available.
|
||||
*
|
||||
* This function returns the number of CANN devices available based on
|
||||
* information obtained from `ggml_cann_info()`.
|
||||
*
|
||||
* @return The number of CANN devices available.
|
||||
*/
|
||||
GGML_BACKEND_API int32_t ggml_backend_cann_get_device_count(void);
|
||||
|
||||
/**
|
||||
* @brief Retrieves the pinned host buffer type, for use with the CPU backend for faster copies between CPU and NPU.
|
||||
*
|
||||
* @return A pointer to the host buffer type interface.
|
||||
*/
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
|
||||
|
||||
/**
|
||||
* @brief Retrieves the description of a specific CANN device.
|
||||
*
|
||||
* This function sets the specified device, retrieves the SoC name,
|
||||
* and writes it into the provided description buffer.
|
||||
*
|
||||
* @param device The device index to retrieve the description for.
|
||||
* @param description Pointer to a buffer where the description will be written.
|
||||
* @param description_size Size of the description buffer.
|
||||
*/
|
||||
GGML_BACKEND_API void ggml_backend_cann_get_device_description(
|
||||
int32_t device, char* description, size_t description_size);
|
||||
|
||||
/**
|
||||
* @brief Retrieves the memory information of a specific CANN device.
|
||||
*
|
||||
* This function sets the specified device, retrieves the free and total
|
||||
* memory information of the specified type (ACL_HBM_MEM), and stores them
|
||||
* in the provided pointers.
|
||||
*
|
||||
* @param device The device index to retrieve memory information for.
|
||||
* @param free Pointer to a variable where the free memory size will be stored.
|
||||
* @param total Pointer to a variable where the total memory size will be
|
||||
* stored.
|
||||
*/
|
||||
GGML_BACKEND_API void ggml_backend_cann_get_device_memory(int32_t device,
|
||||
size_t* free,
|
||||
size_t* total);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
39
dep/include/vall_e.cpp/ggml-cpp.h
Normal file
39
dep/include/vall_e.cpp/ggml-cpp.h
Normal file
@ -0,0 +1,39 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef __cplusplus
|
||||
#error "This header is for C++ only"
|
||||
#endif
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-alloc.h"
|
||||
#include "ggml-backend.h"
|
||||
#include "gguf.h"
|
||||
#include <memory>
|
||||
|
||||
// Smart pointers for ggml types
|
||||
|
||||
// ggml
|
||||
|
||||
struct ggml_context_deleter { void operator()(ggml_context * ctx) { ggml_free(ctx); } };
|
||||
struct gguf_context_deleter { void operator()(gguf_context * ctx) { gguf_free(ctx); } };
|
||||
|
||||
typedef std::unique_ptr<ggml_context, ggml_context_deleter> ggml_context_ptr;
|
||||
typedef std::unique_ptr<gguf_context, gguf_context_deleter> gguf_context_ptr;
|
||||
|
||||
// ggml-alloc
|
||||
|
||||
struct ggml_gallocr_deleter { void operator()(ggml_gallocr_t galloc) { ggml_gallocr_free(galloc); } };
|
||||
|
||||
typedef std::unique_ptr<ggml_gallocr_t, ggml_gallocr_deleter> ggml_gallocr_ptr;
|
||||
|
||||
// ggml-backend
|
||||
|
||||
struct ggml_backend_deleter { void operator()(ggml_backend_t backend) { ggml_backend_free(backend); } };
|
||||
struct ggml_backend_buffer_deleter { void operator()(ggml_backend_buffer_t buffer) { ggml_backend_buffer_free(buffer); } };
|
||||
struct ggml_backend_event_deleter { void operator()(ggml_backend_event_t event) { ggml_backend_event_free(event); } };
|
||||
struct ggml_backend_sched_deleter { void operator()(ggml_backend_sched_t sched) { ggml_backend_sched_free(sched); } };
|
||||
|
||||
typedef std::unique_ptr<ggml_backend, ggml_backend_deleter> ggml_backend_ptr;
|
||||
typedef std::unique_ptr<ggml_backend_buffer, ggml_backend_buffer_deleter> ggml_backend_buffer_ptr;
|
||||
typedef std::unique_ptr<ggml_backend_event, ggml_backend_event_deleter> ggml_backend_event_ptr;
|
||||
typedef std::unique_ptr<ggml_backend_sched, ggml_backend_sched_deleter> ggml_backend_sched_ptr;
|
||||
138
dep/include/vall_e.cpp/ggml-cpu.h
Normal file
138
dep/include/vall_e.cpp/ggml-cpu.h
Normal file
@ -0,0 +1,138 @@
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// the compute plan that needs to be prepared for ggml_graph_compute()
|
||||
// since https://github.com/ggml-org/ggml/issues/287
|
||||
// Compute plan: returned by value from ggml_graph_plan() and passed by pointer
// to ggml_graph_compute().
// Per the notes on ggml_graph_plan(), when work_size > 0 the caller must allocate
// work_data before computing.
struct ggml_cplan {
|
||||
size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
|
||||
uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
|
||||
|
||||
// NOTE(review): n_threads and threadpool have the same names and types as the
// arguments of ggml_graph_plan(); presumably copied from them - confirm
int n_threads;
|
||||
struct ggml_threadpool * threadpool;
|
||||
|
||||
// abort ggml_graph_compute when true
|
||||
ggml_abort_callback abort_callback;
|
||||
// NOTE(review): presumably the opaque pointer handed to abort_callback - confirm
// against the ggml_abort_callback typedef in ggml.h
void * abort_callback_data;
|
||||
};
|
||||
|
||||
// numa strategies
|
||||
// NUMA strategy selector, passed to ggml_numa_init().
// The strategies carry explicit values 0..4. GGML_NUMA_STRATEGY_COUNT has no
// initializer and follows MIRROR (4), so it evaluates to 5, the number of strategies.
// New strategies should be inserted before it.
enum ggml_numa_strategy {
|
||||
GGML_NUMA_STRATEGY_DISABLED = 0,
|
||||
GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
|
||||
GGML_NUMA_STRATEGY_ISOLATE = 2,
|
||||
GGML_NUMA_STRATEGY_NUMACTL = 3,
|
||||
GGML_NUMA_STRATEGY_MIRROR = 4,
|
||||
// sentinel: number of strategies above, not a strategy itself
GGML_NUMA_STRATEGY_COUNT
|
||||
};
|
||||
|
||||
GGML_BACKEND_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
|
||||
GGML_BACKEND_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
|
||||
|
||||
GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
|
||||
GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
|
||||
|
||||
GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
|
||||
GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
|
||||
|
||||
GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
|
||||
GGML_BACKEND_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
|
||||
|
||||
GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
|
||||
GGML_BACKEND_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
|
||||
|
||||
GGML_BACKEND_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
|
||||
GGML_BACKEND_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
|
||||
|
||||
GGML_BACKEND_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
|
||||
GGML_BACKEND_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
|
||||
|
||||
GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
|
||||
GGML_BACKEND_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
|
||||
GGML_BACKEND_API int ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
|
||||
GGML_BACKEND_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
|
||||
GGML_BACKEND_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
|
||||
|
||||
// ggml_graph_plan() has to be called before ggml_graph_compute()
|
||||
// when plan.work_size > 0, caller must allocate memory for plan.work_data
|
||||
GGML_BACKEND_API struct ggml_cplan ggml_graph_plan(
|
||||
const struct ggml_cgraph * cgraph,
|
||||
int n_threads, /* = GGML_DEFAULT_N_THREADS */
|
||||
struct ggml_threadpool * threadpool /* = NULL */ );
|
||||
GGML_BACKEND_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
|
||||
|
||||
// same as ggml_graph_compute() but the work data is allocated as a part of the context
|
||||
// note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
|
||||
GGML_BACKEND_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
|
||||
|
||||
//
|
||||
// system info
|
||||
//
|
||||
|
||||
// x86
|
||||
GGML_BACKEND_API int ggml_cpu_has_sse3 (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_ssse3 (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_avx (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_bmi2 (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_f16c (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_fma (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_amx_int8 (void);
|
||||
// ARM
|
||||
GGML_BACKEND_API int ggml_cpu_has_neon (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_arm_fma (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_fp16_va (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_dotprod (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_sve (void);
|
||||
GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes
|
||||
GGML_BACKEND_API int ggml_cpu_has_sme (void);
|
||||
// other
|
||||
GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_vsx (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_vxe (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
|
||||
|
||||
// Internal types and functions exposed for tests and benchmarks
|
||||
|
||||
typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
|
||||
const void * GGML_RESTRICT y, size_t by, int nrc);
|
||||
|
||||
// Per-type CPU kernel table entry. ggml_get_type_traits_cpu() returns a const
// pointer to one of these for a given enum ggml_type.
// Listed under "Internal types and functions exposed for tests and benchmarks".
struct ggml_type_traits_cpu {
|
||||
// conversion routine (ggml_from_float_t is declared in ggml.h; presumably float -> this type, TODO confirm)
ggml_from_float_t from_float;
|
||||
// dot-product kernel, see the ggml_vec_dot_t signature
ggml_vec_dot_t vec_dot;
|
||||
// NOTE(review): presumably the type the other vec_dot operand is expected in - confirm
enum ggml_type vec_dot_type;
|
||||
int64_t nrows; // number of rows to process simultaneously
|
||||
};
|
||||
|
||||
GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);
|
||||
|
||||
GGML_BACKEND_API void ggml_cpu_init(void);
|
||||
|
||||
//
|
||||
// CPU backend
|
||||
//
|
||||
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void);
|
||||
|
||||
GGML_BACKEND_API bool ggml_backend_is_cpu (ggml_backend_t backend);
|
||||
GGML_BACKEND_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
|
||||
GGML_BACKEND_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
|
||||
GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
47
dep/include/vall_e.cpp/ggml-cuda.h
Normal file
47
dep/include/vall_e.cpp/ggml-cuda.h
Normal file
@ -0,0 +1,47 @@
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_HIP
|
||||
#define GGML_CUDA_NAME "ROCm"
|
||||
#define GGML_CUBLAS_NAME "hipBLAS"
|
||||
#elif defined(GGML_USE_MUSA)
|
||||
#define GGML_CUDA_NAME "MUSA"
|
||||
#define GGML_CUBLAS_NAME "muBLAS"
|
||||
#else
|
||||
#define GGML_CUDA_NAME "CUDA"
|
||||
#define GGML_CUBLAS_NAME "cuBLAS"
|
||||
#endif
|
||||
#define GGML_CUDA_MAX_DEVICES 16
|
||||
|
||||
// backend API
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_cuda_init(int device);
|
||||
|
||||
GGML_BACKEND_API bool ggml_backend_is_cuda(ggml_backend_t backend);
|
||||
|
||||
// device buffer
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
|
||||
|
||||
// split tensor buffer that splits matrices by rows across multiple devices
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(int main_device, const float * tensor_split);
|
||||
|
||||
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
|
||||
|
||||
GGML_BACKEND_API int ggml_backend_cuda_get_device_count(void);
|
||||
GGML_BACKEND_API void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
|
||||
GGML_BACKEND_API void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
|
||||
|
||||
GGML_BACKEND_API bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
|
||||
GGML_BACKEND_API void ggml_backend_cuda_unregister_host_buffer(void * buffer);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cuda_reg(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
50
dep/include/vall_e.cpp/ggml-kompute.h
Normal file
50
dep/include/vall_e.cpp/ggml-kompute.h
Normal file
@ -0,0 +1,50 @@
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define GGML_KOMPUTE_MAX_DEVICES 16
|
||||
|
||||
// Descriptor for a Vulkan device, as handed out by ggml_vk_available_devices()
// and ggml_vk_current_device(). Member order and types are part of the ABI.
struct ggml_vk_device {
    int          index;
    int          type;            // same as VkPhysicalDeviceType
    size_t       heapSize;
    const char * name;
    const char * vendor;
    int          subgroupSize;
    uint64_t     bufferAlignment;
    uint64_t     maxAlloc;
};
|
||||
|
||||
struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
|
||||
bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name);
|
||||
bool ggml_vk_has_vulkan(void);
|
||||
bool ggml_vk_has_device(void);
|
||||
struct ggml_vk_device ggml_vk_current_device(void);
|
||||
|
||||
//
|
||||
// backend API
|
||||
//
|
||||
|
||||
// forward declaration
|
||||
typedef struct ggml_backend * ggml_backend_t;
|
||||
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_kompute_init(int device);
|
||||
|
||||
GGML_BACKEND_API bool ggml_backend_is_kompute(ggml_backend_t backend);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_kompute_reg(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
66
dep/include/vall_e.cpp/ggml-metal.h
Normal file
66
dep/include/vall_e.cpp/ggml-metal.h
Normal file
@ -0,0 +1,66 @@
|
||||
// Note: this description is outdated
|
||||
//
|
||||
// An interface allowing to compute ggml_cgraph with Metal
|
||||
//
|
||||
// This is a fully functional interface that extends ggml with GPU support for Apple devices.
|
||||
// A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, etc.)
|
||||
//
|
||||
// How it works?
|
||||
//
|
||||
// As long as your program can create and evaluate a ggml_cgraph on the CPU, you can use this
|
||||
// interface to evaluate the same graph on the GPU. Instead of using ggml_graph_compute(), you
|
||||
// use ggml_metal_graph_compute() (or ggml_vulkan_graph_compute(), etc.)
|
||||
//
|
||||
// You only need to make sure that all memory buffers that you used during the graph creation
|
||||
// are mapped to the device memory with the ggml_metal_add_buffer() function. This mapping is
|
||||
// used during the graph evaluation to determine the arguments of the compute kernels.
|
||||
//
|
||||
// Synchronization between device and host memory (for example for input and output tensors)
|
||||
// is done with the ggml_metal_set_tensor() and ggml_metal_get_tensor() functions.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
struct ggml_tensor;
|
||||
struct ggml_cgraph;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//
|
||||
// backend API
|
||||
// user-code should use only these functions
|
||||
//
|
||||
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_metal_init(void);
|
||||
|
||||
GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend);
|
||||
|
||||
GGML_DEPRECATED(
|
||||
GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
|
||||
"obsoleted by the new device interface - https://github.com/ggml-org/llama.cpp/pull/9713");
|
||||
|
||||
GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
|
||||
|
||||
// helper to check if the device supports a specific family
|
||||
// ideally, the user code should be doing these checks
|
||||
// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
|
||||
GGML_BACKEND_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
|
||||
|
||||
// capture all command buffers committed the next time `ggml_backend_graph_compute` is called
|
||||
GGML_BACKEND_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_metal_reg(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
26
dep/include/vall_e.cpp/ggml-opencl.h
Normal file
26
dep/include/vall_e.cpp/ggml-opencl.h
Normal file
@ -0,0 +1,26 @@
|
||||
#ifndef GGML_OPENCL_H
|
||||
#define GGML_OPENCL_H
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//
|
||||
// backend API
|
||||
//
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_opencl_init(void);
|
||||
GGML_BACKEND_API bool ggml_backend_is_opencl(ggml_backend_t backend);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void);
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_opencl_reg(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // GGML_OPENCL_H
|
||||
216
dep/include/vall_e.cpp/ggml-opt.h
Normal file
216
dep/include/vall_e.cpp/ggml-opt.h
Normal file
@ -0,0 +1,216 @@
|
||||
// This file contains functionality for training models using GGML.
|
||||
// It is not strictly needed vs. just vanilla GGML but it provides a more high-level interface for common needs such as datasets.
|
||||
// At the bottom of this file especially there are relatively high-level functions that are suitable for use or adaptation in user code.
|
||||
//
|
||||
// Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct ggml_opt_dataset;
|
||||
struct ggml_opt_context;
|
||||
struct ggml_opt_result;
|
||||
|
||||
typedef struct ggml_opt_dataset * ggml_opt_dataset_t;
|
||||
typedef struct ggml_opt_context * ggml_opt_context_t;
|
||||
typedef struct ggml_opt_result * ggml_opt_result_t;
|
||||
|
||||
// ====== Loss ======
|
||||
|
||||
// built-in loss types, i.e. the built-in quantities minimized by the optimizer
|
||||
// custom loss types can be defined via mean or sum which simply reduce the outputs for all datapoints to a single value
|
||||
// Loss functions that ship with the optimizer.
// The explicit values are the same 0..3 sequence the implicit numbering yields.
enum ggml_opt_loss_type {
    GGML_OPT_LOSS_TYPE_MEAN               = 0,
    GGML_OPT_LOSS_TYPE_SUM                = 1,
    GGML_OPT_LOSS_TYPE_CROSS_ENTROPY      = 2,
    GGML_OPT_LOSS_TYPE_MEAN_SQUARED_ERROR = 3,
};
|
||||
|
||||
// ====== Dataset ======
|
||||
|
||||
GGML_API ggml_opt_dataset_t ggml_opt_dataset_init(
|
||||
int64_t ne_datapoint, // number of elements per datapoint
|
||||
int64_t ne_label, // number of elements per label
|
||||
int64_t ndata, // total number of datapoints/labels
|
||||
int64_t ndata_shard); // number of datapoints/labels per shard (unit at which the dataset is shuffled/copied)
|
||||
GGML_API void ggml_opt_dataset_free(ggml_opt_dataset_t dataset);
|
||||
|
||||
// get underlying tensors that store the data
|
||||
GGML_API struct ggml_tensor * ggml_opt_dataset_data (ggml_opt_dataset_t dataset); // shape = [ne_datapoint, ndata]
|
||||
GGML_API struct ggml_tensor * ggml_opt_dataset_labels(ggml_opt_dataset_t dataset); // shape = [ne_label, ndata]
|
||||
|
||||
// shuffle the first idata datapoints of the dataset using the RNG from opt_ctx; shuffle all datapoints if idata is negative
|
||||
GGML_API void ggml_opt_dataset_shuffle(ggml_opt_context_t opt_ctx, ggml_opt_dataset_t dataset, int64_t idata);
|
||||
|
||||
// get batch at position ibatch from dataset and copy the data to data_batch and labels_batch
|
||||
GGML_API void ggml_opt_dataset_get_batch(
|
||||
ggml_opt_dataset_t dataset,
|
||||
struct ggml_tensor * data_batch, // shape = [ne_datapoint, ndata_batch]
|
||||
struct ggml_tensor * labels_batch, // shape = [ne_label, ndata_batch]
|
||||
int64_t ibatch);
|
||||
|
||||
// ====== Model / Context ======
|
||||
|
||||
// Selects which kind of graph an optimization context builds.
// The explicit values are the same 0..2 sequence the implicit numbering yields.
enum ggml_opt_build_type {
    GGML_OPT_BUILD_TYPE_FORWARD = 0,
    GGML_OPT_BUILD_TYPE_GRAD    = 1,
    GGML_OPT_BUILD_TYPE_OPT     = 2,
};
|
||||
|
||||
// parameters that control which optimizer is used and how said optimizer tries to find the minimal loss
|
||||
// Hyper-parameters handed to the optimizer; at present this is a single AdamW group.
struct ggml_opt_optimizer_params {
    struct {
        float alpha; // learning rate
        float beta1;
        float beta2;
        float eps;   // epsilon for numerical stability
        float wd;    // weight decay for AdamW, use 0.0f to disable
    } adamw;         // AdamW optimizer parameters
};
|
||||
|
||||
// callback to calculate optimizer parameters prior to a backward pass
|
||||
// userdata can be used to pass arbitrary data
|
||||
typedef struct ggml_opt_optimizer_params (*ggml_opt_get_optimizer_params)(void * userdata);
|
||||
|
||||
// returns the default optimizer params (constant)
|
||||
// userdata is not used
|
||||
GGML_API struct ggml_opt_optimizer_params ggml_opt_get_default_optimizer_params(void * userdata);
|
||||
|
||||
// parameters for initializing a new optimization context
|
||||
struct ggml_opt_params {
|
||||
ggml_backend_sched_t backend_sched; // defines which backends are used to construct the compute graphs
|
||||
|
||||
struct ggml_context * ctx_compute; // created in user code, holds non-static tensors
|
||||
|
||||
// the forward graph is defined by inputs and outputs
|
||||
// those tensors and all tensors in between are not intended to be reusable between multiple optimization contexts
|
||||
struct ggml_tensor * inputs;
|
||||
struct ggml_tensor * outputs;
|
||||
|
||||
enum ggml_opt_loss_type loss_type;
|
||||
enum ggml_opt_build_type build_type;
|
||||
|
||||
int32_t opt_period; // after how many gradient accumulation steps an optimizer step should be done
|
||||
|
||||
ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
|
||||
void * get_opt_pars_ud; // userdata for calculating optimizer parameters
|
||||
};
|
||||
|
||||
// get parameters for an optimization context with defaults set where possible
|
||||
// parameters for which no sensible defaults exist are supplied as arguments to this function
|
||||
GGML_API ggml_opt_params ggml_opt_default_params(
|
||||
ggml_backend_sched_t backend_sched,
|
||||
struct ggml_context * ctx_compute,
|
||||
struct ggml_tensor * inputs,
|
||||
struct ggml_tensor * outputs,
|
||||
enum ggml_opt_loss_type loss_type);
|
||||
|
||||
GGML_API ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params);
|
||||
GGML_API void ggml_opt_free(ggml_opt_context_t opt_ctx);
|
||||
|
||||
// set gradients to zero, initialize loss, and optionally reset the optimizer
|
||||
GGML_API void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer);
|
||||
|
||||
// get underlying tensors that store data
|
||||
GGML_API struct ggml_tensor * ggml_opt_inputs( ggml_opt_context_t opt_ctx); // forward graph input tensor
|
||||
GGML_API struct ggml_tensor * ggml_opt_outputs( ggml_opt_context_t opt_ctx); // forward graph output tensor
|
||||
GGML_API struct ggml_tensor * ggml_opt_labels( ggml_opt_context_t opt_ctx); // labels to compare outputs against
|
||||
GGML_API struct ggml_tensor * ggml_opt_loss( ggml_opt_context_t opt_ctx); // scalar tensor that contains the loss
|
||||
GGML_API struct ggml_tensor * ggml_opt_pred( ggml_opt_context_t opt_ctx); // predictions made by outputs
|
||||
GGML_API struct ggml_tensor * ggml_opt_ncorrect(ggml_opt_context_t opt_ctx); // number of matching predictions between outputs and labels
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, struct ggml_tensor * node);
|
||||
|
||||
// ====== Optimization Result ======
|
||||
|
||||
// (void) rather than (): in C an empty parameter list is not a prototype
GGML_API ggml_opt_result_t ggml_opt_result_init(void);
|
||||
GGML_API void ggml_opt_result_free(ggml_opt_result_t result);
|
||||
GGML_API void ggml_opt_result_reset(ggml_opt_result_t result);
|
||||
|
||||
// get data from result, uncertainties are optional and can be ignored by passing NULL
|
||||
GGML_API void ggml_opt_result_ndata( ggml_opt_result_t result, int64_t * ndata); // writes 1 value, number of datapoints
|
||||
GGML_API void ggml_opt_result_loss( ggml_opt_result_t result, double * loss, double * unc); // writes 1 value
|
||||
GGML_API void ggml_opt_result_pred( ggml_opt_result_t result, int32_t * pred); // writes ndata values
|
||||
GGML_API void ggml_opt_result_accuracy(ggml_opt_result_t result, double * accuracy, double * unc); // writes 1 value
|
||||
|
||||
// ====== Computation ======
|
||||
|
||||
// do forward pass, increment result if not NULL
|
||||
GGML_API void ggml_opt_forward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
|
||||
|
||||
// do forward pass, increment result if not NULL, do backward pass
|
||||
GGML_API void ggml_opt_forward_backward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
|
||||
|
||||
// ############################################################################
|
||||
// ## The high-level functions start here. They do not depend on any private ##
|
||||
// ## functions or structs and can be copied to and adapted for user code. ##
|
||||
// ############################################################################
|
||||
|
||||
// ====== Intended Usage ======
|
||||
//
|
||||
// 1. Select the appropriate loss for your problem.
|
||||
// 2. Create a dataset and set the data for the "data" tensor. Also set the "labels" tensor if your loss needs them.
|
||||
// Setting the shard size to 1 will be fine, it's the granularity with which data is shuffled/loaded (bigger values are faster).
|
||||
// 3. Create a GGML graph for your model with no_alloc == true. Use two separate contexts for the tensors.
|
||||
// The first context should contain the model parameters and inputs and be allocated statically in user code.
|
||||
// The second context should contain all other tensors and will be (re)allocated automatically.
|
||||
// Due to this automated allocation the data of the second context is not defined when accessed in user code.
|
||||
// Note that the second dimension of the inputs/outputs are interpreted as the number of datapoints in those tensors.
|
||||
// 4. Call ggml_opt_fit. If you need more control you can use ggml_opt_epoch instead.
|
||||
|
||||
// signature for a callback while evaluating opt_ctx on dataset, called after an evaluation
|
||||
typedef void (*ggml_opt_epoch_callback)(
|
||||
bool train, // true after training evaluation, false after validation evaluation
|
||||
ggml_opt_context_t opt_ctx,
|
||||
ggml_opt_dataset_t dataset,
|
||||
ggml_opt_result_t result, // result associated with the dataset subsection
|
||||
int64_t ibatch, // number of batches that have been evaluated so far
|
||||
int64_t ibatch_max, // total number of batches in this dataset subsection
|
||||
int64_t t_start_us); // time at which the evaluation on the dataset subsection was started
|
||||
|
||||
// do training on front of dataset, do evaluation only on back of dataset
|
||||
GGML_API void ggml_opt_epoch(
|
||||
ggml_opt_context_t opt_ctx,
|
||||
ggml_opt_dataset_t dataset,
|
||||
ggml_opt_result_t result_train, // result to increment during training, ignored if NULL
|
||||
ggml_opt_result_t result_eval, // result to increment during evaluation, ignored if NULL
|
||||
int64_t idata_split, // data index at which to split training and evaluation
|
||||
ggml_opt_epoch_callback callback_train,
|
||||
ggml_opt_epoch_callback callback_eval);
|
||||
|
||||
// callback that prints a progress bar on stderr
|
||||
GGML_API void ggml_opt_epoch_callback_progress_bar(
|
||||
bool train,
|
||||
ggml_opt_context_t opt_ctx,
|
||||
ggml_opt_dataset_t dataset,
|
||||
ggml_opt_result_t result,
|
||||
int64_t ibatch,
|
||||
int64_t ibatch_max,
|
||||
int64_t t_start_us);
|
||||
|
||||
// fit model defined by inputs and outputs to dataset
|
||||
GGML_API void ggml_opt_fit(
|
||||
ggml_backend_sched_t backend_sched, // backend scheduler for constructing the compute graphs
|
||||
ggml_context * ctx_compute, // context with temporarily allocated tensors to calculate the outputs
|
||||
ggml_tensor * inputs, // input tensor with shape [ne_datapoint, ndata_batch]
|
||||
ggml_tensor * outputs, // output tensor, must have shape [ne_label, ndata_batch] if labels are used
|
||||
ggml_opt_dataset_t dataset, // dataset with data and optionally also labels
|
||||
enum ggml_opt_loss_type loss_type, // loss to minimize
|
||||
ggml_opt_get_optimizer_params get_opt_pars, // callback to get optimizer params, userdata is pointer to epoch (of type int64_t)
|
||||
int64_t nepoch, // how many times the dataset should be iterated over
|
||||
int64_t nbatch_logical, // datapoints optimizer step, must be a multiple of ndata_batch in inputs/outputs
|
||||
float val_split, // fraction of the dataset to use for validation, must be in [0.0f, 1.0f)
|
||||
bool silent); // whether or not info prints to stderr should be suppressed
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
30
dep/include/vall_e.cpp/ggml-rpc.h
Normal file
30
dep/include/vall_e.cpp/ggml-rpc.h
Normal file
@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define GGML_RPC_MAX_SERVERS 16
|
||||
|
||||
// backend API
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint);
|
||||
GGML_BACKEND_API bool ggml_backend_is_rpc(ggml_backend_t backend);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint);
|
||||
|
||||
GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
|
||||
|
||||
GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint,
|
||||
const char * cache_dir,
|
||||
size_t free_mem, size_t total_mem);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
49
dep/include/vall_e.cpp/ggml-sycl.h
Normal file
49
dep/include/vall_e.cpp/ggml-sycl.h
Normal file
@ -0,0 +1,49 @@
|
||||
//
|
||||
// MIT license
|
||||
// Copyright (C) 2024 Intel Corporation
|
||||
// SPDX-License-Identifier: MIT
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#define GGML_SYCL_NAME "SYCL"
|
||||
#define GGML_SYCL_MAX_DEVICES 48
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// backend API
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_sycl_init(int device);
|
||||
|
||||
GGML_BACKEND_API bool ggml_backend_is_sycl(ggml_backend_t backend);
|
||||
|
||||
// device buffer
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
|
||||
|
||||
// split tensor buffer that splits matrices by rows across multiple devices
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
|
||||
|
||||
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
|
||||
|
||||
GGML_BACKEND_API void ggml_backend_sycl_print_sycl_devices(void);
|
||||
GGML_BACKEND_API void ggml_backend_sycl_get_gpu_list(int *id_list, int max_len);
|
||||
GGML_BACKEND_API void ggml_backend_sycl_get_device_description(int device,
|
||||
char *description,
|
||||
size_t description_size);
|
||||
// explicit (void), matching the other no-argument declarations in this header
// (in C an empty parameter list is not a prototype)
GGML_BACKEND_API int ggml_backend_sycl_get_device_count(void);
|
||||
GGML_BACKEND_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
|
||||
|
||||
// SYCL doesn't support registering host memory, keep here for reference
|
||||
// GGML_BACKEND_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
|
||||
// GGML_BACKEND_API void ggml_backend_sycl_unregister_host_buffer(void * buffer);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_sycl_reg(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
29
dep/include/vall_e.cpp/ggml-vulkan.h
Normal file
29
dep/include/vall_e.cpp/ggml-vulkan.h
Normal file
@ -0,0 +1,29 @@
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define GGML_VK_NAME "Vulkan"
|
||||
#define GGML_VK_MAX_DEVICES 16
|
||||
|
||||
// backend API
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_vk_init(size_t dev_num);
|
||||
|
||||
GGML_BACKEND_API bool ggml_backend_is_vk(ggml_backend_t backend);
|
||||
GGML_BACKEND_API int ggml_backend_vk_get_device_count(void);
|
||||
GGML_BACKEND_API void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size);
|
||||
GGML_BACKEND_API void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num);
|
||||
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_vk_reg(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
2221
dep/include/vall_e.cpp/ggml.h
Normal file
2221
dep/include/vall_e.cpp/ggml.h
Normal file
File diff suppressed because it is too large
Load Diff
30
dep/include/vall_e.cpp/llama-cpp.h
Normal file
30
dep/include/vall_e.cpp/llama-cpp.h
Normal file
@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef __cplusplus
|
||||
#error "This header is for C++ only"
|
||||
#endif
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "llama.h"
|
||||
|
||||
struct llama_model_deleter {
|
||||
void operator()(llama_model * model) { llama_model_free(model); }
|
||||
};
|
||||
|
||||
struct llama_context_deleter {
|
||||
void operator()(llama_context * context) { llama_free(context); }
|
||||
};
|
||||
|
||||
struct llama_sampler_deleter {
|
||||
void operator()(llama_sampler * sampler) { llama_sampler_free(sampler); }
|
||||
};
|
||||
|
||||
struct llama_adapter_lora_deleter {
|
||||
void operator()(llama_adapter_lora * adapter) { llama_adapter_lora_free(adapter); }
|
||||
};
|
||||
|
||||
typedef std::unique_ptr<llama_model, llama_model_deleter> llama_model_ptr;
|
||||
typedef std::unique_ptr<llama_context, llama_context_deleter> llama_context_ptr;
|
||||
typedef std::unique_ptr<llama_sampler, llama_sampler_deleter> llama_sampler_ptr;
|
||||
typedef std::unique_ptr<llama_adapter_lora, llama_adapter_lora_deleter> llama_adapter_lora_ptr;
|
||||
61
dep/include/vall_e.cpp/llama-impl.h
Normal file
61
dep/include/vall_e.cpp/llama-impl.h
Normal file
@ -0,0 +1,61 @@
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h" // for ggml_log_level
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#ifdef __GNUC__
|
||||
# if defined(__MINGW32__) && !defined(__clang__)
|
||||
# define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
|
||||
# else
|
||||
# define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
||||
# endif
|
||||
#else
|
||||
# define LLAMA_ATTRIBUTE_FORMAT(...)
|
||||
#endif
|
||||
|
||||
//
|
||||
// logging
|
||||
//
|
||||
|
||||
LLAMA_ATTRIBUTE_FORMAT(2, 3)
|
||||
void llama_log_internal (ggml_log_level level, const char * format, ...);
|
||||
void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
|
||||
|
||||
#define LLAMA_LOG(...) llama_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
|
||||
#define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
|
||||
#define LLAMA_LOG_WARN(...) llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
|
||||
#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
|
||||
#define LLAMA_LOG_DEBUG(...) llama_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
|
||||
#define LLAMA_LOG_CONT(...) llama_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)
|
||||
|
||||
//
|
||||
// helpers
|
||||
//
|
||||
|
||||
// Thin wrapper around a T whose default constructor is user-provided and empty,
// i.e. the constructor itself assigns nothing to `value`.
template <class T>
struct no_init {
    T value;

    no_init() {
        /* do nothing */
    }
};
|
||||
|
||||
struct time_meas {
|
||||
time_meas(int64_t & t_acc, bool disable = false);
|
||||
~time_meas();
|
||||
|
||||
const int64_t t_start_us;
|
||||
|
||||
int64_t & t_acc;
|
||||
};
|
||||
|
||||
void replace_all(std::string & s, const std::string & search, const std::string & replace);
|
||||
|
||||
// TODO: rename to llama_format ?
|
||||
LLAMA_ATTRIBUTE_FORMAT(1, 2)
|
||||
std::string format(const char * fmt, ...);
|
||||
|
||||
std::string llama_format_tensor_shape(const std::vector<int64_t> & ne);
|
||||
std::string llama_format_tensor_shape(const struct ggml_tensor * t);
|
||||
|
||||
std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i);
|
||||
125
dep/include/vall_e.cpp/llama-vocab.h
Normal file
125
dep/include/vall_e.cpp/llama-vocab.h
Normal file
@ -0,0 +1,125 @@
|
||||
#pragma once
|
||||
|
||||
#include "llama.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
struct LLM_KV;
|
||||
struct llama_model_loader;
|
||||
|
||||
struct llama_vocab {
|
||||
struct token_data {
|
||||
std::string text;
|
||||
float score;
|
||||
llama_token_attr attr;
|
||||
};
|
||||
|
||||
llama_vocab();
|
||||
~llama_vocab();
|
||||
|
||||
void load(llama_model_loader & ml, const LLM_KV & kv);
|
||||
|
||||
enum llama_vocab_type get_type() const;
|
||||
enum llama_vocab_pre_type get_pre_type() const;
|
||||
|
||||
uint32_t n_tokens() const;
|
||||
uint32_t n_token_types() const;
|
||||
|
||||
std::string type_name() const;
|
||||
|
||||
bool is_normal (llama_token id) const;
|
||||
bool is_unknown (llama_token id) const;
|
||||
bool is_control (llama_token id) const;
|
||||
bool is_byte (llama_token id) const;
|
||||
bool is_user_defined(llama_token id) const;
|
||||
bool is_unused (llama_token id) const;
|
||||
bool is_eog (llama_token id) const;
|
||||
|
||||
uint8_t token_to_byte(llama_token id) const;
|
||||
llama_token byte_to_token(uint8_t ch) const;
|
||||
|
||||
llama_token text_to_token(const std::string & text) const;
|
||||
|
||||
const token_data & get_token_data(llama_token id) const;
|
||||
|
||||
const char * token_get_text (llama_token id) const;
|
||||
float token_get_score(llama_token id) const;
|
||||
llama_token_attr token_get_attr (llama_token id) const;
|
||||
|
||||
llama_token token_bos() const;
|
||||
llama_token token_eos() const;
|
||||
llama_token token_eot() const;
|
||||
llama_token token_eom() const;
|
||||
llama_token token_unk() const;
|
||||
llama_token token_sep() const;
|
||||
llama_token token_nl () const;
|
||||
llama_token token_pad() const;
|
||||
|
||||
llama_token token_prefix() const;
|
||||
llama_token token_middle() const;
|
||||
llama_token token_suffix() const;
|
||||
|
||||
llama_token token_fim_pre() const;
|
||||
llama_token token_fim_suf() const;
|
||||
llama_token token_fim_mid() const;
|
||||
llama_token token_fim_pad() const;
|
||||
llama_token token_fim_rep() const;
|
||||
llama_token token_fim_sep() const;
|
||||
|
||||
bool get_add_space_prefix () const;
|
||||
bool get_add_bos () const;
|
||||
bool get_add_eos () const;
|
||||
bool get_ignore_merges () const;
|
||||
bool get_clean_spaces () const;
|
||||
bool get_remove_extra_whitespaces () const;
|
||||
bool get_escape_whitespaces () const;
|
||||
bool get_treat_whitespace_as_suffix() const;
|
||||
|
||||
int max_token_len() const;
|
||||
|
||||
int find_bpe_rank(const std::string & token_left, const std::string & token_right) const;
|
||||
|
||||
int32_t tokenize(
|
||||
const char * text,
|
||||
int32_t text_len,
|
||||
llama_token * tokens,
|
||||
int32_t n_tokens_max,
|
||||
bool add_special,
|
||||
bool parse_special) const;
|
||||
|
||||
std::vector<llama_token> tokenize(
|
||||
const std::string & raw_text,
|
||||
bool add_special,
|
||||
bool parse_special = false) const;
|
||||
|
||||
// does not write null-terminator to buf
|
||||
int32_t token_to_piece(
|
||||
llama_token token,
|
||||
char * buf,
|
||||
int32_t length,
|
||||
int32_t lstrip,
|
||||
bool special) const;
|
||||
|
||||
// use cached data
|
||||
const std::string & token_to_piece(llama_token token) const;
|
||||
|
||||
int32_t detokenize(
|
||||
const llama_token * tokens,
|
||||
int32_t n_tokens,
|
||||
char * text,
|
||||
int32_t text_len_max,
|
||||
bool remove_special,
|
||||
bool unparse_special) const;
|
||||
|
||||
std::string detokenize(
|
||||
const std::vector<llama_token> & tokens,
|
||||
bool special) const;
|
||||
|
||||
void print_info() const;
|
||||
|
||||
private:
|
||||
struct impl;
|
||||
std::unique_ptr<impl> pimpl;
|
||||
};
|
||||
1431
dep/include/vall_e.cpp/llama.h
Normal file
1431
dep/include/vall_e.cpp/llama.h
Normal file
File diff suppressed because it is too large
Load Diff
358
dep/include/vall_e.cpp/llama_hack.h
Normal file
358
dep/include/vall_e.cpp/llama_hack.h
Normal file
@ -0,0 +1,358 @@
|
||||
#pragma once
|
||||
|
||||
#include "llama-vocab.h"
|
||||
#include <array>
|
||||
|
||||
/* Begin cringe so I can access the model's tok_embd */
|
||||
// it needs to be copied so the struct layout is exactly as it is under llama.cpp
|
||||
#define LLAMA_MAX_LAYERS 512
|
||||
#define LLAMA_MAX_EXPERTS 160 // DeepSeekV2
|
||||
|
||||
// Placeholder enum with a single enumerator; the explicit 0 equals the implicit value.
enum llm_type {
    LLM_TYPE_UNKNOWN = 0,
};
|
||||
|
||||
// Placeholder enum with a single enumerator; the explicit 0 equals the implicit value.
enum llm_arch {
    LLM_ARCH_UNKNOWN = 0,
};
|
||||
|
||||
// Gating function selector; llama_hparams::expert_gating_func defaults to the NONE value.
enum llama_expert_gating_func_type {
    LLAMA_EXPERT_GATING_FUNC_TYPE_NONE    = 0,
    LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX = 1,
    LLAMA_EXPERT_GATING_FUNC_TYPE_SIGMOID = 2,
};
|
||||
|
||||
// Layout copy of the llama.cpp struct of the same name: member order and types must stay as they are.
struct llama_hparams_posnet {
    uint32_t n_embd;
    uint32_t n_layer;
};
|
||||
|
||||
// Layout copy of the llama.cpp struct of the same name: member order and types must stay as they are.
struct llama_hparams_convnext {
    uint32_t n_embd;
    uint32_t n_layer;
};
|
||||
|
||||
struct llama_hparams {
|
||||
bool vocab_only;
|
||||
bool rope_finetuned;
|
||||
bool use_par_res;
|
||||
bool swin_norm;
|
||||
|
||||
uint32_t n_ctx_train; // context size the model was trained on
|
||||
uint32_t n_embd;
|
||||
uint32_t n_embd_features = 0;
|
||||
uint32_t n_layer;
|
||||
uint32_t n_rot;
|
||||
uint32_t n_swa = 0; // sliding window attention (SWA)
|
||||
uint32_t n_swa_pattern = 1; // by default, all layers use non-sliding-window attention
|
||||
uint32_t n_embd_head_k; // dimension of keys (d_k). d_q is assumed to be the same, but there are n_head q heads, and only n_head_kv k-v heads
|
||||
uint32_t n_embd_head_v; // dimension of values (d_v) aka n_embd_head
|
||||
uint32_t n_expert = 0;
|
||||
uint32_t n_expert_used = 0;
|
||||
uint32_t n_rel_attn_bkts = 0;
|
||||
|
||||
// for WavTokenizer
|
||||
struct llama_hparams_posnet posnet;
|
||||
struct llama_hparams_convnext convnext;
|
||||
|
||||
std::array<uint32_t, LLAMA_MAX_LAYERS> n_head_arr;
|
||||
std::array<uint32_t, LLAMA_MAX_LAYERS> n_head_kv_arr;
|
||||
std::array<uint32_t, LLAMA_MAX_LAYERS> n_ff_arr;
|
||||
|
||||
uint32_t n_layer_dense_lead = 0;
|
||||
uint32_t n_lora_q = 0;
|
||||
uint32_t n_lora_kv = 0;
|
||||
uint32_t n_ff_exp = 0;
|
||||
uint32_t n_ff_shexp = 0;
|
||||
uint32_t n_expert_shared = 0;
|
||||
uint32_t n_norm_groups = 0;
|
||||
|
||||
float expert_weights_scale = 0.0;
|
||||
bool expert_weights_norm = false;
|
||||
uint32_t expert_gating_func = LLAMA_EXPERT_GATING_FUNC_TYPE_NONE;
|
||||
|
||||
float f_norm_eps;
|
||||
float f_norm_rms_eps;
|
||||
float f_norm_group_eps;
|
||||
|
||||
float f_attn_logit_softcapping = 50.0f;
|
||||
float f_final_logit_softcapping = 30.0f;
|
||||
|
||||
// for RWKV
|
||||
uint32_t rescale_every_n_layers = 0;
|
||||
uint32_t time_mix_extra_dim = 0;
|
||||
uint32_t time_decay_extra_dim = 0;
|
||||
uint32_t wkv_head_size = 0;
|
||||
uint32_t token_shift_count = 2;
|
||||
uint32_t n_lora_decay = 0;
|
||||
uint32_t n_lora_iclr = 0;
|
||||
uint32_t n_lora_value_res_mix = 0;
|
||||
uint32_t n_lora_gate = 0;
|
||||
|
||||
float rope_attn_factor = 1.0f;
|
||||
float rope_freq_base_train;
|
||||
float rope_freq_base_train_swa;
|
||||
float rope_freq_scale_train;
|
||||
float rope_freq_scale_train_swa;
|
||||
uint32_t n_ctx_orig_yarn;
|
||||
float rope_yarn_log_mul;
|
||||
|
||||
std::array<int, 4> rope_sections;
|
||||
|
||||
// for State Space Models
|
||||
uint32_t ssm_d_conv = 0;
|
||||
uint32_t ssm_d_inner = 0;
|
||||
uint32_t ssm_d_state = 0;
|
||||
uint32_t ssm_dt_rank = 0;
|
||||
|
||||
bool ssm_dt_b_c_rms = false;
|
||||
|
||||
float f_clamp_kqv = 0.0f;
|
||||
float f_max_alibi_bias = 0.0f;
|
||||
float f_logit_scale = 0.0f;
|
||||
|
||||
// Additional scale factors (Granite/Granite MoE)
|
||||
float f_residual_scale = 0.0f;
|
||||
float f_embedding_scale = 0.0f;
|
||||
float f_attention_scale = 0.0f;
|
||||
|
||||
bool causal_attn = true;
|
||||
bool use_alibi = false;
|
||||
bool attn_soft_cap = false;
|
||||
|
||||
// needed by encoder-decoder models (e.g. T5, FLAN-T5)
|
||||
// ref: https://github.com/ggerganov/llama.cpp/pull/8141
|
||||
llama_token dec_start_token_id = LLAMA_TOKEN_NULL;
|
||||
|
||||
enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_NONE;
|
||||
enum llama_rope_type rope_type = LLAMA_ROPE_TYPE_NONE;
|
||||
enum llama_rope_scaling_type rope_scaling_type_train = LLAMA_ROPE_SCALING_TYPE_NONE;
|
||||
|
||||
uint32_t n_head(uint32_t il = 0) const;
|
||||
|
||||
uint32_t n_head_kv(uint32_t il = 0) const;
|
||||
|
||||
uint32_t n_ff(uint32_t il = 0) const;
|
||||
|
||||
uint32_t n_gqa(uint32_t il = 0) const;
|
||||
|
||||
// dimension of key embeddings across all k-v heads
|
||||
uint32_t n_embd_k_gqa(uint32_t il = 0) const;
|
||||
|
||||
// dimension of value embeddings across all k-v heads
|
||||
uint32_t n_embd_v_gqa(uint32_t il = 0) const;
|
||||
|
||||
// dimension of the rolling state embeddings
|
||||
// corresponds to Mamba's conv_states size or RWKV's token_shift states size
|
||||
uint32_t n_embd_k_s() const;
|
||||
|
||||
// dimension of the recurrent state embeddings
|
||||
uint32_t n_embd_v_s() const;
|
||||
|
||||
bool is_swa(uint32_t il) const;
|
||||
};
|
||||
|
||||
struct llama_model {
|
||||
llm_type type = LLM_TYPE_UNKNOWN;
|
||||
llm_arch arch = LLM_ARCH_UNKNOWN;
|
||||
|
||||
std::string name = "n/a";
|
||||
|
||||
llama_hparams hparams = {};
|
||||
llama_vocab vocab;
|
||||
|
||||
struct ggml_tensor * tok_embd = nullptr;
|
||||
struct ggml_tensor * type_embd = nullptr;
|
||||
struct ggml_tensor * pos_embd = nullptr;
|
||||
struct ggml_tensor * tok_norm = nullptr;
|
||||
struct ggml_tensor * tok_norm_b = nullptr;
|
||||
|
||||
struct ggml_tensor * output_norm = nullptr;
|
||||
struct ggml_tensor * output_norm_b = nullptr;
|
||||
struct ggml_tensor * output = nullptr;
|
||||
struct ggml_tensor * output_b = nullptr;
|
||||
struct ggml_tensor * output_norm_enc = nullptr;
|
||||
|
||||
// classifier
|
||||
struct ggml_tensor * cls = nullptr;
|
||||
struct ggml_tensor * cls_b = nullptr;
|
||||
struct ggml_tensor * cls_out = nullptr;
|
||||
struct ggml_tensor * cls_out_b = nullptr;
|
||||
|
||||
struct ggml_tensor * conv1d = nullptr;
|
||||
struct ggml_tensor * conv1d_b = nullptr;
|
||||
};
|
||||
|
||||
struct llama_vocab_hack {
|
||||
struct token_data {
|
||||
std::string text;
|
||||
float score;
|
||||
llama_token_attr attr;
|
||||
};
|
||||
|
||||
llama_vocab_hack();
|
||||
~llama_vocab_hack();
|
||||
|
||||
void load(llama_model_loader & ml, const LLM_KV & kv);
|
||||
|
||||
enum llama_vocab_type get_type() const;
|
||||
enum llama_vocab_pre_type get_pre_type() const;
|
||||
|
||||
uint32_t n_tokens() const;
|
||||
uint32_t n_token_types() const;
|
||||
|
||||
std::string type_name() const;
|
||||
|
||||
bool is_normal (llama_token id) const;
|
||||
bool is_unknown (llama_token id) const;
|
||||
bool is_control (llama_token id) const;
|
||||
bool is_byte (llama_token id) const;
|
||||
bool is_user_defined(llama_token id) const;
|
||||
bool is_unused (llama_token id) const;
|
||||
bool is_eog (llama_token id) const;
|
||||
|
||||
uint8_t token_to_byte(llama_token id) const;
|
||||
llama_token byte_to_token(uint8_t ch) const;
|
||||
|
||||
llama_token text_to_token(const std::string & text) const;
|
||||
|
||||
const token_data & get_token_data(llama_token id) const;
|
||||
|
||||
const char * token_get_text (llama_token id) const;
|
||||
float token_get_score(llama_token id) const;
|
||||
llama_token_attr token_get_attr (llama_token id) const;
|
||||
|
||||
llama_token token_bos() const;
|
||||
llama_token token_eos() const;
|
||||
llama_token token_eot() const;
|
||||
llama_token token_eom() const;
|
||||
llama_token token_unk() const;
|
||||
llama_token token_sep() const;
|
||||
llama_token token_nl () const;
|
||||
llama_token token_pad() const;
|
||||
|
||||
llama_token token_prefix() const;
|
||||
llama_token token_middle() const;
|
||||
llama_token token_suffix() const;
|
||||
|
||||
llama_token token_fim_pre() const;
|
||||
llama_token token_fim_suf() const;
|
||||
llama_token token_fim_mid() const;
|
||||
llama_token token_fim_pad() const;
|
||||
llama_token token_fim_rep() const;
|
||||
llama_token token_fim_sep() const;
|
||||
|
||||
bool get_add_space_prefix () const;
|
||||
bool get_add_bos () const;
|
||||
bool get_add_eos () const;
|
||||
bool get_ignore_merges () const;
|
||||
bool get_clean_spaces () const;
|
||||
bool get_remove_extra_whitespaces () const;
|
||||
bool get_escape_whitespaces () const;
|
||||
bool get_treat_whitespace_as_suffix() const;
|
||||
|
||||
int max_token_len() const;
|
||||
|
||||
int find_bpe_rank(const std::string & token_left, const std::string & token_right) const;
|
||||
|
||||
int32_t tokenize(
|
||||
const char * text,
|
||||
int32_t text_len,
|
||||
llama_token * tokens,
|
||||
int32_t n_tokens_max,
|
||||
bool add_special,
|
||||
bool parse_special) const;
|
||||
|
||||
std::vector<llama_token> tokenize(
|
||||
const std::string & raw_text,
|
||||
bool add_special,
|
||||
bool parse_special = false) const;
|
||||
|
||||
// does not write null-terminator to buf
|
||||
int32_t token_to_piece(
|
||||
llama_token token,
|
||||
char * buf,
|
||||
int32_t length,
|
||||
int32_t lstrip,
|
||||
bool special) const;
|
||||
|
||||
// use cached data
|
||||
const std::string & token_to_piece(llama_token token) const;
|
||||
|
||||
int32_t detokenize(
|
||||
const llama_token * tokens,
|
||||
int32_t n_tokens,
|
||||
char * text,
|
||||
int32_t text_len_max,
|
||||
bool remove_special,
|
||||
bool unparse_special) const;
|
||||
|
||||
std::string detokenize(
|
||||
const std::vector<llama_token> & tokens,
|
||||
bool special) const;
|
||||
|
||||
void print_info() const;
|
||||
|
||||
struct impl {
|
||||
uint32_t n_token_types = 0; // for BERT-style token types
|
||||
|
||||
enum llama_vocab_type type = LLAMA_VOCAB_TYPE_SPM;
|
||||
enum llama_vocab_pre_type pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
|
||||
|
||||
int max_token_len = 0; // used for optimizing longest token search
|
||||
|
||||
// default LLaMA special tokens
|
||||
// TODO: should we set all of these to LLAMA_TOKEN_NULL?
|
||||
llama_token special_bos_id = 1;
|
||||
llama_token special_eos_id = 2;
|
||||
llama_token special_eot_id = LLAMA_TOKEN_NULL;
|
||||
llama_token special_eom_id = LLAMA_TOKEN_NULL;
|
||||
llama_token special_unk_id = 0;
|
||||
llama_token special_sep_id = LLAMA_TOKEN_NULL;
|
||||
llama_token special_pad_id = LLAMA_TOKEN_NULL;
|
||||
llama_token special_mask_id = LLAMA_TOKEN_NULL;
|
||||
|
||||
llama_token linefeed_id = 13;
|
||||
|
||||
// fim tokens
|
||||
llama_token special_fim_pre_id = LLAMA_TOKEN_NULL;
|
||||
llama_token special_fim_suf_id = LLAMA_TOKEN_NULL;
|
||||
llama_token special_fim_mid_id = LLAMA_TOKEN_NULL;
|
||||
llama_token special_fim_pad_id = LLAMA_TOKEN_NULL;
|
||||
llama_token special_fim_rep_id = LLAMA_TOKEN_NULL; // repo
|
||||
llama_token special_fim_sep_id = LLAMA_TOKEN_NULL; // file separator
|
||||
|
||||
// tokenizer flags
|
||||
bool add_space_prefix = false;
|
||||
bool add_bos = false;
|
||||
bool add_eos = false;
|
||||
bool ignore_merges = false;
|
||||
bool clean_spaces = false; // clean_up_tokenization_spaces
|
||||
bool remove_extra_whitespaces = false;
|
||||
bool escape_whitespaces = true;
|
||||
bool treat_whitespace_as_suffix = false;
|
||||
|
||||
std::unordered_map<std::string, llama_token> token_to_id;
|
||||
std::vector<token_data> id_to_token;
|
||||
};
|
||||
std::unique_ptr<impl> pimpl;
|
||||
};
|
||||
|
||||
/* BEGIN VALL-E SPECIFIC HELPERS */
|
||||
struct ggml_tensor * llama_get_embedding_weights(struct llama_model * model) {
|
||||
return model->tok_embd;
|
||||
}
|
||||
struct ggml_tensor * llama_get_output_head_tensor(struct llama_model * model ) {
|
||||
return model->output;
|
||||
}
|
||||
void llama_set_output_head(struct llama_model * model, struct ggml_tensor* tensor ) {
|
||||
// set the output tensor
|
||||
model->output = tensor;
|
||||
// required to properly output logits
|
||||
llama_vocab_hack* vocab = (llama_vocab_hack*) const_cast<llama_vocab*>(llama_model_get_vocab( model ));
|
||||
vocab->pimpl->id_to_token.resize( tensor->ne[1] );
|
||||
// *const_cast<uint32_t*>(&model->hparams.n_vocab) = tensor->ne[1];
|
||||
}
|
||||
/* END VALL-E SPECIFIC HELPERS */
|
||||
|
||||
/* End cringe code */
|
||||
78
dep/include/vall_e.cpp/lstm.h
Normal file
78
dep/include/vall_e.cpp/lstm.h
Normal file
@ -0,0 +1,78 @@
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-alloc.h"
|
||||
|
||||
#include "ops.h"
|
||||
|
||||
struct encodec_lstm {
|
||||
struct ggml_tensor *l0_ih_w;
|
||||
struct ggml_tensor *l0_hh_w;
|
||||
|
||||
struct ggml_tensor *l0_ih_b;
|
||||
struct ggml_tensor *l0_hh_b;
|
||||
|
||||
struct ggml_tensor *l1_ih_w;
|
||||
struct ggml_tensor *l1_hh_w;
|
||||
|
||||
struct ggml_tensor *l1_ih_b;
|
||||
struct ggml_tensor *l1_hh_b;
|
||||
};
|
||||
|
||||
struct ggml_tensor *forward_pass_lstm_unilayer(struct ggml_context *ctx0,
|
||||
struct ggml_tensor *inp,
|
||||
struct ggml_tensor *weight_ih,
|
||||
struct ggml_tensor *weight_hh,
|
||||
struct ggml_tensor *bias_ih,
|
||||
struct ggml_tensor *bias_hh,
|
||||
char *prefix) {
|
||||
const int seq_length = inp->ne[0];
|
||||
const int input_dim = inp->ne[1];
|
||||
const int hidden_dim = weight_ih->ne[1] / 4;
|
||||
|
||||
char ct_name[10];
|
||||
char ht_name[10];
|
||||
|
||||
snprintf(ct_name, 10, "%s_ct", prefix);
|
||||
snprintf(ht_name, 10, "%s_ht", prefix);
|
||||
|
||||
struct ggml_tensor *hs = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, hidden_dim, seq_length);
|
||||
ggml_set_input(hs);
|
||||
|
||||
struct ggml_tensor *c_t = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hidden_dim);
|
||||
ggml_set_input(c_t);
|
||||
ggml_set_name(c_t, ct_name);
|
||||
|
||||
struct ggml_tensor *h_t = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hidden_dim);
|
||||
ggml_set_input(h_t);
|
||||
ggml_set_name(h_t, ht_name);
|
||||
|
||||
struct ggml_tensor *current = ggml_cont(ctx0, ggml_transpose(ctx0, inp));
|
||||
|
||||
for (int t = 0; t < seq_length; t++) {
|
||||
struct ggml_tensor *x_t = ggml_view_1d(ctx0, current, input_dim, t * current->nb[1]);
|
||||
|
||||
struct ggml_tensor *inp_gates = ggml_mul_mat(ctx0, weight_ih, x_t);
|
||||
inp_gates = ggml_add(ctx0, inp_gates, bias_ih);
|
||||
|
||||
struct ggml_tensor *hid_gates = ggml_mul_mat(ctx0, weight_hh, h_t);
|
||||
hid_gates = ggml_add(ctx0, hid_gates, bias_hh);
|
||||
|
||||
struct ggml_tensor *out_gates = ggml_add(ctx0, inp_gates, hid_gates);
|
||||
|
||||
struct ggml_tensor *i_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 0 * sizeof(float) * hidden_dim));
|
||||
struct ggml_tensor *f_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 1 * sizeof(float) * hidden_dim));
|
||||
struct ggml_tensor *g_t = ggml_tanh(ctx0 , ggml_view_1d(ctx0, out_gates, hidden_dim, 2 * sizeof(float) * hidden_dim));
|
||||
struct ggml_tensor *o_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 3 * sizeof(float) * hidden_dim));
|
||||
|
||||
c_t = ggml_add(ctx0, ggml_mul(ctx0, f_t, c_t), ggml_mul(ctx0, i_t, g_t));
|
||||
|
||||
h_t = ggml_mul(ctx0, o_t, ggml_tanh(ctx0, c_t));
|
||||
|
||||
hs = ggml_set_1d(ctx0, hs, h_t, t * hs->nb[1]);
|
||||
}
|
||||
|
||||
hs = ggml_cont(ctx0, ggml_transpose(ctx0, hs));
|
||||
|
||||
return hs;
|
||||
}
|
||||
17
dep/include/vall_e.cpp/ops.h
Normal file
17
dep/include/vall_e.cpp/ops.h
Normal file
@ -0,0 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
|
||||
struct ggml_tensor *pad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp,
|
||||
int padding_left, int padding_right);
|
||||
|
||||
struct ggml_tensor *unpad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp,
|
||||
int padding_left, int padding_right);
|
||||
|
||||
struct ggml_tensor *strided_conv_1d(struct ggml_context *ctx0, struct ggml_tensor *inp,
|
||||
struct ggml_tensor *conv_w, struct ggml_tensor *conv_b,
|
||||
int stride);
|
||||
|
||||
struct ggml_tensor *strided_conv_transpose_1d(struct ggml_context *ctx0, struct ggml_tensor *inp,
|
||||
struct ggml_tensor *conv_w, struct ggml_tensor *conv_b,
|
||||
int stride);
|
||||
111
dep/include/vall_e.cpp/quantizer.h
Normal file
111
dep/include/vall_e.cpp/quantizer.h
Normal file
@ -0,0 +1,111 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <vector>
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-alloc.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
struct encodec_quant_block {
|
||||
struct ggml_tensor *embed;
|
||||
};
|
||||
|
||||
struct encodec_quantizer {
|
||||
std::vector<encodec_quant_block> blocks;
|
||||
};
|
||||
|
||||
struct ggml_tensor *encodec_forward_quantizer_encode(
|
||||
const struct encodec_quantizer *quantizer, struct ggml_context *ctx0,
|
||||
struct ggml_tensor *encoded_inp, const int n_bins, const int sr, const int bandwidth,
|
||||
const int hop_length) {
|
||||
|
||||
if (!encoded_inp) {
|
||||
fprintf(stderr, "%s: null input tensor\n", __func__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const int frame_rate = (int)ceilf(sr / hop_length);
|
||||
const int n_q = get_num_quantizers_for_bandwidth(n_bins, frame_rate, bandwidth);
|
||||
|
||||
const int seq_length = encoded_inp->ne[0];
|
||||
|
||||
struct ggml_tensor *codes = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, seq_length, n_q);
|
||||
ggml_set_input(codes);
|
||||
|
||||
struct ggml_tensor *inpL = ggml_cont(ctx0, ggml_transpose(ctx0, encoded_inp));
|
||||
struct ggml_tensor *residual = inpL;
|
||||
struct ggml_tensor *indices;
|
||||
|
||||
for (int i = 0; i < n_q; i++) {
|
||||
encodec_quant_block block = quantizer->blocks[i];
|
||||
|
||||
// compute distance
|
||||
// [seq_length, n_bins]
|
||||
struct ggml_tensor *dp = ggml_scale(
|
||||
ctx0, ggml_mul_mat(ctx0, block.embed, residual), -2.0f);
|
||||
|
||||
// [n_bins]
|
||||
struct ggml_tensor *sqr_embed = ggml_sqr(ctx0, block.embed);
|
||||
struct ggml_tensor *sqr_embed_nrm = ggml_sum_rows(ctx0, sqr_embed);
|
||||
|
||||
// [seq_length]
|
||||
struct ggml_tensor *sqr_inp = ggml_sqr(ctx0, residual);
|
||||
struct ggml_tensor *sqr_inp_nrm = ggml_sum_rows(ctx0, sqr_inp);
|
||||
|
||||
// [seq_length, n_bins]
|
||||
struct ggml_tensor *dist = ggml_add(ctx0, ggml_repeat(ctx0, sqr_inp_nrm, dp), dp);
|
||||
dist = ggml_add(ctx0, ggml_repeat(ctx0, ggml_transpose(ctx0, sqr_embed_nrm), dist), dist);
|
||||
dist = ggml_neg(ctx0, dist);
|
||||
|
||||
// take the argmax over the column dimension
|
||||
// [seq_length]
|
||||
indices = ggml_argmax(ctx0, dist);
|
||||
|
||||
// look up in embedding table
|
||||
struct ggml_tensor *quantized = ggml_get_rows(ctx0, block.embed, indices);
|
||||
|
||||
residual = ggml_sub(ctx0, residual, quantized);
|
||||
|
||||
codes = ggml_set_1d(ctx0, codes, indices, i * codes->nb[1]);
|
||||
}
|
||||
|
||||
return codes;
|
||||
}
|
||||
|
||||
struct ggml_tensor *encodec_forward_quantizer_decode(
|
||||
const struct encodec_quantizer *quantizer, struct ggml_context *ctx0,
|
||||
struct ggml_tensor *codes, const int hidden_dim, const int n_bins, const int sr, const int bandwidth,
|
||||
const int hop_length) {
|
||||
|
||||
if (!codes) {
|
||||
fprintf(stderr, "%s: null input tensor\n", __func__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const int seq_length = codes->ne[0];
|
||||
|
||||
const int frame_rate = (int)ceilf(sr / hop_length);
|
||||
const int n_q = get_num_quantizers_for_bandwidth(n_bins, frame_rate, bandwidth);
|
||||
|
||||
assert(n_q == codes->ne[1]);
|
||||
|
||||
struct ggml_tensor *quantized_out = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, hidden_dim, seq_length);
|
||||
ggml_set_input(quantized_out);
|
||||
ggml_set_name(quantized_out, "quantized_out");
|
||||
|
||||
for (int i = 0; i < n_q; i++) {
|
||||
encodec_quant_block block = quantizer->blocks[i];
|
||||
|
||||
struct ggml_tensor *indices = ggml_view_1d(ctx0, codes, seq_length, i * codes->nb[1]);
|
||||
struct ggml_tensor *quantized = ggml_get_rows(ctx0, block.embed, indices);
|
||||
|
||||
quantized_out = ggml_add(ctx0, quantized_out, quantized);
|
||||
}
|
||||
|
||||
quantized_out = ggml_cont(ctx0, ggml_transpose(ctx0, quantized_out));
|
||||
|
||||
return quantized_out;
|
||||
}
|
||||
30
dep/include/vall_e.cpp/utils.h
Normal file
30
dep/include/vall_e.cpp/utils.h
Normal file
@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
const size_t MB = 1024 * 1024;
|
||||
|
||||
// Reads sizeof(T) raw bytes from `infile` straight into the storage of `dest`.
// The stream state is not inspected here; callers that care about short or
// failed reads must check `infile` themselves.
template <typename T>
void read_safe(std::ifstream &infile, T &dest) {
    char *const raw_bytes = reinterpret_cast<char *>(&dest);
    infile.read(raw_bytes, sizeof(dest));
}
|
||||
|
||||
// Number of codebooks needed for a given bandwidth.
//
// Supported bandwidths are 1.5kbps (n_q = 2), 3 kbps (n_q = 4), 6 kbps (n_q = 8),
// 12 kbps (n_q = 16) and 24kbps (n_q = 32).
//
// Computed as ceil(1000 * bandwidth / (frame_rate * 10)) where
// frame_rate = ceil(sample_rate / hop_length).
// NOTE(review): the constant 10 is presumably the bits per code for a
// 1024-entry codebook - confirm before using other codebook sizes.
//
// Declared `inline` because the definition lives in a header; a plain
// definition causes multiple-definition link errors as soon as two
// translation units include it.
inline int32_t get_num_codebooks(float bandwidth, int hop_length, float sample_rate) {
    const float frame_rate = ceilf(sample_rate / hop_length);
    return (int32_t)ceilf(1000 * bandwidth / (frame_rate * 10));
}
|
||||
|
||||
// Bandwidth contributed by a single quantizer: log2(bins) * frame_rate,
// truncated to an integer.
//
// Returns 0 for bins <= 0, where log2f would yield -inf/NaN and the
// conversion to int32_t would be undefined.
//
// Declared `inline` because the definition lives in a header (avoids
// multiple-definition link errors when included from several translation units).
inline int32_t get_bandwidth_per_quantizer(int bins, float frame_rate) {
    if (bins <= 0) {
        return 0;
    }
    // explicit cast: the truncation is intentional and matches the previous behaviour
    return (int32_t)(log2f((float)bins) * frame_rate);
}
|
||||
|
||||
int32_t get_num_quantizers_for_bandwidth(int bins, float frame_rate, float bandwidth) {
|
||||
float bw_per_q = get_bandwidth_per_quantizer(bins, frame_rate);
|
||||
int32_t n_q = MAX(1, floorf(bandwidth * 1000 / bw_per_q));
|
||||
return n_q;
|
||||
}
|
||||
178
dep/include/vall_e.cpp/vall_e.h
Normal file
178
dep/include/vall_e.cpp/vall_e.h
Normal file
@ -0,0 +1,178 @@
|
||||
#pragma once
|
||||
|
||||
// C++ deps
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <llama.h>
|
||||
|
||||
// handles defining platform specific macros and import/export decorators (copied from my engine's uf/config.h)
|
||||
#if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
|
||||
// Windows
|
||||
#define VALL_E_ENV "Windows"
|
||||
#define VALL_E_ENV_WINDOWS 1
|
||||
#define VALL_E_ENV_HEADER "windows.h"
|
||||
#if defined(__CYGWIN__)
|
||||
#define to_string(var) string(var)
|
||||
#endif
|
||||
#ifndef _WIN32_WINNT
|
||||
#define _WIN32_WINNT 0x0600
|
||||
#endif
|
||||
#ifndef WINVER
|
||||
#define WINVER 0x0600
|
||||
#endif
|
||||
|
||||
#define VALL_E_IO_ROOT "./data/"
|
||||
#elif defined(linux) || defined(__linux)
|
||||
// Linux
|
||||
#define VALL_E_ENV "Linux"
|
||||
#define VALL_E_ENV_LINUX 1
|
||||
#define VALL_E_ENV_HEADER "linux.h"
|
||||
|
||||
#define VALL_E_IO_ROOT "./data/"
|
||||
#elif defined(__APPLE__) || defined(MACOSX) || defined(macintosh) || defined(Macintosh)
|
||||
// MacOS
|
||||
#define VALL_E_ENV "OSX"
|
||||
#define VALL_E_ENV_OSX 1
|
||||
#define VALL_E_ENV_HEADER "osx.h"
|
||||
|
||||
#define VALL_E_IO_ROOT "./data/"
|
||||
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
|
||||
// FreeBSD
|
||||
#define VALL_E_ENV "FreeBSD"
|
||||
#define VALL_E_ENV_FREEBSD 1
|
||||
#define VALL_E_ENV_HEADER "freebsd.h"
|
||||
|
||||
#define VALL_E_IO_ROOT "./data/"
|
||||
#elif defined(__sh__)
|
||||
// Dreamcast
|
||||
#define VALL_E_ENV "Dreamcast"
|
||||
#define VALL_E_ENV_DREAMCAST 1
|
||||
#define VALL_E_ENV_HEADER "dreamcast.h"
|
||||
#include VALL_E_ENV_HEADER
|
||||
|
||||
#define _arch_dreamcast
|
||||
|
||||
#define VALL_E_IO_ROOT "/cd/"
|
||||
#else
|
||||
// Unsupported system
|
||||
#define VALL_E_ENV "Unknown"
|
||||
#define VALL_E_ENV_UNKNOWN 1
|
||||
#define VALL_E_ENV_HEADER "unknown.h"
|
||||
#warning Using "unknown"
|
||||
#error No support
|
||||
#endif
|
||||
|
||||
#if !defined(VALL_E_STATIC)
|
||||
#if defined(VALL_E_ENV_WINDOWS)
|
||||
// Windows compilers need specific (and different) keywords for export and import
|
||||
#define VALL_E_API_EXPORT __declspec(dllexport)
|
||||
#define VALL_E_API_IMPORT __declspec(dllimport)
|
||||
// For Visual C++ compilers, we also need to turn off this annoying C4251 warning
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4251)
|
||||
#endif
|
||||
#else // Linux, FreeBSD, Mac OS X
|
||||
#if __GNUC__ >= 4
|
||||
// GCC 4 has special keywords for showing/hiding symbols,
|
||||
// the same keyword is used for both importing and exporting
|
||||
#define VALL_E_API_EXPORT __attribute__ ((__visibility__ ("default")))
|
||||
#define VALL_E_API_IMPORT __attribute__ ((__visibility__ ("default")))
|
||||
#else
|
||||
// GCC < 4 has no mechanism to explicitly hide symbols, everything's exported
|
||||
#define VALL_E_API_EXPORT
|
||||
#define VALL_E_API_IMPORT
|
||||
#endif
|
||||
#endif
|
||||
#else
|
||||
// Static build doesn't need import/export macros
|
||||
#define VALL_E_API_EXPORT
|
||||
#define VALL_E_API_IMPORT
|
||||
#endif
|
||||
|
||||
#ifdef VALL_E_EXPORTS
|
||||
#define VALL_E_API VALL_E_API_EXPORT
|
||||
#else
|
||||
#define VALL_E_API VALL_E_API_IMPORT
|
||||
#endif
|
||||
|
||||
typedef llama_token token_t;
|
||||
typedef std::vector<std::vector<token_t>> vall_e_audio_codes_t;
|
||||
|
||||
const int ENCODEC_FRAMES_PER_SECOND = 75;
|
||||
const int MAX_DURATION = ENCODEC_FRAMES_PER_SECOND * 12;
|
||||
const int CTX_SIZE = 2048;
|
||||
const int N_THREADS = 8;
|
||||
const int N_GPU_LAYERS = 99;
|
||||
|
||||
const int MODALITY_AR_NAR = 0;
|
||||
const int MODALITY_NAR_LEN = 1;
|
||||
|
||||
// forward declarations
|
||||
struct io_map_t;
|
||||
struct llama_model;
|
||||
struct llama_context;
|
||||
struct encodec_context;
|
||||
|
||||
// model-specific parameters
|
||||
struct vall_e_context_params_t {
|
||||
std::string model_path = "./data/vall_e.gguf";
|
||||
std::string encodec_path = "./data/encodec.bin";
|
||||
int32_t gpu_layers = N_GPU_LAYERS;
|
||||
int32_t n_threads = N_THREADS;
|
||||
int32_t ctx_size = CTX_SIZE;
|
||||
bool verbose = false;
|
||||
};
|
||||
// inference-specific arguments
|
||||
struct vall_e_args_t {
|
||||
std::string text = "Hello world.";
|
||||
std::string prompt_path = "./data/prom.wav";
|
||||
std::string output_path = "./data/resp.wav";
|
||||
std::string language = "en";
|
||||
std::string task = "tts";
|
||||
int modality = MODALITY_NAR_LEN;
|
||||
int max_steps = 30;
|
||||
int max_duration = MAX_DURATION;
|
||||
};
|
||||
// stores everything needed for vall_e.cpp at runtime
|
||||
struct vall_e_context_t {
|
||||
vall_e_context_params_t params;
|
||||
|
||||
io_map_t* io_map = NULL; // pointer for reasons
|
||||
|
||||
struct {
|
||||
llama_model* model = NULL;
|
||||
llama_context* ctx = NULL;
|
||||
} llama;
|
||||
|
||||
struct {
|
||||
encodec_context* ctx;
|
||||
} encodec;
|
||||
};
|
||||
// stores the raw inputs to be fed
|
||||
struct vall_e_inputs_t {
|
||||
std::string task = "tts";
|
||||
std::string lang = "en";
|
||||
|
||||
token_t rvq_l = 0;
|
||||
|
||||
std::vector<token_t> phn = {};
|
||||
vall_e_audio_codes_t prom = {};
|
||||
vall_e_audio_codes_t resp = {};
|
||||
};
|
||||
|
||||
// encodec helpers
|
||||
VALL_E_API std::vector<float> read_audio_from_disk( const std::string& path );
|
||||
VALL_E_API void write_audio_to_disk( const std::vector<float>& waveform, const std::string& path );
|
||||
|
||||
VALL_E_API std::vector<std::vector<int32_t>> encode_audio( struct encodec_context* ectx, const std::vector<float>& waveform );
|
||||
VALL_E_API std::vector<float> decode_audio( struct encodec_context* ectx, const vall_e_audio_codes_t& codes_2d );
|
||||
|
||||
// context management
|
||||
VALL_E_API void vall_e_print_usage( char** argv, const vall_e_context_params_t& params, const vall_e_args_t& args );
|
||||
VALL_E_API bool vall_e_args_parse( int argc, char** argv, vall_e_context_params_t& params, vall_e_args_t& args );
|
||||
VALL_E_API vall_e_context_t* vall_e_load( const vall_e_context_params_t& params );
|
||||
VALL_E_API vall_e_inputs_t vall_e_prepare_inputs( vall_e_context_t* ctx, const std::string& text, const std::string& prompt_path, const std::string& lang = "auto", const std::string& task = "tts" );
|
||||
VALL_E_API vall_e_audio_codes_t vall_e_generate( vall_e_context_t* ctx, vall_e_inputs_t& inputs, int max_steps, int max_duration, int modality = MODALITY_NAR_LEN );
|
||||
VALL_E_API void vall_e_free( vall_e_context_t* ctx );
|
||||
@ -27,9 +27,12 @@ To be filled.
|
||||
* *very* loosely integrated
|
||||
* basic shapes and triangulated mesh collision and some form of ray queries
|
||||
* OpenAL for audio
|
||||
* Currently only loads from ogg (vorbis) files
|
||||
* Currently only loads from `.ogg` (vorbis) files
|
||||
* Supports loading in full and streaming
|
||||
* *very* loosely integrated
|
||||
* Speech synthesis using [vall_e.cpp](https://github.com/e-c-k-e-r/vall-e/)
|
||||
* `win64.gcc.vulkan` binaries can be found [here](https://github.com/e-c-k-e-r/vall-e/releases/tag/vall_e.cpp), if compiled.
|
||||
* currently only generates `.wav` files
|
||||
|
||||
## Supported Systems
|
||||
|
||||
|
||||
16
engine/inc/uf/ext/vall_e/vall_e.h
Normal file
16
engine/inc/uf/ext/vall_e/vall_e.h
Normal file
@ -0,0 +1,16 @@
|
||||
#pragma once
|
||||
|
||||
#include <uf/config.h>
|
||||
|
||||
#if UF_USE_VALL_E
|
||||
|
||||
#include <vall_e.cpp/vall_e.h>
|
||||
// Engine-side wrapper around vall_e.cpp speech synthesis.
namespace ext {
|
||||
namespace vall_e {
|
||||
// Loads the model and the encodec weights into a single context shared by the
// functions below. An empty path selects the built-in default
// ("./data/llm/vall_e.gguf" / "./data/llm/encodec.bin"). A failed load is
// reported through UF_MSG_ERROR.
void UF_API initialize( const std::string& model_path = "", const std::string& encodec_path = "" );
|
||||
// Synthesizes `text` using the audio prompt at path `prom`, writes the result
// to "./data/tmp/<uf::time::time()>.wav" and returns that path. Returns an
// empty string when no context has been loaded. An empty `lang` is treated as "en".
std::string UF_API generate( const std::string& text, const std::string& prom, const std::string& lang = "en" );
|
||||
// Releases the shared context through vall_e_free(); does nothing when no
// context is loaded.
void UF_API terminate();
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@ -7,4 +7,4 @@
|
||||
#include "universal.h"
|
||||
// defines which implementation to use
|
||||
#include UF_ENV_HEADER
|
||||
//
|
||||
//
|
||||
|
||||
@ -7,4 +7,4 @@
|
||||
#include "universal.h"
|
||||
// defines which implementation to use
|
||||
#include UF_ENV_HEADER
|
||||
// this line is required
|
||||
//
|
||||
|
||||
@ -7,4 +7,4 @@
|
||||
#include "universal.h"
|
||||
// defines which implementation to use
|
||||
#include UF_ENV_HEADER
|
||||
//
|
||||
//
|
||||
|
||||
@ -12,6 +12,7 @@ namespace spec {
|
||||
protected:
|
||||
|
||||
public:
|
||||
spec::uni::Time::time_t UF_API_CALL unixTime();
|
||||
spec::uni::Time::time_t UF_API_CALL getTime();
|
||||
};
|
||||
};
|
||||
|
||||
@ -7,4 +7,4 @@
|
||||
#include "universal.h"
|
||||
// defines which implementation to use
|
||||
#include UF_ENV_HEADER
|
||||
//
|
||||
//
|
||||
|
||||
@ -18,8 +18,9 @@
|
||||
|
||||
namespace uf {
|
||||
namespace thread {
|
||||
extern UF_API uf::stl::string workerThreadName;
|
||||
extern UF_API uf::stl::string mainThreadName;
|
||||
extern UF_API uf::stl::string workerThreadName;
|
||||
extern UF_API uf::stl::string asyncThreadName;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -90,6 +90,8 @@ namespace uf {
|
||||
extern UF_API double previous;
|
||||
extern UF_API float delta;
|
||||
extern UF_API float clamp;
|
||||
|
||||
size_t UF_API time();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -118,7 +118,26 @@ namespace {
|
||||
this->scroll.bottom = true;
|
||||
reclaimFocus = true;
|
||||
|
||||
uf::console::execute( command );
|
||||
// to-do: add a way to either asynchronously invoke commands or not
|
||||
|
||||
uf::thread::queue( uf::thread::asyncThreadName, [=](){
|
||||
uf::console::execute( command );
|
||||
});
|
||||
/*
|
||||
// this blocks
|
||||
uf::thread::queue( uf::thread::fetchWorker(), [=](){
|
||||
uf::console::execute( command );
|
||||
});
|
||||
*/
|
||||
/*
|
||||
// this still blocks
|
||||
auto tasks = uf::thread::schedule(true);
|
||||
tasks.queue([=](){
|
||||
uf::console::execute( command );
|
||||
});
|
||||
uf::thread::execute( tasks );
|
||||
*/
|
||||
|
||||
}
|
||||
|
||||
ImGui::SetItemDefaultFocus();
|
||||
|
||||
52
engine/src/ext/vall_e/vall_e.cpp
Normal file
52
engine/src/ext/vall_e/vall_e.cpp
Normal file
@ -0,0 +1,52 @@
|
||||
#include <uf/ext/vall_e/vall_e.h>
|
||||
#include <uf/utils/time/time.h>
|
||||
|
||||
#if UF_USE_VALL_E
|
||||
namespace {
|
||||
vall_e_context_t* ctx = NULL;
|
||||
}
|
||||
|
||||
void ext::vall_e::initialize( const std::string& model_path, const std::string& encodec_path ) {
|
||||
vall_e_context_params_t params;
|
||||
params.model_path = model_path == "" ? "./data/llm/vall_e.gguf" : model_path;
|
||||
params.encodec_path = encodec_path == "" ? "./data/llm/encodec.bin" : encodec_path;
|
||||
params.gpu_layers = N_GPU_LAYERS;
|
||||
params.n_threads = N_THREADS;
|
||||
params.ctx_size = CTX_SIZE;
|
||||
params.verbose = false;
|
||||
|
||||
::ctx = vall_e_load( params );
|
||||
if ( !::ctx || !::ctx->llama.model || !::ctx->llama.ctx || !::ctx->encodec.ctx ) {
|
||||
UF_MSG_ERROR("failed to initialize vall_e.cpp");
|
||||
return;
|
||||
}
|
||||
}
|
||||
std::string ext::vall_e::generate( const std::string& text, const std::string& prom, const std::string& lang ) {
|
||||
if ( !::ctx ) return "";
|
||||
|
||||
std::string path = "./data/tmp/" + std::to_string(uf::time::time()) + ".wav";
|
||||
|
||||
vall_e_args_t args;
|
||||
args.text = text;
|
||||
args.prompt_path = prom;
|
||||
args.output_path = path;
|
||||
args.language = lang == "" ? "en" : lang;
|
||||
args.task = "tts";
|
||||
args.modality = MODALITY_NAR_LEN;
|
||||
args.max_steps = 30;
|
||||
args.max_duration = MAX_DURATION;
|
||||
|
||||
auto inputs = vall_e_prepare_inputs( ::ctx, args.text, args.prompt_path, args.language );
|
||||
auto output_audio_codes = vall_e_generate( ::ctx, inputs, args.max_steps, args.max_duration, args.modality );
|
||||
auto waveform = decode_audio( ::ctx->encodec.ctx, output_audio_codes );
|
||||
write_audio_to_disk( waveform, args.output_path );
|
||||
//UF_MSG_DEBUG("Generated to {}", path);
|
||||
|
||||
return path;
|
||||
}
|
||||
// Releases the loaded vall_e.cpp context, if there is one.
// Safe to call repeatedly: the handle is cleared after it has been freed.
void ext::vall_e::terminate() {
	if ( !::ctx ) return;

	vall_e_free( ::ctx );
	// clear the handle so a later generate() / terminate() does not touch freed memory
	::ctx = NULL;
}
|
||||
#endif
|
||||
@ -12,6 +12,9 @@ namespace {
|
||||
chrono_time_t start = getTimePoint();
|
||||
}
|
||||
|
||||
// Returns the current getTimePoint() reading, measured from its clock's epoch,
// as a whole number of microseconds.
spec::uni::Time::time_t spec::uni::Time::unixTime() {
	const auto sinceEpoch = getTimePoint().time_since_epoch();
	const auto micros = std::chrono::duration_cast<std::chrono::microseconds>(sinceEpoch);
	return micros.count();
}
|
||||
spec::uni::Time::time_t spec::uni::Time::getTime() {
|
||||
std::chrono::duration<double> elapsed = getTimePoint() - start;
|
||||
return elapsed.count() * 1000000;
|
||||
|
||||
@ -38,18 +38,29 @@ void uf::console::initialize() {
|
||||
});
|
||||
|
||||
uf::console::registerCommand("callHook", "Calls a hook, passing the arguments as a JSON object", [&]( const uf::stl::string& arguments )->uf::stl::string{
|
||||
auto match = uf::string::match( arguments, "/^(.+?)(?: (.+?))?$/" );
|
||||
auto match = uf::string::match( arguments, "/^\"?(.+?)\"?(?: (.+?))?$/" );
|
||||
if ( match.empty() ) return "invalid invocation";
|
||||
|
||||
uf::stl::vector<pod::Hook::userdata_t> results;
|
||||
if ( match.size() > 2 ) {
|
||||
ext::json::Value json;
|
||||
ext::json::decode( json, match[2] );
|
||||
uf::hooks.call( match[1], json );
|
||||
|
||||
results = uf::hooks.call( match[1], json );
|
||||
} else {
|
||||
uf::hooks.call( match[1] );
|
||||
results = uf::hooks.call( match[1] );
|
||||
}
|
||||
|
||||
return "Hook executed: " + match[1];
|
||||
// this could probably be its own function
|
||||
uf::stl::string s_result = "";
|
||||
for ( auto i = 0; i < results.size(); ++i ) {
|
||||
auto& res = results[i];
|
||||
if ( res.is<uf::stl::string>() ) s_result += ::fmt::format("\n[{}] => {}", i, res.as<uf::stl::string>());
|
||||
else if ( res.is<ext::json::Value>() ) s_result += ::fmt::format("\n[{}] => {}", i, ext::json::encode( res.as<ext::json::Value>() ));
|
||||
else s_result += ::fmt::format("\n[{}] => Userdata: {}", i, (void*) res);
|
||||
}
|
||||
|
||||
return "Hook executed: " + match[1] + s_result;
|
||||
});
|
||||
|
||||
uf::console::registerCommand("json", "Modifies the gamestate by setting a JSON value", [&]( const uf::stl::string& arguments )->uf::stl::string{
|
||||
|
||||
@ -8,8 +8,9 @@ float uf::thread::limiter = 1.0f / 120.0f;
|
||||
uint uf::thread::workers = 1;
|
||||
std::thread::id uf::thread::mainThreadId = std::this_thread::get_id();
|
||||
bool uf::thread::async = false;
|
||||
uf::stl::string uf::thread::workerThreadName = "Worker";
|
||||
uf::stl::string uf::thread::mainThreadName = "Main";
|
||||
uf::stl::string uf::thread::workerThreadName = "Worker";
|
||||
uf::stl::string uf::thread::asyncThreadName = "Async";
|
||||
|
||||
#define UF_THREAD_ANNOUNCE(...) UF_MSG_DEBUG(__VA_ARGS__)
|
||||
|
||||
|
||||
@ -10,4 +10,8 @@ size_t uf::time::frame = 0;
|
||||
double uf::time::current = 0;
|
||||
double uf::time::previous = 0;
|
||||
float uf::time::delta = 0;
|
||||
float uf::time::clamp = 0;
|
||||
float uf::time::clamp = 0;
|
||||
|
||||
size_t uf::time::time() {
|
||||
return spec::time.unixTime();
|
||||
}
|
||||
69
ext/main.cpp
69
ext/main.cpp
@ -1,10 +1,10 @@
|
||||
#include "main.h"
|
||||
#include "ext.h"
|
||||
|
||||
#include <uf/ext/ext.h>
|
||||
#include <uf/ext/oal/oal.h>
|
||||
|
||||
#include <uf/spec/terminal/terminal.h>
|
||||
#include <uf/spec/controller/controller.h>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <regex>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <uf/utils/time/time.h>
|
||||
#include <uf/utils/audio/audio.h>
|
||||
@ -22,32 +22,27 @@
|
||||
#include <uf/utils/graphic/graphic.h>
|
||||
#include <uf/utils/camera/camera.h>
|
||||
#include <uf/utils/http/http.h>
|
||||
#include <uf/utils/renderer/renderer.h>
|
||||
#include <uf/utils/io/console.h>
|
||||
#include <uf/utils/io/inputs.h>
|
||||
#include <uf/spec/terminal/terminal.h>
|
||||
#include <uf/spec/controller/controller.h>
|
||||
#include <uf/utils/memory/string.h>
|
||||
|
||||
#include <uf/engine/entity/entity.h>
|
||||
#include <uf/engine/graph/graph.h>
|
||||
#include <uf/utils/io/inputs.h>
|
||||
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <uf/utils/memory/string.h>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
#include <regex>
|
||||
|
||||
#include "ext.h"
|
||||
|
||||
#include <uf/engine/scene/scene.h>
|
||||
#include <uf/engine/asset/asset.h>
|
||||
|
||||
#include <uf/utils/renderer/renderer.h>
|
||||
#include <uf/utils/io/console.h>
|
||||
#include <uf/ext/ext.h>
|
||||
#include <uf/ext/oal/oal.h>
|
||||
#include <uf/ext/discord/discord.h>
|
||||
#include <uf/ext/openvr/openvr.h>
|
||||
#include <uf/ext/lua/lua.h>
|
||||
#include <uf/ext/ultralight/ultralight.h>
|
||||
#include <uf/ext/imgui/imgui.h>
|
||||
#include <uf/ext/ffx/fsr.h>
|
||||
#include <uf/ext/imgui/imgui.h>
|
||||
#include <uf/ext/vall_e/vall_e.h>
|
||||
|
||||
bool ext::ready = false;
|
||||
uf::stl::vector<uf::stl::string> ext::arguments;
|
||||
@ -88,6 +83,11 @@ namespace {
|
||||
struct {
|
||||
bool enabled;
|
||||
} ultralight, discord, imgui;
|
||||
struct {
|
||||
bool enabled;
|
||||
std::string model_path = "";
|
||||
std::string encodec_path = "";
|
||||
} vall_e;
|
||||
} ext;
|
||||
|
||||
struct {
|
||||
@ -118,6 +118,10 @@ void EXT_API ext::load( ext::json::Value& json ) {
|
||||
::config.engine.ext.ultralight.enabled = json["engine"]["ext"]["ultralight"]["enabled"].as(::config.engine.ext.ultralight.enabled);
|
||||
::config.engine.ext.discord.enabled = json["engine"]["ext"]["discord"]["enabled"].as(::config.engine.ext.discord.enabled);
|
||||
::config.engine.ext.imgui.enabled = json["engine"]["ext"]["imgui"]["enabled"].as(::config.engine.ext.imgui.enabled);
|
||||
|
||||
::config.engine.ext.vall_e.enabled = json["engine"]["ext"]["vall_e"]["enabled"].as(::config.engine.ext.vall_e.enabled);
|
||||
::config.engine.ext.vall_e.model_path = json["engine"]["ext"]["vall_e"]["model_path"].as(::config.engine.ext.vall_e.model_path);
|
||||
::config.engine.ext.vall_e.encodec_path = json["engine"]["ext"]["vall_e"]["encodec_path"].as(::config.engine.ext.vall_e.encodec_path);
|
||||
|
||||
::config.engine.limiter.print = json["engine"]["debug"]["framerate"]["print"].as(::config.engine.limiter.print);
|
||||
|
||||
@ -305,6 +309,9 @@ void EXT_API ext::initialize() {
|
||||
/* Setup deferred Main thread */ {
|
||||
uf::thread::get(uf::thread::mainThreadName);
|
||||
}
|
||||
/* Setup non-blocking, asynchronous thread */ {
|
||||
uf::thread::get(uf::thread::asyncThreadName);
|
||||
}
|
||||
/* set JSON implicit preferences */ {
|
||||
ext::json::PREFERRED_ENCODING = ::json["engine"]["ext"]["json"]["encoding"].as(ext::json::PREFERRED_ENCODING);
|
||||
ext::json::PREFERRED_COMPRESSION = ::json["engine"]["ext"]["json"]["compression"].as(ext::json::PREFERRED_COMPRESSION);
|
||||
@ -717,6 +724,23 @@ void EXT_API ext::initialize() {
|
||||
if ( ::config.engine.ext.imgui.enabled ) {
|
||||
// ext::imgui::initialize();
|
||||
}
|
||||
#endif
|
||||
#if UF_USE_VALL_E
|
||||
if ( ::config.engine.ext.vall_e.enabled ) {
|
||||
ext::vall_e::initialize( ::config.engine.ext.vall_e.model_path, ::config.engine.ext.vall_e.encodec_path );
|
||||
|
||||
// bind the hook
|
||||
uf::hooks.addHook( "llm:VALL-E.synthesize", [&](ext::json::Value& json){
|
||||
auto text = json["text"].as<uf::stl::string>();
|
||||
auto prom = json["prom"].as<uf::stl::string>();
|
||||
|
||||
auto path = ext::vall_e::generate( text, prom );
|
||||
|
||||
UF_MSG_DEBUG("Called {} {}: {}", text, prom, path);
|
||||
|
||||
return path;
|
||||
});
|
||||
}
|
||||
#endif
|
||||
/* Add hooks */ {
|
||||
|
||||
@ -1138,6 +1162,11 @@ void EXT_API ext::terminate() {
|
||||
/* Terminate controllers */ {
|
||||
spec::controller::terminate();
|
||||
}
|
||||
#if UF_USE_VALL_E
|
||||
if ( ::config.engine.ext.vall_e.enabled ) {
|
||||
ext::vall_e::terminate();
|
||||
}
|
||||
#endif
|
||||
#if UF_USE_IMGUI
|
||||
if ( ::config.engine.ext.imgui.enabled ) {
|
||||
ext::imgui::terminate();
|
||||
|
||||
Loading…
Reference in New Issue
Block a user