vall_e.cpp phonemizing and tokenizing
This commit is contained in:
parent
8516bab15c
commit
6bf59bbd8b
|
@ -3,7 +3,7 @@ CXX = g++
|
||||||
INCS += -I./include
|
INCS += -I./include
|
||||||
LIBS += -L./libs
|
LIBS += -L./libs
|
||||||
|
|
||||||
LINKS += -lggml -lggml-base -lllama -lencodec
|
LINKS += -lggml -lggml-base -lllama -lencodec -lespeak-ng
|
||||||
FLAGS += -march=native -O3
|
FLAGS += -march=native -O3
|
||||||
|
|
||||||
SRCS := $(shell find ./ -name "*.cpp")
|
SRCS := $(shell find ./ -name "*.cpp")
|
||||||
|
|
|
@ -8,7 +8,7 @@ At the moment it's ***very*** work in progress.
|
||||||
|
|
||||||
Populate `./include/` with the `ggml`, `llama.cpp`, and `encodec.cpp` headers.
|
Populate `./include/` with the `ggml`, `llama.cpp`, and `encodec.cpp` headers.
|
||||||
|
|
||||||
Populate `./libs/` with the compiled libraries of `llama.cpp` and `encodec.cpp`.
|
Populate `./libs/` with the compiled libraries of `llama.cpp`, `encodec.cpp`, and `espeak-ng`.
|
||||||
|
|
||||||
Run `make`.
|
Run `make`.
|
||||||
|
|
||||||
|
@ -26,10 +26,10 @@ Run `make`.
|
||||||
* [x] load the quantized model
|
* [x] load the quantized model
|
||||||
* [x] orchestrate the required embeddings
|
* [x] orchestrate the required embeddings
|
||||||
* [x] juggle the output head / classifier properly
|
* [x] juggle the output head / classifier properly
|
||||||
* [ ] phonemize text
|
* [x] phonemize text
|
||||||
* with the help of espeak-ng
|
* with the help of espeak-ng
|
||||||
* [ ] tokenize phonemes
|
* [x] tokenize phonemes
|
||||||
* the tokenizer is being a huge thorn on actual sequences
|
* tokenize with `llama_tokenize` instead of a homebrewed method because the tokenizer is being a huge thorn
|
||||||
* [x] load audio from disk
|
* [x] load audio from disk
|
||||||
* [x] encode audio
|
* [x] encode audio
|
||||||
* [x] sum embeddings for the `prom` and prior `resp`s
|
* [x] sum embeddings for the `prom` and prior `resp`s
|
||||||
|
|
113
vall_e.cpp/include/decoder.h
Normal file
113
vall_e.cpp/include/decoder.h
Normal file
|
@ -0,0 +1,113 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-alloc.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#include "lstm.h"
|
||||||
|
#include "utils.h"
|
||||||
|
|
||||||
|
|
||||||
|
// Tensors for one decoder stage. encodec_forward_decoder() passes the
// us_conv_* pair to strided_conv_transpose_1d() and every other pair to
// strided_conv_1d(). All members are raw, non-owning-looking pointers;
// NOTE(review): tensor ownership/lifetime is not visible from this header.
struct encodec_decoder_block {
    // upsampling convolution: weight, bias
    struct ggml_tensor * us_conv_w;
    struct ggml_tensor * us_conv_b;

    // first convolution of the residual path: weight, bias
    struct ggml_tensor * conv_1_w;
    struct ggml_tensor * conv_1_b;

    // second convolution of the residual path: weight, bias
    struct ggml_tensor * conv_2_w;
    struct ggml_tensor * conv_2_b;

    // shortcut convolution added back onto the residual path: weight, bias
    struct ggml_tensor * conv_sc_w;
    struct ggml_tensor * conv_sc_b;
};
|
||||||
|
|
||||||
|
struct encodec_decoder {
|
||||||
|
struct ggml_tensor *init_conv_w;
|
||||||
|
struct ggml_tensor *init_conv_b;
|
||||||
|
|
||||||
|
encodec_lstm lstm;
|
||||||
|
|
||||||
|
struct ggml_tensor *final_conv_w;
|
||||||
|
struct ggml_tensor *final_conv_b;
|
||||||
|
|
||||||
|
std::vector<encodec_decoder_block> blocks;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ggml_tensor *encodec_forward_decoder(
|
||||||
|
const struct encodec_decoder *decoder, struct ggml_context *ctx0,
|
||||||
|
struct ggml_tensor *quantized_out, const int *ratios, const int kernel_size, const int res_kernel_size,
|
||||||
|
const int stride) {
|
||||||
|
|
||||||
|
if (!quantized_out) {
|
||||||
|
fprintf(stderr, "%s: null input tensor\n", __func__);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ggml_tensor *inpL = strided_conv_1d(
|
||||||
|
ctx0, quantized_out, decoder->init_conv_w, decoder->init_conv_b, stride);
|
||||||
|
|
||||||
|
// lstm
|
||||||
|
{
|
||||||
|
struct ggml_tensor *cur = inpL;
|
||||||
|
|
||||||
|
const encodec_lstm lstm = decoder->lstm;
|
||||||
|
|
||||||
|
// first lstm layer
|
||||||
|
char l0_prefix[7] = "dec_l0";
|
||||||
|
struct ggml_tensor *hs1 = forward_pass_lstm_unilayer(
|
||||||
|
ctx0, cur, lstm.l0_ih_w, lstm.l0_hh_w, lstm.l0_ih_b, lstm.l0_hh_b, l0_prefix);
|
||||||
|
|
||||||
|
// second lstm layer
|
||||||
|
char l1_prefix[7] = "dec_l1";
|
||||||
|
struct ggml_tensor *out = forward_pass_lstm_unilayer(
|
||||||
|
ctx0, hs1, lstm.l1_ih_w, lstm.l1_hh_w, lstm.l1_ih_b, lstm.l1_hh_b, l1_prefix);
|
||||||
|
|
||||||
|
inpL = ggml_add(ctx0, inpL, out);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int layer_ix = 0; layer_ix < 4; layer_ix++) {
|
||||||
|
encodec_decoder_block block = decoder->blocks[layer_ix];
|
||||||
|
|
||||||
|
// upsampling layers
|
||||||
|
inpL = ggml_elu(ctx0, inpL);
|
||||||
|
|
||||||
|
inpL = strided_conv_transpose_1d(
|
||||||
|
ctx0, inpL, block.us_conv_w, block.us_conv_b, ratios[layer_ix]);
|
||||||
|
|
||||||
|
struct ggml_tensor *current = inpL;
|
||||||
|
|
||||||
|
// shortcut
|
||||||
|
struct ggml_tensor *shortcut = strided_conv_1d(
|
||||||
|
ctx0, inpL, block.conv_sc_w, block.conv_sc_b, stride);
|
||||||
|
|
||||||
|
// conv1
|
||||||
|
current = ggml_elu(ctx0, current);
|
||||||
|
|
||||||
|
current = strided_conv_1d(
|
||||||
|
ctx0, current, block.conv_1_w, block.conv_1_b, stride);
|
||||||
|
|
||||||
|
// conv2
|
||||||
|
current = ggml_elu(ctx0, current);
|
||||||
|
|
||||||
|
current = strided_conv_1d(
|
||||||
|
ctx0, current, block.conv_2_w, block.conv_2_b, stride);
|
||||||
|
|
||||||
|
// residual connection
|
||||||
|
inpL = ggml_add(ctx0, current, shortcut);
|
||||||
|
}
|
||||||
|
|
||||||
|
// final conv
|
||||||
|
inpL = ggml_elu(ctx0, inpL);
|
||||||
|
|
||||||
|
struct ggml_tensor *decoded_inp = strided_conv_1d(
|
||||||
|
ctx0, inpL, decoder->final_conv_w, decoder->final_conv_b, stride);
|
||||||
|
|
||||||
|
return decoded_inp;
|
||||||
|
}
|
6434
vall_e.cpp/include/dr_wav.h
Normal file
6434
vall_e.cpp/include/dr_wav.h
Normal file
File diff suppressed because it is too large
Load Diff
184
vall_e.cpp/include/encodec.h
Normal file
184
vall_e.cpp/include/encodec.h
Normal file
|
@ -0,0 +1,184 @@
|
||||||
|
/*
|
||||||
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||||
|
│ Copyright 2024 Pierre-Antoine Bannier │
|
||||||
|
│ │
|
||||||
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||||
|
│ any purpose with or without fee is hereby granted, provided that the │
|
||||||
|
│ above copyright notice and this permission notice appear in all copies. │
|
||||||
|
│ │
|
||||||
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||||
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||||
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||||
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||||
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||||
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||||
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||||
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||||
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
|
/*
|
||||||
|
* This file contains the declarations of the structs and functions used in the encodec library.
|
||||||
|
* The library provides functionality for audio compression and decompression using a custom model.
|
||||||
|
* The model consists of an encoder, a quantizer and a decoder, each with their own set of parameters.
|
||||||
|
* The library also provides functions for loading and freeing the model, as well as compressing and decompressing audio data.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml-alloc.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
#include "ggml.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
struct encodec_context;
|
||||||
|
|
||||||
|
/* Timing counters exposed through encodec_get_statistics().
 * NOTE(review): the `_us` suffix suggests microseconds - confirm against the
 * implementation. */
struct encodec_statistics {
    int64_t t_load_us;     /* time taken to load the model */
    int64_t t_compute_us;  /* time taken to compute the model */
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Loads an encodec model from the specified file path.
|
||||||
|
*
|
||||||
|
* @param model_path The file path to the encodec model.
|
||||||
|
* @param offset The offset (in bytes) to the start of the model in the file.
|
||||||
|
* @param n_gpu_layers The number of GPU layers to use.
|
||||||
|
* @return A pointer to the encodec context struct.
|
||||||
|
*/
|
||||||
|
struct encodec_context *encodec_load_model(
|
||||||
|
const char *model_path,
|
||||||
|
const int offset,
|
||||||
|
int n_gpu_layers);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the target bandwidth for the given encodec context.
|
||||||
|
*
|
||||||
|
* @param ectx The encodec context to set the target bandwidth for.
|
||||||
|
* @param bandwidth The target bandwidth to set, in bits per second.
|
||||||
|
*/
|
||||||
|
void encodec_set_target_bandwidth(
|
||||||
|
struct encodec_context *ectx,
|
||||||
|
int bandwidth);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the sample rate for the given encodec context.
|
||||||
|
*
|
||||||
|
* @param ectx The encodec context to set the sample rate for.
|
||||||
|
* @param sample_rate The sample rate to set.
|
||||||
|
*/
|
||||||
|
void encodec_set_sample_rate(
|
||||||
|
struct encodec_context *ectx,
|
||||||
|
int sample_rate);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reconstructs audio from raw audio data using the specified encodec context.
|
||||||
|
*
|
||||||
|
* @param ectx The encodec context to use for reconstruction.
|
||||||
|
* @param raw_audio The raw audio data to reconstruct.
|
||||||
|
* @param n_samples The number of samples in the raw audio buffer.
|
||||||
|
* @param n_threads The number of threads to use for reconstruction.
|
||||||
|
* @return True if the reconstruction was successful, false otherwise.
|
||||||
|
*/
|
||||||
|
bool encodec_reconstruct_audio(
|
||||||
|
struct encodec_context *ectx,
|
||||||
|
const float *raw_audio,
|
||||||
|
const int n_samples,
|
||||||
|
int n_threads);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compresses audio data using the specified encodec context.
|
||||||
|
*
|
||||||
|
* @param ectx The encodec context to use for compression.
|
||||||
|
* @param raw_audio The raw audio data to compress.
|
||||||
|
* @param n_samples The number of samples in the raw audio buffer.
|
||||||
|
* @param n_threads The number of threads to use for compression.
|
||||||
|
* @return True if the compression was successful, false otherwise.
|
||||||
|
*/
|
||||||
|
bool encodec_compress_audio(
|
||||||
|
struct encodec_context *ectx,
|
||||||
|
const float *raw_audio,
|
||||||
|
const int n_samples,
|
||||||
|
int n_threads);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decompresses audio data using the specified encodec context.
|
||||||
|
*
|
||||||
|
* @param ectx The encodec context to use for decompression.
|
||||||
|
* @param codes The compressed audio data to decompress.
|
||||||
|
* @param n_codes The number of codes in the codes buffer.
|
||||||
|
* @param n_threads The number of threads to use for decompression.
|
||||||
|
* @return True if the audio data was successfully decompressed, false otherwise.
|
||||||
|
*/
|
||||||
|
bool encodec_decompress_audio(
|
||||||
|
struct encodec_context *ectx,
|
||||||
|
const int32_t *codes,
|
||||||
|
const int n_codes,
|
||||||
|
int n_threads);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the audio data from the given encodec context.
|
||||||
|
*
|
||||||
|
* @param ectx The encodec context to get the audio data from.
|
||||||
|
* @return A pointer to the audio data.
|
||||||
|
*/
|
||||||
|
float * encodec_get_audio(
|
||||||
|
struct encodec_context *ectx);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the size of the audio data from the given encodec context.
|
||||||
|
*
|
||||||
|
* @param ectx The encodec context to get the audio size from.
|
||||||
|
* @return The size of the audio data.
|
||||||
|
*/
|
||||||
|
int encodec_get_audio_size(
|
||||||
|
struct encodec_context *ectx);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the code data from the given encodec context.
|
||||||
|
*
|
||||||
|
* @param ectx The encodec context to get the code data from.
|
||||||
|
* @return A pointer to the code data.
|
||||||
|
*/
|
||||||
|
int32_t * encodec_get_codes(
|
||||||
|
struct encodec_context *ectx);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the size of the code data from the given encodec context.
|
||||||
|
*
|
||||||
|
* @param ectx The encodec context to get the code size from.
|
||||||
|
* @return The size of the code data.
|
||||||
|
*/
|
||||||
|
int encodec_get_codes_size(
|
||||||
|
struct encodec_context *ectx);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the statistics for the given encodec context.
|
||||||
|
*
|
||||||
|
* @param ectx The encodec context to get the statistics for.
|
||||||
|
* @return A pointer to the statistics struct.
|
||||||
|
*/
|
||||||
|
const struct encodec_statistics* encodec_get_statistics(
|
||||||
|
struct encodec_context *ectx);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reset the statistics for the given encodec context.
|
||||||
|
*
|
||||||
|
* @param ectx The encodec context to reset the statistics for.
|
||||||
|
*/
|
||||||
|
void encodec_reset_statistics(
|
||||||
|
struct encodec_context *ectx);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Frees the memory allocated for an encodec context.
|
||||||
|
*
|
||||||
|
* @param ectx The encodec context to free.
|
||||||
|
*/
|
||||||
|
void encodec_free(
|
||||||
|
struct encodec_context *ectx);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
109
vall_e.cpp/include/encoder.h
Normal file
109
vall_e.cpp/include/encoder.h
Normal file
|
@ -0,0 +1,109 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "lstm.h"
|
||||||
|
|
||||||
|
// res + downsample block at some ratio
|
||||||
|
// Tensors for one encoder stage (residual unit followed by downsampling).
// encodec_forward_encoder() applies conv_sc_* as the shortcut, conv_1_* and
// conv_2_* on the residual path, and ds_conv_* last with a stride taken
// from `ratios`.
struct encodec_encoder_block {
    // first convolution of the residual path: weight, bias
    struct ggml_tensor * conv_1_w;
    struct ggml_tensor * conv_1_b;

    // second convolution of the residual path: weight, bias
    struct ggml_tensor * conv_2_w;
    struct ggml_tensor * conv_2_b;

    // shortcut convolution added back onto the residual path: weight, bias
    struct ggml_tensor * conv_sc_w;
    struct ggml_tensor * conv_sc_b;

    // downsampling convolution: weight, bias
    struct ggml_tensor * ds_conv_w;
    struct ggml_tensor * ds_conv_b;
};
|
||||||
|
|
||||||
|
struct encodec_encoder {
|
||||||
|
struct ggml_tensor *init_conv_w;
|
||||||
|
struct ggml_tensor *init_conv_b;
|
||||||
|
|
||||||
|
encodec_lstm lstm;
|
||||||
|
|
||||||
|
struct ggml_tensor *final_conv_w;
|
||||||
|
struct ggml_tensor *final_conv_b;
|
||||||
|
|
||||||
|
std::vector<encodec_encoder_block> blocks;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ggml_tensor *encodec_forward_encoder(
|
||||||
|
const struct encodec_encoder *encoder, struct ggml_context *ctx0,
|
||||||
|
struct ggml_tensor *inp, const int * ratios, const int kernel_size, const int res_kernel_size,
|
||||||
|
const int stride) {
|
||||||
|
|
||||||
|
if (!inp) {
|
||||||
|
fprintf(stderr, "%s: null input tensor\n", __func__);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ggml_tensor *inpL = strided_conv_1d(
|
||||||
|
ctx0, inp, encoder->init_conv_w, encoder->init_conv_b, stride);
|
||||||
|
|
||||||
|
for (int layer_ix = 0; layer_ix < 4; layer_ix++) {
|
||||||
|
encodec_encoder_block block = encoder->blocks[layer_ix];
|
||||||
|
|
||||||
|
struct ggml_tensor *current = inpL;
|
||||||
|
|
||||||
|
// shortcut
|
||||||
|
struct ggml_tensor *shortcut = strided_conv_1d(
|
||||||
|
ctx0, inpL, block.conv_sc_w, block.conv_sc_b, stride);
|
||||||
|
|
||||||
|
// conv1
|
||||||
|
current = ggml_elu(ctx0, current);
|
||||||
|
|
||||||
|
current = strided_conv_1d(
|
||||||
|
ctx0, current, block.conv_1_w, block.conv_1_b, stride);
|
||||||
|
|
||||||
|
// conv2
|
||||||
|
current = ggml_elu(ctx0, current);
|
||||||
|
|
||||||
|
current = strided_conv_1d(
|
||||||
|
ctx0, current, block.conv_2_w, block.conv_2_b, stride);
|
||||||
|
|
||||||
|
// residual connection
|
||||||
|
inpL = ggml_add(ctx0, current, shortcut);
|
||||||
|
|
||||||
|
// downsampling layers
|
||||||
|
inpL = ggml_elu(ctx0, inpL);
|
||||||
|
|
||||||
|
inpL = strided_conv_1d(
|
||||||
|
ctx0, inpL, block.ds_conv_w, block.ds_conv_b, ratios[3 - layer_ix]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// lstm
|
||||||
|
{
|
||||||
|
struct ggml_tensor *cur = inpL;
|
||||||
|
|
||||||
|
const encodec_lstm lstm = encoder->lstm;
|
||||||
|
|
||||||
|
// first lstm layer
|
||||||
|
char l0_prefix[7] = "enc_l0";
|
||||||
|
struct ggml_tensor *hs1 = forward_pass_lstm_unilayer(
|
||||||
|
ctx0, cur, lstm.l0_ih_w, lstm.l0_hh_w, lstm.l0_ih_b, lstm.l0_hh_b, l0_prefix);
|
||||||
|
|
||||||
|
// second lstm layer
|
||||||
|
char l1_prefix[7] = "enc_l1";
|
||||||
|
struct ggml_tensor *out = forward_pass_lstm_unilayer(
|
||||||
|
ctx0, hs1, lstm.l1_ih_w, lstm.l1_hh_w, lstm.l1_ih_b, lstm.l1_hh_b, l1_prefix);
|
||||||
|
|
||||||
|
inpL = ggml_add(ctx0, inpL, out);
|
||||||
|
}
|
||||||
|
|
||||||
|
// final conv
|
||||||
|
inpL = ggml_elu(ctx0, inpL);
|
||||||
|
|
||||||
|
struct ggml_tensor *encoded_inp = strided_conv_1d(
|
||||||
|
ctx0, inpL, encoder->final_conv_w, encoder->final_conv_b, stride);
|
||||||
|
|
||||||
|
return encoded_inp;
|
||||||
|
}
|
103
vall_e.cpp/include/espeak-ng/encoding.h
Normal file
103
vall_e.cpp/include/espeak-ng/encoding.h
Normal file
|
@ -0,0 +1,103 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2017 Reece H. Dunn
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, see: <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#ifndef ESPEAK_NG_ENCODING_H
|
||||||
|
#define ESPEAK_NG_ENCODING_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
{
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Text encodings accepted by espeak_ng_EncodingFromName() and the
 * text_decoder_* functions. The numeric values are written out so the
 * value of each enumerator is visible at a glance; they are the same
 * values the implicit numbering produced. */
typedef enum {
	ESPEAKNG_ENCODING_UNKNOWN         = 0,
	ESPEAKNG_ENCODING_US_ASCII        = 1,
	ESPEAKNG_ENCODING_ISO_8859_1      = 2,
	ESPEAKNG_ENCODING_ISO_8859_2      = 3,
	ESPEAKNG_ENCODING_ISO_8859_3      = 4,
	ESPEAKNG_ENCODING_ISO_8859_4      = 5,
	ESPEAKNG_ENCODING_ISO_8859_5      = 6,
	ESPEAKNG_ENCODING_ISO_8859_6      = 7,
	ESPEAKNG_ENCODING_ISO_8859_7      = 8,
	ESPEAKNG_ENCODING_ISO_8859_8      = 9,
	ESPEAKNG_ENCODING_ISO_8859_9      = 10,
	ESPEAKNG_ENCODING_ISO_8859_10     = 11,
	ESPEAKNG_ENCODING_ISO_8859_11     = 12,
	/* ISO-8859-12 is not a valid encoding, so no enumerator (and no value) is reserved for it. */
	ESPEAKNG_ENCODING_ISO_8859_13     = 13,
	ESPEAKNG_ENCODING_ISO_8859_14     = 14,
	ESPEAKNG_ENCODING_ISO_8859_15     = 15,
	ESPEAKNG_ENCODING_ISO_8859_16     = 16,
	ESPEAKNG_ENCODING_KOI8_R          = 17,
	ESPEAKNG_ENCODING_ISCII           = 18,
	ESPEAKNG_ENCODING_UTF_8           = 19,
	ESPEAKNG_ENCODING_ISO_10646_UCS_2 = 20,
} espeak_ng_ENCODING;
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_ENCODING
|
||||||
|
espeak_ng_EncodingFromName(const char *encoding);
|
||||||
|
|
||||||
|
typedef struct espeak_ng_TEXT_DECODER_ espeak_ng_TEXT_DECODER;
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_TEXT_DECODER *
|
||||||
|
create_text_decoder(void);
|
||||||
|
|
||||||
|
ESPEAK_NG_API void
|
||||||
|
destroy_text_decoder(espeak_ng_TEXT_DECODER *decoder);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
text_decoder_decode_string(espeak_ng_TEXT_DECODER *decoder,
|
||||||
|
const char *string,
|
||||||
|
int length,
|
||||||
|
espeak_ng_ENCODING encoding);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
text_decoder_decode_string_auto(espeak_ng_TEXT_DECODER *decoder,
|
||||||
|
const char *string,
|
||||||
|
int length,
|
||||||
|
espeak_ng_ENCODING encoding);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
text_decoder_decode_wstring(espeak_ng_TEXT_DECODER *decoder,
|
||||||
|
const wchar_t *string,
|
||||||
|
int length);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
text_decoder_decode_string_multibyte(espeak_ng_TEXT_DECODER *decoder,
|
||||||
|
const void *input,
|
||||||
|
espeak_ng_ENCODING encoding,
|
||||||
|
int flags);
|
||||||
|
|
||||||
|
ESPEAK_NG_API int
|
||||||
|
text_decoder_eof(espeak_ng_TEXT_DECODER *decoder);
|
||||||
|
|
||||||
|
ESPEAK_NG_API uint32_t
|
||||||
|
text_decoder_getc(espeak_ng_TEXT_DECODER *decoder);
|
||||||
|
|
||||||
|
ESPEAK_NG_API uint32_t
|
||||||
|
text_decoder_peekc(espeak_ng_TEXT_DECODER *decoder);
|
||||||
|
|
||||||
|
ESPEAK_NG_API const void *
|
||||||
|
text_decoder_get_buffer(espeak_ng_TEXT_DECODER *decoder);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
223
vall_e.cpp/include/espeak-ng/espeak_ng.h
Normal file
223
vall_e.cpp/include/espeak-ng/espeak_ng.h
Normal file
|
@ -0,0 +1,223 @@
|
||||||
|
/* eSpeak NG API.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2015-2017 Reece H. Dunn
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef ESPEAK_NG_H
|
||||||
|
#define ESPEAK_NG_H
|
||||||
|
|
||||||
|
#include <espeak-ng/speak_lib.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
{
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
#ifdef LIBESPEAK_NG_EXPORT
|
||||||
|
#define ESPEAK_NG_API __declspec(dllexport)
|
||||||
|
#else
|
||||||
|
#define ESPEAK_NG_API __declspec(dllimport)
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#define ESPEAK_NG_API
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define ESPEAKNG_DEFAULT_VOICE "en"
|
||||||
|
|
||||||
|
/* Status codes returned by the espeak_ng_* functions.
 * The bits selected by ENS_GROUP_MASK identify the group a code belongs to. */
typedef enum {
	/* group selectors */
	ENS_GROUP_MASK              = 0x70000000,
	ENS_GROUP_ERRNO             = 0x00000000, /* Values 0-255 map to errno error codes. */
	ENS_GROUP_ESPEAK_NG         = 0x10000000, /* eSpeak NG error codes. */

	/* codes present since eSpeak NG 1.49.0 */
	ENS_OK                      = 0,
	ENS_COMPILE_ERROR           = 0x100001FF,
	ENS_VERSION_MISMATCH        = 0x100002FF,
	ENS_FIFO_BUFFER_FULL        = 0x100003FF,
	ENS_NOT_INITIALIZED         = 0x100004FF,
	ENS_AUDIO_ERROR             = 0x100005FF,
	ENS_VOICE_NOT_FOUND         = 0x100006FF,
	ENS_MBROLA_NOT_FOUND        = 0x100007FF,
	ENS_MBROLA_VOICE_NOT_FOUND  = 0x100008FF,
	ENS_EVENT_BUFFER_FULL       = 0x100009FF,
	ENS_NOT_SUPPORTED           = 0x10000AFF,
	ENS_UNSUPPORTED_PHON_FORMAT = 0x10000BFF,
	ENS_NO_SPECT_FRAMES         = 0x10000CFF,
	ENS_EMPTY_PHONEME_MANIFEST  = 0x10000DFF,
	ENS_SPEECH_STOPPED          = 0x10000EFF,

	/* codes added in eSpeak NG 1.49.2 */
	ENS_UNKNOWN_PHONEME_FEATURE = 0x10000FFF,
	ENS_UNKNOWN_TEXT_ENCODING   = 0x100010FF,
} espeak_ng_STATUS;
|
||||||
|
|
||||||
|
/* Output mode passed to espeak_ng_InitializeOutput().
 * The values are distinct single bits (0x1, 0x2).
 * NOTE(review): presumably meant to be OR-ed together - confirm against the
 * eSpeak NG documentation. */
typedef enum {
	ENOUTPUT_MODE_SYNCHRONOUS = 0x0001,
	ENOUTPUT_MODE_SPEAK_AUDIO = 0x0002,
} espeak_ng_OUTPUT_MODE;
|
||||||
|
|
||||||
|
/* Gender tag for a voice; ENGENDER_UNKNOWN is the zero value. */
typedef enum {
	ENGENDER_UNKNOWN = 0,
	ENGENDER_MALE    = 1,
	ENGENDER_FEMALE  = 2,
	ENGENDER_NEUTRAL = 3,
} espeak_ng_VOICE_GENDER;
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
void (*outputPhoSymbol)(char* pho_code,int pho_type);
|
||||||
|
void (*outputSilence)(short echo_tail);
|
||||||
|
void (*outputVoiced)(short sample);
|
||||||
|
void (*outputUnvoiced)(short sample);
|
||||||
|
} espeak_ng_OUTPUT_HOOKS;
|
||||||
|
|
||||||
|
/* eSpeak NG 1.49.0 */
|
||||||
|
|
||||||
|
typedef struct espeak_ng_ERROR_CONTEXT_ *espeak_ng_ERROR_CONTEXT;
|
||||||
|
|
||||||
|
ESPEAK_NG_API void
|
||||||
|
espeak_ng_ClearErrorContext(espeak_ng_ERROR_CONTEXT *context);
|
||||||
|
|
||||||
|
ESPEAK_NG_API void
|
||||||
|
espeak_ng_GetStatusCodeMessage(espeak_ng_STATUS status,
|
||||||
|
char *buffer,
|
||||||
|
size_t length);
|
||||||
|
|
||||||
|
ESPEAK_NG_API void
|
||||||
|
espeak_ng_PrintStatusCodeMessage(espeak_ng_STATUS status,
|
||||||
|
FILE *out,
|
||||||
|
espeak_ng_ERROR_CONTEXT context);
|
||||||
|
|
||||||
|
ESPEAK_NG_API void
|
||||||
|
espeak_ng_InitializePath(const char *path);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_Initialize(espeak_ng_ERROR_CONTEXT *context);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_InitializeOutput(espeak_ng_OUTPUT_MODE output_mode,
|
||||||
|
int buffer_length,
|
||||||
|
const char *device);
|
||||||
|
|
||||||
|
ESPEAK_NG_API int
|
||||||
|
espeak_ng_GetSampleRate(void);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_SetParameter(espeak_PARAMETER parameter,
|
||||||
|
int value,
|
||||||
|
int relative);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_SetPhonemeEvents(int enable, int ipa);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_SetPunctuationList(const wchar_t *punctlist);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_SetVoiceByName(const char *name);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_SetVoiceByFile(const char *filename);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_SetVoiceByProperties(espeak_VOICE *voice_selector);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_Synthesize(const void *text,
|
||||||
|
size_t size,
|
||||||
|
unsigned int position,
|
||||||
|
espeak_POSITION_TYPE position_type,
|
||||||
|
unsigned int end_position,
|
||||||
|
unsigned int flags,
|
||||||
|
unsigned int *unique_identifier,
|
||||||
|
void *user_data);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_SynthesizeMark(const void *text,
|
||||||
|
size_t size,
|
||||||
|
const char *index_mark,
|
||||||
|
unsigned int end_position,
|
||||||
|
unsigned int flags,
|
||||||
|
unsigned int *unique_identifier,
|
||||||
|
void *user_data);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_SpeakKeyName(const char *key_name);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_SpeakCharacter(wchar_t character);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_Cancel(void);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_Synchronize(void);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_Terminate(void);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_CompileDictionary(const char *dsource,
|
||||||
|
const char *dict_name,
|
||||||
|
FILE *log,
|
||||||
|
int flags,
|
||||||
|
espeak_ng_ERROR_CONTEXT *context);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_CompileMbrolaVoice(const char *path,
|
||||||
|
FILE *log,
|
||||||
|
espeak_ng_ERROR_CONTEXT *context);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_CompilePhonemeData(long rate,
|
||||||
|
FILE *log,
|
||||||
|
espeak_ng_ERROR_CONTEXT *context);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_CompileIntonation(FILE *log,
|
||||||
|
espeak_ng_ERROR_CONTEXT *context);
|
||||||
|
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_CompileIntonationPath(const char *source_path,
|
||||||
|
const char *destination_path,
|
||||||
|
FILE *log,
|
||||||
|
espeak_ng_ERROR_CONTEXT *context);
|
||||||
|
|
||||||
|
/* eSpeak NG 1.49.1 */
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_CompilePhonemeDataPath(long rate,
|
||||||
|
const char *source_path,
|
||||||
|
const char *destination_path,
|
||||||
|
FILE *log,
|
||||||
|
espeak_ng_ERROR_CONTEXT *context);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_SetOutputHooks(espeak_ng_OUTPUT_HOOKS* hooks);
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_SetConstF0(int f0);
|
||||||
|
|
||||||
|
ESPEAK_NG_API espeak_ng_STATUS
|
||||||
|
espeak_ng_SetRandSeed(long seed);
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
709
vall_e.cpp/include/espeak-ng/speak_lib.h
Normal file
709
vall_e.cpp/include/espeak-ng/speak_lib.h
Normal file
|
@ -0,0 +1,709 @@
|
||||||
|
#ifndef SPEAK_LIB_H
|
||||||
|
#define SPEAK_LIB_H
|
||||||
|
/***************************************************************************
|
||||||
|
* Copyright (C) 2005 to 2012 by Jonathan Duddington *
|
||||||
|
* email: jonsd@users.sourceforge.net *
|
||||||
|
* *
|
||||||
|
* This program is free software; you can redistribute it and/or modify *
|
||||||
|
* it under the terms of the GNU General Public License as published by *
|
||||||
|
* the Free Software Foundation; either version 3 of the License, or *
|
||||||
|
* (at your option) any later version. *
|
||||||
|
* *
|
||||||
|
* This program is distributed in the hope that it will be useful, *
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
||||||
|
* GNU General Public License for more details. *
|
||||||
|
* *
|
||||||
|
* You should have received a copy of the GNU General Public License *
|
||||||
|
* along with this program; if not, see: *
|
||||||
|
* <http://www.gnu.org/licenses/>. *
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************************/
|
||||||
|
/* This is the header file for the library version of espeak */
|
||||||
|
/* */
|
||||||
|
/*************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
#ifdef LIBESPEAK_NG_EXPORT
|
||||||
|
#define ESPEAK_API __declspec(dllexport)
|
||||||
|
#else
|
||||||
|
#define ESPEAK_API __declspec(dllimport)
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#define ESPEAK_API
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define ESPEAK_API_REVISION 12
|
||||||
|
/*
|
||||||
|
Revision 2
|
||||||
|
Added parameter "options" to eSpeakInitialize()
|
||||||
|
|
||||||
|
Revision 3
|
||||||
|
Added espeakWORDGAP to espeak_PARAMETER
|
||||||
|
|
||||||
|
Revision 4
|
||||||
|
Added flags parameter to espeak_CompileDictionary()
|
||||||
|
|
||||||
|
Revision 5
|
||||||
|
Added espeakCHARS_16BIT
|
||||||
|
|
||||||
|
Revision 6
|
||||||
|
Added macros: espeakRATE_MINIMUM, espeakRATE_MAXIMUM, espeakRATE_NORMAL
|
||||||
|
|
||||||
|
Revision 7 24.Dec.2011
|
||||||
|
Changed espeak_EVENT structure to add id.string[] for phoneme mnemonics.
|
||||||
|
Added espeakINITIALIZE_PHONEME_IPA option for espeak_Initialize() to report phonemes as IPA names.
|
||||||
|
|
||||||
|
Revision 8 26.Apr.2013
|
||||||
|
Added function espeak_TextToPhonemes().
|
||||||
|
|
||||||
|
Revision 9 30.May.2013
|
||||||
|
Changed function espeak_TextToPhonemes().
|
||||||
|
|
||||||
|
Revision 10 29.Aug.2014
|
||||||
|
Changed phonememode parameter to espeak_TextToPhonemes() and espeak_SetPhonemeTrace
|
||||||
|
|
||||||
|
Revision 11 (espeak-ng)
|
||||||
|
Made ESPEAK_API import/export symbols correctly on Windows.
|
||||||
|
|
||||||
|
Revision 12 (espeak-ng)
|
||||||
|
Exposed espeak_SetPhonemeCallback. This is available in eSpeak, but was not exposed in this header.
|
||||||
|
|
||||||
|
*/
|
||||||
|
/********************/
|
||||||
|
/* Initialization */
|
||||||
|
/********************/
|
||||||
|
|
||||||
|
// values for 'value' in espeak_SetParameter(espeakRATE, value, 0), nominally in words-per-minute
|
||||||
|
#define espeakRATE_MINIMUM 80
|
||||||
|
#define espeakRATE_MAXIMUM 450
|
||||||
|
#define espeakRATE_NORMAL 175
|
||||||
|
|
||||||
|
|
||||||
|
/* Kinds of event stored in the `type` field of espeak_EVENT.
   The numeric values are written out explicitly: an event list handed to the
   synthesis callback is terminated by an event whose type is 0
   (espeakEVENT_LIST_TERMINATED). */
typedef enum {
	espeakEVENT_LIST_TERMINATED = 0, // Retrieval mode: terminates the event list.
	espeakEVENT_WORD = 1,            // Start of word
	espeakEVENT_SENTENCE = 2,        // Start of sentence
	espeakEVENT_MARK = 3,            // Mark
	espeakEVENT_PLAY = 4,            // Audio element
	espeakEVENT_END = 5,             // End of sentence or clause
	espeakEVENT_MSG_TERMINATED = 6,  // End of message
	espeakEVENT_PHONEME = 7,         // Phoneme, if enabled in espeak_Initialize()
	espeakEVENT_SAMPLERATE = 8       // Set sample rate
} espeak_EVENT_TYPE;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
espeak_EVENT_TYPE type;
|
||||||
|
unsigned int unique_identifier; // message identifier (or 0 for key or character)
|
||||||
|
int text_position; // the number of characters from the start of the text
|
||||||
|
int length; // word length, in characters (for espeakEVENT_WORD)
|
||||||
|
int audio_position; // the time in mS within the generated speech output data
|
||||||
|
int sample; // sample id (internal use)
|
||||||
|
void* user_data; // pointer supplied by the calling program
|
||||||
|
union {
|
||||||
|
int number; // used for WORD and SENTENCE events.
|
||||||
|
const char *name; // used for MARK and PLAY events. UTF8 string
|
||||||
|
char string[8]; // used for phoneme names (UTF8). Terminated by a zero byte unless the name needs the full 8 bytes.
|
||||||
|
} id;
|
||||||
|
} espeak_EVENT;
|
||||||
|
/*
|
||||||
|
When a message is supplied to espeak_Synth, the request is buffered and espeak_Synth returns. When the message is really processed, the callback function will be repeatedly called.
|
||||||
|
|
||||||
|
|
||||||
|
In RETRIEVAL mode, the callback function supplies to the calling program the audio data and an event list terminated by 0 (LIST_TERMINATED).
|
||||||
|
|
||||||
|
In PLAYBACK mode, the callback function is called as soon as an event happens.
|
||||||
|
|
||||||
|
For example suppose that the following message is supplied to espeak_Synth:
|
||||||
|
"hello, hello."
|
||||||
|
|
||||||
|
|
||||||
|
* Once processed in RETRIEVAL mode, it could lead to 3 calls of the callback function :
|
||||||
|
|
||||||
|
** Block 1:
|
||||||
|
<audio data> +
|
||||||
|
List of events: SENTENCE + WORD + LIST_TERMINATED
|
||||||
|
|
||||||
|
** Block 2:
|
||||||
|
<audio data> +
|
||||||
|
List of events: WORD + END + LIST_TERMINATED
|
||||||
|
|
||||||
|
** Block 3:
|
||||||
|
no audio data
|
||||||
|
List of events: MSG_TERMINATED + LIST_TERMINATED
|
||||||
|
|
||||||
|
|
||||||
|
* Once processed in PLAYBACK mode, it could lead to 5 calls of the callback function:
|
||||||
|
|
||||||
|
** SENTENCE
|
||||||
|
** WORD (call when the sounds are actually played)
|
||||||
|
** WORD
|
||||||
|
** END (call when the end of sentence is actually played.)
|
||||||
|
** MSG_TERMINATED
|
||||||
|
|
||||||
|
|
||||||
|
The MSG_TERMINATED event is the last event. It can inform the calling program to clear the user data related to the message.
|
||||||
|
So if the synthesis must be stopped, the callback function is called for each pending message with the MSG_TERMINATED event.
|
||||||
|
|
||||||
|
A MARK event indicates a <mark> element in the text.
|
||||||
|
A PLAY event indicates an <audio> element in the text, for which the calling program should play the named sound file.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Unit in which the `position` argument of espeak_Synth() is counted:
   characters, words, or sentences.
   Only the first enumerator is numbered, so the values are 1, 2, 3. */
typedef enum {
	POS_CHARACTER = 1,
	POS_WORD,
	POS_SENTENCE
} espeak_POSITION_TYPE;
|
||||||
|
|
||||||
|
|
||||||
|
/* Value of the `output` argument of espeak_Initialize(): decides whether the
   audio is played by eSpeak or passed back through the synthesis callback.
   The enumerators are unnumbered, so they take the values 0..3 in order. */
typedef enum {
	/* PLAYBACK mode: plays the audio data, supplies events to the calling program*/
	AUDIO_OUTPUT_PLAYBACK,

	/* RETRIEVAL mode: supplies audio data and events to the calling program */
	AUDIO_OUTPUT_RETRIEVAL,

	/* SYNCHRONOUS mode: as RETRIEVAL but doesn't return until synthesis is completed */
	AUDIO_OUTPUT_SYNCHRONOUS,

	/* Synchronous playback */
	AUDIO_OUTPUT_SYNCH_PLAYBACK

} espeak_AUDIO_OUTPUT;
|
||||||
|
|
||||||
|
|
||||||
|
/* Status code returned by most espeak_* functions in this header.
   Note that EE_INTERNAL_ERROR is negative; espeak_Initialize() also reports
   failure as -1 (EE_INTERNAL_ERROR) in place of a sample rate. */
typedef enum {
	EE_OK=0,              // operation achieved
	EE_INTERNAL_ERROR=-1,
	EE_BUFFER_FULL=1,     // the command can not be buffered; the caller may retry after a while
	EE_NOT_FOUND=2
} espeak_ERROR;
|
||||||
|
|
||||||
|
#define espeakINITIALIZE_PHONEME_EVENTS 0x0001
|
||||||
|
#define espeakINITIALIZE_PHONEME_IPA 0x0002
|
||||||
|
#define espeakINITIALIZE_DONT_EXIT 0x8000
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API int espeak_Initialize(espeak_AUDIO_OUTPUT output, int buflength, const char *path, int options);
|
||||||
|
/* Must be called before any synthesis functions are called.
|
||||||
|
output: the audio data can either be played by eSpeak or passed back by the SynthCallback function.
|
||||||
|
|
||||||
|
buflength: The length in mS of sound buffers passed to the SynthCallback function.
|
||||||
|
Value=0 gives a default of 60mS.
|
||||||
|
This parameter is only used for AUDIO_OUTPUT_RETRIEVAL and AUDIO_OUTPUT_SYNCHRONOUS modes.
|
||||||
|
|
||||||
|
path: The directory which contains the espeak-ng-data directory, or NULL for the default location.
|
||||||
|
|
||||||
|
options: bit 0: 1=allow espeakEVENT_PHONEME events.
|
||||||
|
bit 1: 1= espeakEVENT_PHONEME events give IPA phoneme names, not eSpeak phoneme names
|
||||||
|
bit 15: 1=don't exit if espeak_data is not found (used for --help)
|
||||||
|
|
||||||
|
Returns: sample rate in Hz, or -1 (EE_INTERNAL_ERROR).
|
||||||
|
*/
|
||||||
|
|
||||||
|
typedef int (t_espeak_callback)(short*, int, espeak_EVENT*);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API void espeak_SetSynthCallback(t_espeak_callback* SynthCallback);
|
||||||
|
/* Must be called before any synthesis functions are called.
|
||||||
|
This specifies a function in the calling program which is called when a buffer of
|
||||||
|
speech sound data has been produced.
|
||||||
|
|
||||||
|
|
||||||
|
The callback function is of the form:
|
||||||
|
|
||||||
|
int SynthCallback(short *wav, int numsamples, espeak_EVENT *events);
|
||||||
|
|
||||||
|
wav: is the speech sound data which has been produced.
|
||||||
|
NULL indicates that the synthesis has been completed.
|
||||||
|
|
||||||
|
numsamples: is the number of entries in wav. This number may vary, may be less than
|
||||||
|
the value implied by the buflength parameter given in espeak_Initialize, and may
|
||||||
|
sometimes be zero (which does NOT indicate end of synthesis).
|
||||||
|
|
||||||
|
events: an array of espeak_EVENT items which indicate word and sentence events, and
|
||||||
|
also the occurrence of <mark> and <audio> elements within the text. The list of
|
||||||
|
events is terminated by an event of type = 0.
|
||||||
|
|
||||||
|
|
||||||
|
Callback returns: 0=continue synthesis, 1=abort synthesis.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API void espeak_SetUriCallback(int (*UriCallback)(int, const char*, const char*));
|
||||||
|
/* This function may be called before synthesis functions are used, in order to deal with
|
||||||
|
<audio> tags. It specifies a callback function which is called when an <audio> element is
|
||||||
|
encountered and allows the calling program to indicate whether the sound file which
|
||||||
|
is specified in the <audio> element is available and is to be played.
|
||||||
|
|
||||||
|
The callback function is of the form:
|
||||||
|
|
||||||
|
int UriCallback(int type, const char *uri, const char *base);
|
||||||
|
|
||||||
|
type: type of callback event. Currently only 1= <audio> element
|
||||||
|
|
||||||
|
uri: the "src" attribute from the <audio> element
|
||||||
|
|
||||||
|
base: the "xml:base" attribute (if any) from the <speak> element
|
||||||
|
|
||||||
|
Return: 1=don't play the sound, but speak the text alternative.
|
||||||
|
0=place a PLAY event in the event list at the point where the <audio> element
|
||||||
|
occurs. The calling program can then play the sound at that point.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API void espeak_SetPhonemeCallback(int (*PhonemeCallback)(const char *));
|
||||||
|
|
||||||
|
|
||||||
|
/********************/
|
||||||
|
/* Synthesis */
|
||||||
|
/********************/
|
||||||
|
|
||||||
|
|
||||||
|
#define espeakCHARS_AUTO 0
|
||||||
|
#define espeakCHARS_UTF8 1
|
||||||
|
#define espeakCHARS_8BIT 2
|
||||||
|
#define espeakCHARS_WCHAR 3
|
||||||
|
#define espeakCHARS_16BIT 4
|
||||||
|
|
||||||
|
#define espeakSSML 0x10
|
||||||
|
#define espeakPHONEMES 0x100
|
||||||
|
#define espeakENDPAUSE 0x1000
|
||||||
|
#define espeakKEEP_NAMEDATA 0x2000
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API espeak_ERROR espeak_Synth(const void *text,
|
||||||
|
size_t size,
|
||||||
|
unsigned int position,
|
||||||
|
espeak_POSITION_TYPE position_type,
|
||||||
|
unsigned int end_position,
|
||||||
|
unsigned int flags,
|
||||||
|
unsigned int* unique_identifier,
|
||||||
|
void* user_data);
|
||||||
|
/* Synthesize speech for the specified text. The speech sound data is passed to the calling
|
||||||
|
program in buffers by means of the callback function specified by espeak_SetSynthCallback(). The command is asynchronous: it is internally buffered and returns as soon as possible. If espeak_Initialize was previously called with AUDIO_OUTPUT_PLAYBACK as argument, the sound data are played by eSpeak.
|
||||||
|
|
||||||
|
text: The text to be spoken, terminated by a zero character. It may be either 8-bit characters,
|
||||||
|
wide characters (wchar_t), or UTF8 encoding. Which of these is determined by the "flags"
|
||||||
|
parameter.
|
||||||
|
|
||||||
|
size: Equal to (or greater than) the size of the text data, in bytes. This is used in order
|
||||||
|
to allocate internal storage space for the text. This value is not used for
|
||||||
|
AUDIO_OUTPUT_SYNCHRONOUS mode.
|
||||||
|
|
||||||
|
position: The position in the text where speaking starts. Zero indicates speak from the
|
||||||
|
start of the text.
|
||||||
|
|
||||||
|
position_type: Determines whether "position" is a number of characters, words, or sentences.
|
||||||
|
Values: POS_CHARACTER, POS_WORD, POS_SENTENCE (see espeak_POSITION_TYPE).
|
||||||
|
|
||||||
|
end_position: If set, this gives a character position at which speaking will stop. A value
|
||||||
|
of zero indicates no end position.
|
||||||
|
|
||||||
|
flags: These may be OR'd together:
|
||||||
|
Type of character codes, one of:
|
||||||
|
espeakCHARS_UTF8 UTF8 encoding
|
||||||
|
espeakCHARS_8BIT The 8 bit ISO-8859 character set for the particular language.
|
||||||
|
espeakCHARS_AUTO 8 bit or UTF8 (this is the default)
|
||||||
|
espeakCHARS_WCHAR Wide characters (wchar_t)
|
||||||
|
espeakCHARS_16BIT 16 bit characters.
|
||||||
|
|
||||||
|
espeakSSML Elements within < > are treated as SSML elements, or if not recognised are ignored.
|
||||||
|
|
||||||
|
espeakPHONEMES Text within [[ ]] is treated as phonemes codes (in espeak's Kirshenbaum encoding).
|
||||||
|
|
||||||
|
espeakENDPAUSE If set then a sentence pause is added at the end of the text. If not set then
|
||||||
|
this pause is suppressed.
|
||||||
|
|
||||||
|
unique_identifier: This must be either NULL, or point to an integer variable to
|
||||||
|
which eSpeak writes a message identifier number.
|
||||||
|
eSpeak includes this number in espeak_EVENT messages which are the result of
|
||||||
|
this call of espeak_Synth().
|
||||||
|
|
||||||
|
user_data: a pointer (or NULL) which will be passed to the callback function in
|
||||||
|
espeak_EVENT messages.
|
||||||
|
|
||||||
|
Return: EE_OK: operation achieved
|
||||||
|
EE_BUFFER_FULL: the command can not be buffered;
|
||||||
|
you may try after a while to call the function again.
|
||||||
|
EE_INTERNAL_ERROR.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API espeak_ERROR espeak_Synth_Mark(const void *text,
|
||||||
|
size_t size,
|
||||||
|
const char *index_mark,
|
||||||
|
unsigned int end_position,
|
||||||
|
unsigned int flags,
|
||||||
|
unsigned int* unique_identifier,
|
||||||
|
void* user_data);
|
||||||
|
/* Synthesize speech for the specified text. Similar to espeak_Synth() but the start position is
|
||||||
|
specified by the name of a <mark> element in the text.
|
||||||
|
|
||||||
|
index_mark: The "name" attribute of a <mark> element within the text which specified the
|
||||||
|
point at which synthesis starts. UTF8 string.
|
||||||
|
|
||||||
|
For the other parameters, see espeak_Synth()
|
||||||
|
|
||||||
|
Return: EE_OK: operation achieved
|
||||||
|
EE_BUFFER_FULL: the command can not be buffered;
|
||||||
|
you may try after a while to call the function again.
|
||||||
|
EE_INTERNAL_ERROR.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API espeak_ERROR espeak_Key(const char *key_name);
|
||||||
|
/* Speak the name of a keyboard key.
|
||||||
|
If key_name is a single character, it speaks the name of the character.
|
||||||
|
Otherwise, it speaks key_name as a text string.
|
||||||
|
|
||||||
|
Return: EE_OK: operation achieved
|
||||||
|
EE_BUFFER_FULL: the command can not be buffered;
|
||||||
|
you may try after a while to call the function again.
|
||||||
|
EE_INTERNAL_ERROR.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API espeak_ERROR espeak_Char(wchar_t character);
|
||||||
|
/* Speak the name of the given character
|
||||||
|
|
||||||
|
Return: EE_OK: operation achieved
|
||||||
|
EE_BUFFER_FULL: the command can not be buffered;
|
||||||
|
you may try after a while to call the function again.
|
||||||
|
EE_INTERNAL_ERROR.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/***********************/
|
||||||
|
/* Speech Parameters */
|
||||||
|
/***********************/
|
||||||
|
|
||||||
|
/* Identifies a speech parameter for espeak_SetParameter() and
   espeak_GetParameter().
   Values 0..11 are numbered explicitly; the enumerators after
   espeakRESERVED2 continue the sequence implicitly (12, 13, 14), which makes
   N_SPEECH_PARAM equal to 15. */
typedef enum {
	espeakSILENCE=0,          /* internal use */
	espeakRATE=1,             // speaking speed in words per minute, 80 to 450
	espeakVOLUME=2,           // 0-200 or more; 0=silence, 100=normal full volume
	espeakPITCH=3,            // base pitch, 0-100; 50=normal
	espeakRANGE=4,            // pitch range, 0-100; 0=monotone, 50=normal
	espeakPUNCTUATION=5,      // which punctuation to announce: a value of espeak_PUNCT_TYPE
	espeakCAPITALS=6,         // how capital letters are announced
	espeakWORDGAP=7,          // pause between words, units of 10mS (at the default speed)
	espeakOPTIONS=8,          // reserved for misc. options.  not yet used
	espeakINTONATION=9,
	espeakSSML_BREAK_MUL=10,

	espeakRESERVED2=11,
	espeakEMPHASIS,           /* internal use */
	espeakLINELENGTH,         /* internal use */
	espeakVOICETYPE,          // internal, 1=mbrola
	N_SPEECH_PARAM            /* last enum */
} espeak_PARAMETER;
|
||||||
|
|
||||||
|
/* Values accepted for the espeakPUNCTUATION parameter: announce no
   punctuation characters, all of them, or only some of them. */
typedef enum {
	espeakPUNCT_NONE=0,
	espeakPUNCT_ALL=1,
	espeakPUNCT_SOME=2
} espeak_PUNCT_TYPE;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API espeak_ERROR espeak_SetParameter(espeak_PARAMETER parameter, int value, int relative);
|
||||||
|
/* Sets the value of the specified parameter.
|
||||||
|
relative=0 Sets the absolute value of the parameter.
|
||||||
|
relative=1 Sets a relative value of the parameter.
|
||||||
|
|
||||||
|
parameter:
|
||||||
|
espeakRATE: speaking speed in words per minute. Values 80 to 450.
|
||||||
|
|
||||||
|
espeakVOLUME: volume in range 0-200 or more.
|
||||||
|
0=silence, 100=normal full volume, greater values may produce amplitude compression or distortion
|
||||||
|
|
||||||
|
espeakPITCH: base pitch, range 0-100. 50=normal
|
||||||
|
|
||||||
|
espeakRANGE: pitch range, range 0-100. 0-monotone, 50=normal
|
||||||
|
|
||||||
|
espeakPUNCTUATION: which punctuation characters to announce:
|
||||||
|
value in espeak_PUNCT_TYPE (none, all, some),
|
||||||
|
see espeak_SetPunctuationList() to specify which characters are announced.
|
||||||
|
|
||||||
|
espeakCAPITALS: announce capital letters by:
|
||||||
|
0=none,
|
||||||
|
1=sound icon,
|
||||||
|
2=spelling,
|
||||||
|
3 or higher, by raising pitch. This value gives the amount in Hz by which the pitch
|
||||||
|
of a word is raised to indicate it has a capital letter.
|
||||||
|
|
||||||
|
espeakWORDGAP: pause between words, units of 10mS (at the default speed)
|
||||||
|
|
||||||
|
Return: EE_OK: operation achieved
|
||||||
|
EE_BUFFER_FULL: the command can not be buffered;
|
||||||
|
you may try after a while to call the function again.
|
||||||
|
EE_INTERNAL_ERROR.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API int espeak_GetParameter(espeak_PARAMETER parameter, int current);
|
||||||
|
/* current=0 Returns the default value of the specified parameter.
|
||||||
|
current=1 Returns the current value of the specified parameter, as set by SetParameter()
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API espeak_ERROR espeak_SetPunctuationList(const wchar_t *punctlist);
|
||||||
|
/* Specifies a list of punctuation characters whose names are to be spoken when the
|
||||||
|
value of the Punctuation parameter is set to "some".
|
||||||
|
|
||||||
|
punctlist: A list of character codes, terminated by a zero character.
|
||||||
|
|
||||||
|
Return: EE_OK: operation achieved
|
||||||
|
EE_BUFFER_FULL: the command can not be buffered;
|
||||||
|
you may try after a while to call the function again.
|
||||||
|
EE_INTERNAL_ERROR.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define espeakPHONEMES_SHOW 0x01
|
||||||
|
#define espeakPHONEMES_IPA 0x02
|
||||||
|
#define espeakPHONEMES_TRACE 0x08
|
||||||
|
#define espeakPHONEMES_MBROLA 0x10
|
||||||
|
#define espeakPHONEMES_TIE 0x80
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API void espeak_SetPhonemeTrace(int phonememode, FILE *stream);
|
||||||
|
/* phonememode: Controls the output of phoneme symbols for the text
|
||||||
|
bits 0-2:
|
||||||
|
value=0 No phoneme output (default)
|
||||||
|
value=1 Output the translated phoneme symbols for the text
|
||||||
|
value=2 as (1), but produces IPA phoneme names rather than ascii
|
||||||
|
bit 3: output a trace of how the translation was done (showing the matching rules and list entries)
|
||||||
|
bit 4: produce pho data for mbrola
|
||||||
|
bit 7: use (bits 8-23) as a tie within multi-letter phonemes names
|
||||||
|
bits 8-23: separator character, between phoneme names
|
||||||
|
|
||||||
|
stream output stream for the phoneme symbols (and trace). If stream=NULL then it uses stdout.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API const char *espeak_TextToPhonemes(const void **textptr, int textmode, int phonememode);
|
||||||
|
/* Translates text into phonemes. Call espeak_SetVoiceByName() first, to select a language.
|
||||||
|
|
||||||
|
It returns a pointer to a character string which contains the phonemes for the text up to
|
||||||
|
end of a sentence, or comma, semicolon, colon, or similar punctuation.
|
||||||
|
|
||||||
|
textptr: The address of a pointer to the input text which is terminated by a zero character.
|
||||||
|
On return, the pointer has been advanced past the text which has been translated, or else set
|
||||||
|
to NULL to indicate that the end of the text has been reached.
|
||||||
|
|
||||||
|
textmode: Type of character codes, one of:
|
||||||
|
espeakCHARS_UTF8 UTF8 encoding
|
||||||
|
espeakCHARS_8BIT The 8 bit ISO-8859 character set for the particular language.
|
||||||
|
espeakCHARS_AUTO 8 bit or UTF8 (this is the default)
|
||||||
|
espeakCHARS_WCHAR Wide characters (wchar_t)
|
||||||
|
espeakCHARS_16BIT 16 bit characters.
|
||||||
|
|
||||||
|
phonememode:
|
||||||
|
bit 1: 0=eSpeak's ascii phoneme names, 1= International Phonetic Alphabet (as UTF-8 characters).
|
||||||
|
bit 7: use (bits 8-23) as a tie within multi-letter phonemes names
|
||||||
|
bits 8-23: separator character, between phoneme names
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API void espeak_CompileDictionary(const char *path, FILE *log, int flags);
|
||||||
|
/* Compile pronunciation dictionary for a language which corresponds to the currently
|
||||||
|
selected voice. The required voice should be selected before calling this function.
|
||||||
|
|
||||||
|
path: The directory which contains the language's '_rules' and '_list' files.
|
||||||
|
'path' should end with a path separator character ('/').
|
||||||
|
log: Stream for error reports and statistics information. If log=NULL then stderr will be used.
|
||||||
|
|
||||||
|
flags: Bit 0: include source line information for debug purposes (This is displayed with the
|
||||||
|
-X command line option).
|
||||||
|
*/
|
||||||
|
/***********************/
|
||||||
|
/* Voice Selection */
|
||||||
|
/***********************/
|
||||||
|
|
||||||
|
|
||||||
|
// voice table
|
||||||
|
typedef struct {
|
||||||
|
const char *name; // a given name for this voice. UTF8 string.
|
||||||
|
const char *languages; // list of pairs of (byte) priority + (string) language (and dialect qualifier)
|
||||||
|
const char *identifier; // the filename for this voice within espeak-ng-data/voices
|
||||||
|
unsigned char gender; // 0=none 1=male, 2=female,
|
||||||
|
unsigned char age; // 0=not specified, or age in years
|
||||||
|
unsigned char variant; // only used when passed as a parameter to espeak_SetVoiceByProperties
|
||||||
|
unsigned char xx1; // for internal use
|
||||||
|
int score; // for internal use
|
||||||
|
void *spare; // for internal use
|
||||||
|
} espeak_VOICE;
|
||||||
|
|
||||||
|
/* Note: The espeak_VOICE structure is used for two purposes:
|
||||||
|
1. To return the details of the available voices.
|
||||||
|
2. As a parameter to espeak_SetVoiceByProperties() in order to specify selection criteria.
|
||||||
|
|
||||||
|
In (1), the "languages" field consists of a list of (UTF8) language names for which this voice
|
||||||
|
may be used, each language name in the list is terminated by a zero byte and is also preceded by
|
||||||
|
a single byte which gives a "priority" number. The list of languages is terminated by an
|
||||||
|
additional zero byte.
|
||||||
|
|
||||||
|
A language name consists of a language code, optionally followed by one or more qualifier (dialect)
|
||||||
|
names separated by hyphens (eg. "en-uk"). A voice might, for example, have languages "en-uk" and
|
||||||
|
"en". Even without "en" listed, voice would still be selected for the "en" language (because
|
||||||
|
"en-uk" is related) but at a lower priority.
|
||||||
|
|
||||||
|
The priority byte indicates how the voice is preferred for the language. A low number indicates a
|
||||||
|
more preferred voice, a higher number indicates a less preferred voice.
|
||||||
|
|
||||||
|
In (2), the "languages" field consists simply of a single (UTF8) language name, with no preceding
|
||||||
|
priority byte.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API const espeak_VOICE **espeak_ListVoices(espeak_VOICE *voice_spec);
|
||||||
|
/* Reads the voice files from espeak-ng-data/voices and creates an array of espeak_VOICE pointers.
|
||||||
|
The list is terminated by a NULL pointer
|
||||||
|
|
||||||
|
If voice_spec is NULL then all voices are listed.
|
||||||
|
If voice_spec is given, then only the voices which are compatible with the voice_spec
|
||||||
|
are listed, and they are listed in preference order.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API espeak_ERROR espeak_SetVoiceByFile(const char *filename);
|
||||||
|
/* Loads a voice given the file path. Language is not considered.
|
||||||
|
"filename" is a UTF8 string.
|
||||||
|
|
||||||
|
Return: EE_OK: operation achieved
|
||||||
|
EE_BUFFER_FULL: the command can not be buffered;
|
||||||
|
you may try after a while to call the function again.
|
||||||
|
EE_INTERNAL_ERROR.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API espeak_ERROR espeak_SetVoiceByName(const char *name);
|
||||||
|
/* Searches for a voice with a matching "name" field. Language is not considered.
|
||||||
|
"name" is a UTF8 string.
|
||||||
|
|
||||||
|
Return: EE_OK: operation achieved
|
||||||
|
EE_BUFFER_FULL: the command can not be buffered;
|
||||||
|
you may try after a while to call the function again.
|
||||||
|
EE_INTERNAL_ERROR.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API espeak_ERROR espeak_SetVoiceByProperties(espeak_VOICE *voice_spec);
|
||||||
|
/* An espeak_VOICE structure is used to pass criteria to select a voice. Any of the following
|
||||||
|
fields may be set:
|
||||||
|
|
||||||
|
name NULL, or a voice name
|
||||||
|
|
||||||
|
languages NULL, or a single language string (with optional dialect), eg. "en-uk", or "en"
|
||||||
|
|
||||||
|
gender 0=not specified, 1=male, 2=female
|
||||||
|
|
||||||
|
age 0=not specified, or an age in years
|
||||||
|
|
||||||
|
variant After a list of candidates is produced, scored and sorted, "variant" is used to index
|
||||||
|
that list and choose a voice.
|
||||||
|
variant=0 takes the top voice (i.e. best match). variant=1 takes the next voice, etc
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API espeak_VOICE *espeak_GetCurrentVoice(void);
|
||||||
|
/* Returns the espeak_VOICE data for the currently selected voice.
|
||||||
|
This is not affected by temporary voice changes caused by SSML elements such as <voice> and <s>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API espeak_ERROR espeak_Cancel(void);
|
||||||
|
/* Immediately stop synthesis and audio output of the current text. When this
|
||||||
|
function returns, the audio output is fully stopped and the synthesizer is ready to
|
||||||
|
synthesize a new message.
|
||||||
|
|
||||||
|
Return: EE_OK: operation achieved
|
||||||
|
EE_INTERNAL_ERROR.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API int espeak_IsPlaying(void);
|
||||||
|
/* Returns 1 if audio is played, 0 otherwise.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API espeak_ERROR espeak_Synchronize(void);
|
||||||
|
/* This function returns when all data have been spoken.
|
||||||
|
Return: EE_OK: operation achieved
|
||||||
|
EE_INTERNAL_ERROR.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API espeak_ERROR espeak_Terminate(void);
|
||||||
|
/* last function to be called.
|
||||||
|
Return: EE_OK: operation achieved
|
||||||
|
EE_INTERNAL_ERROR.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
ESPEAK_API const char *espeak_Info(const char **path_data);
|
||||||
|
/* Returns the version number string.
|
||||||
|
path_data returns the path to espeak_data
|
||||||
|
*/
|
||||||
|
#endif
|
76
vall_e.cpp/include/ggml-alloc.h
Normal file
76
vall_e.cpp/include/ggml-alloc.h
Normal file
|
@ -0,0 +1,76 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
|
||||||
|
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
|
||||||
|
typedef struct ggml_backend * ggml_backend_t;
|
||||||
|
|
||||||
|
// Tensor allocator
|
||||||
|
struct ggml_tallocr {
|
||||||
|
ggml_backend_buffer_t buffer;
|
||||||
|
void * base;
|
||||||
|
size_t alignment;
|
||||||
|
size_t offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
|
||||||
|
|
||||||
|
// Graph allocator
|
||||||
|
/*
|
||||||
|
Example usage:
|
||||||
|
ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
|
||||||
|
|
||||||
|
// optional: create a worst-case graph and reserve the buffers to avoid reallocations
|
||||||
|
ggml_gallocr_reserve(galloc, build_graph(max_batch));
|
||||||
|
|
||||||
|
// allocate the graph
|
||||||
|
struct ggml_cgraph * graph = build_graph(batch);
|
||||||
|
ggml_gallocr_alloc_graph(galloc, graph);
|
||||||
|
|
||||||
|
printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0));
|
||||||
|
|
||||||
|
// evaluate the graph
|
||||||
|
ggml_backend_graph_compute(backend, graph);
|
||||||
|
*/
|
||||||
|
|
||||||
|
// special tensor flags for use with the graph allocator:
|
||||||
|
// ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses
|
||||||
|
// ggml_set_output(): output tensors are never freed and never overwritten
|
||||||
|
|
||||||
|
typedef struct ggml_gallocr * ggml_gallocr_t;
|
||||||
|
|
||||||
|
GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft);
|
||||||
|
GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs);
|
||||||
|
GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc);
|
||||||
|
|
||||||
|
// pre-allocate buffers from a measure graph - does not allocate or modify the graph
|
||||||
|
// call with a worst-case graph to avoid buffer reallocations
|
||||||
|
// not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
|
||||||
|
// returns false if the buffer allocation failed
|
||||||
|
GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
|
||||||
|
GGML_API bool ggml_gallocr_reserve_n(
|
||||||
|
ggml_gallocr_t galloc,
|
||||||
|
struct ggml_cgraph * graph,
|
||||||
|
const int * node_buffer_ids,
|
||||||
|
const int * leaf_buffer_ids);
|
||||||
|
|
||||||
|
// automatic reallocation if the topology changes when using a single buffer
|
||||||
|
// returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers)
|
||||||
|
GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
|
||||||
|
|
||||||
|
GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id);
|
||||||
|
|
||||||
|
// Utils
|
||||||
|
// Create a buffer and allocate all the tensors in a ggml_context
|
||||||
|
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
|
||||||
|
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
352
vall_e.cpp/include/ggml-backend.h
Normal file
352
vall_e.cpp/include/ggml-backend.h
Normal file
|
@ -0,0 +1,352 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-alloc.h"
|
||||||
|
|
||||||
|
#ifdef GGML_BACKEND_SHARED
|
||||||
|
# if defined(_WIN32) && !defined(__MINGW32__)
|
||||||
|
# ifdef GGML_BACKEND_BUILD
|
||||||
|
# define GGML_BACKEND_API __declspec(dllexport) extern
|
||||||
|
# else
|
||||||
|
# define GGML_BACKEND_API __declspec(dllimport) extern
|
||||||
|
# endif
|
||||||
|
# else
|
||||||
|
# define GGML_BACKEND_API __attribute__ ((visibility ("default"))) extern
|
||||||
|
# endif
|
||||||
|
#else
|
||||||
|
# define GGML_BACKEND_API extern
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
|
||||||
|
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
|
||||||
|
typedef struct ggml_backend_event * ggml_backend_event_t;
|
||||||
|
typedef struct ggml_backend * ggml_backend_t;
|
||||||
|
typedef void * ggml_backend_graph_plan_t;
|
||||||
|
typedef struct ggml_backend_reg * ggml_backend_reg_t;
|
||||||
|
typedef struct ggml_backend_device * ggml_backend_dev_t;
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// Backend buffer type
|
||||||
|
//
|
||||||
|
|
||||||
|
GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
|
||||||
|
GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
|
||||||
|
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
|
||||||
|
GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
|
||||||
|
GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
|
||||||
|
GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
|
||||||
|
GGML_API ggml_backend_dev_t ggml_backend_buft_get_device (ggml_backend_buffer_type_t buft);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Backend buffer
|
||||||
|
//
|
||||||
|
|
||||||
|
enum ggml_backend_buffer_usage {
|
||||||
|
GGML_BACKEND_BUFFER_USAGE_ANY = 0,
|
||||||
|
GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
|
||||||
|
GGML_BACKEND_BUFFER_USAGE_COMPUTE = 2,
|
||||||
|
};
|
||||||
|
|
||||||
|
GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||||
|
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||||
|
GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
|
||||||
|
GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
|
||||||
|
GGML_API enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
|
||||||
|
|
||||||
|
// tensor copy between different backends
|
||||||
|
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Backend (stream)
|
||||||
|
//
|
||||||
|
|
||||||
|
GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend);
|
||||||
|
GGML_API const char * ggml_backend_name(ggml_backend_t backend);
|
||||||
|
GGML_API void ggml_backend_free(ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
|
||||||
|
GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
|
||||||
|
GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
|
||||||
|
GGML_API size_t ggml_backend_get_max_size(ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||||
|
GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||||
|
|
||||||
|
// "offset" refers to the offset in tensor->data for setting/getting data
|
||||||
|
GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||||
|
GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||||
|
GGML_API void ggml_backend_tensor_memset( struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
|
||||||
|
|
||||||
|
GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||||
|
GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||||
|
|
||||||
|
GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||||
|
GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||||
|
GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||||
|
|
||||||
|
// NOTE: will be removed, use device version instead
|
||||||
|
GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||||
|
GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
|
||||||
|
GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||||
|
|
||||||
|
// asynchronous copy
|
||||||
|
// the copy is performed after all the currently queued operations in backend_src
|
||||||
|
// backend_dst will wait for the copy to complete before performing other operations
|
||||||
|
// automatic fallback to sync copy if async is not supported
|
||||||
|
GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||||
|
|
||||||
|
GGML_API ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Events
|
||||||
|
//
|
||||||
|
|
||||||
|
GGML_API ggml_backend_event_t ggml_backend_event_new(ggml_backend_dev_t device);
|
||||||
|
GGML_API void ggml_backend_event_free(ggml_backend_event_t event);
|
||||||
|
GGML_API void ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend);
|
||||||
|
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
|
||||||
|
GGML_API void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Backend device
|
||||||
|
//
|
||||||
|
|
||||||
|
enum ggml_backend_dev_type {
|
||||||
|
// CPU device using system memory
|
||||||
|
GGML_BACKEND_DEVICE_TYPE_CPU,
|
||||||
|
// GPU device using dedicated memory
|
||||||
|
GGML_BACKEND_DEVICE_TYPE_GPU,
|
||||||
|
// accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
|
||||||
|
GGML_BACKEND_DEVICE_TYPE_ACCEL
|
||||||
|
};
|
||||||
|
|
||||||
|
// functionality supported by the device
|
||||||
|
struct ggml_backend_dev_caps {
|
||||||
|
// asynchronous operations
|
||||||
|
bool async;
|
||||||
|
// pinned host buffer
|
||||||
|
bool host_buffer;
|
||||||
|
// creating buffers from host ptr
|
||||||
|
bool buffer_from_host_ptr;
|
||||||
|
// event synchronization
|
||||||
|
bool events;
|
||||||
|
};
|
||||||
|
|
||||||
|
// all the device properties
|
||||||
|
struct ggml_backend_dev_props {
|
||||||
|
const char * name;
|
||||||
|
const char * description;
|
||||||
|
size_t memory_free;
|
||||||
|
size_t memory_total;
|
||||||
|
enum ggml_backend_dev_type type;
|
||||||
|
struct ggml_backend_dev_caps caps;
|
||||||
|
};
|
||||||
|
|
||||||
|
GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device);
|
||||||
|
GGML_API const char * ggml_backend_dev_description(ggml_backend_dev_t device);
|
||||||
|
GGML_API void ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total);
|
||||||
|
GGML_API enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device);
|
||||||
|
GGML_API void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props);
|
||||||
|
GGML_API ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device);
|
||||||
|
GGML_API ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * params);
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t device);
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device);
|
||||||
|
GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
|
||||||
|
|
||||||
|
GGML_API bool ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
|
||||||
|
GGML_API bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft);
|
||||||
|
GGML_API bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Backend (reg)
|
||||||
|
//
|
||||||
|
|
||||||
|
GGML_API const char * ggml_backend_reg_name(ggml_backend_reg_t reg);
|
||||||
|
GGML_API size_t ggml_backend_reg_dev_count(ggml_backend_reg_t reg);
|
||||||
|
GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index);
|
||||||
|
GGML_API void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name);
|
||||||
|
|
||||||
|
// Common functions that may be obtained using ggml_backend_reg_get_proc_address
|
||||||
|
|
||||||
|
// Split buffer type for tensor parallelism
|
||||||
|
typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(int main_device, const float * tensor_split);
|
||||||
|
// Set the number of threads for the backend
|
||||||
|
typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads);
|
||||||
|
// Get additional buffer types provided by the device (returns a NULL-terminated array)
|
||||||
|
typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device);
|
||||||
|
// Set the abort callback for the backend
|
||||||
|
typedef void (*ggml_backend_set_abort_callback_t)(ggml_backend_t backend, ggml_abort_callback abort_callback, void * abort_callback_data);
|
||||||
|
// Get a list of feature flags supported by the backend (returns a NULL-terminated array)
|
||||||
|
struct ggml_backend_feature {
|
||||||
|
const char * name;
|
||||||
|
const char * value;
|
||||||
|
};
|
||||||
|
typedef struct ggml_backend_feature * (*ggml_backend_get_features_t)(ggml_backend_reg_t reg);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Backend registry
|
||||||
|
//
|
||||||
|
|
||||||
|
// Backend (reg) enumeration
|
||||||
|
GGML_API size_t ggml_backend_reg_count(void);
|
||||||
|
GGML_API ggml_backend_reg_t ggml_backend_reg_get(size_t index);
|
||||||
|
GGML_API ggml_backend_reg_t ggml_backend_reg_by_name(const char * name);
|
||||||
|
|
||||||
|
// Device enumeration
|
||||||
|
GGML_API size_t ggml_backend_dev_count(void);
|
||||||
|
GGML_API ggml_backend_dev_t ggml_backend_dev_get(size_t index);
|
||||||
|
GGML_API ggml_backend_dev_t ggml_backend_dev_by_name(const char * name);
|
||||||
|
GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type);
|
||||||
|
|
||||||
|
// Direct backend (stream) initialization
|
||||||
|
// = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params)
|
||||||
|
GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params);
|
||||||
|
// = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params)
|
||||||
|
GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params);
|
||||||
|
// = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL)
|
||||||
|
GGML_API ggml_backend_t ggml_backend_init_best(void);
|
||||||
|
|
||||||
|
// Load a backend from a dynamic library and register it
|
||||||
|
GGML_API ggml_backend_reg_t ggml_backend_load(const char * path);
|
||||||
|
// Unload a backend if loaded dynamically and unregister it
|
||||||
|
GGML_API void ggml_backend_unload(ggml_backend_reg_t reg);
|
||||||
|
// Load all known backends from dynamic libraries
|
||||||
|
GGML_API void ggml_backend_load_all(void);
|
||||||
|
GGML_API void ggml_backend_load_all_from_path(const char * dir_path);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Backend scheduler
|
||||||
|
//
|
||||||
|
|
||||||
|
// The backend scheduler allows for multiple backend devices to be used together
|
||||||
|
// Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
|
||||||
|
// The backends are selected based on:
|
||||||
|
// - the backend that supports the operation
|
||||||
|
// - the location of the pre-allocated tensors (e.g. the weights)
|
||||||
|
/*
|
||||||
|
Example usage:
|
||||||
|
|
||||||
|
// operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned
|
||||||
|
// preferably to run on the same backend as the buffer
|
||||||
|
ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
|
||||||
|
|
||||||
|
sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false);
|
||||||
|
|
||||||
|
// initialize buffers from a max size graph (optional)
|
||||||
|
reserve_graph = build_graph(sched, max_batch_size);
|
||||||
|
|
||||||
|
// manually assign nodes to a backend (optional, should not be needed in most cases)
|
||||||
|
struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
|
||||||
|
ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu);
|
||||||
|
|
||||||
|
ggml_backend_sched_reserve(sched, reserve_graph);
|
||||||
|
|
||||||
|
// compute
|
||||||
|
graph = build_graph(sched); // the graph and its tensors are single-use in terms of allocation, multi-use in terms of computation
|
||||||
|
for (int i = 0; i < 10; ++i) {
|
||||||
|
ggml_backend_sched_graph_compute(sched, graph); // on the first iteration the graph is allocated automatically
|
||||||
|
}
|
||||||
|
|
||||||
|
// if there are graph inputs:
|
||||||
|
graph = build_graph(sched); // get a new graph that is not allocated (the metadata for the old graph is freed once ggml_free is called)
|
||||||
|
ggml_backend_sched_reset(sched); // clear the allocation of the previous graph
|
||||||
|
ggml_backend_sched_alloc_graph(sched, graph); // explicitly allocate the new graph but do not execute it
|
||||||
|
ggml_backend_tensor_set(input_tensor, ...); // copy data to the newly allocated graph tensors
|
||||||
|
ggml_backend_sched_graph_compute(sched, graph); // execute the graph
|
||||||
|
|
||||||
|
// as an alternative to the above it is also possible to assign the inputs to a dedicated context and
|
||||||
|
// allocate them statically via ggml_backend_alloc_ctx_tensors
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
typedef struct ggml_backend_sched * ggml_backend_sched_t;
|
||||||
|
|
||||||
|
// Evaluation callback for each node in the graph (set with ggml_backend_sched_set_eval_callback)
|
||||||
|
// when ask == true, the scheduler wants to know if the user wants to observe this node
|
||||||
|
// this allows the scheduler to batch nodes together in order to evaluate them in a single call
|
||||||
|
//
|
||||||
|
// when ask == false, the scheduler is passing the node tensor to the user for observation
|
||||||
|
// if the user returns false, the scheduler will cancel the graph compute
|
||||||
|
//
|
||||||
|
typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
|
||||||
|
|
||||||
|
// Initialize a backend scheduler, backends with low index are given priority over backends with high index
|
||||||
|
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
|
||||||
|
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
|
||||||
|
|
||||||
|
// Initialize backend buffers from a measure graph
|
||||||
|
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph); // returns success
|
||||||
|
|
||||||
|
GGML_API int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched);
|
||||||
|
GGML_API ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i);
|
||||||
|
|
||||||
|
// Get the number of splits of the last graph
|
||||||
|
GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
|
||||||
|
GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
|
||||||
|
|
||||||
|
GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
|
||||||
|
GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
|
||||||
|
|
||||||
|
// Allocate and compute graph on the backend scheduler
|
||||||
|
GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph); // returns success
|
||||||
|
GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||||
|
GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||||
|
GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
|
||||||
|
|
||||||
|
// Reset all assignments and allocators - must be called before changing the node backends or allocating a new graph.
|
||||||
|
// This in effect deallocates all tensors that were previously allocated and leaves them with dangling pointers.
|
||||||
|
// The correct way to use this API is to discard the deallocated tensors and create new ones.
|
||||||
|
GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
|
||||||
|
|
||||||
|
// Set a callback to be called for each resulting node during graph compute
|
||||||
|
GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Utils
|
||||||
|
//
|
||||||
|
|
||||||
|
struct ggml_backend_graph_copy {
|
||||||
|
ggml_backend_buffer_t buffer;
|
||||||
|
struct ggml_context * ctx_allocated;
|
||||||
|
struct ggml_context * ctx_unallocated;
|
||||||
|
struct ggml_cgraph * graph;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Copy a graph to a different backend
|
||||||
|
GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
|
||||||
|
GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
|
||||||
|
|
||||||
|
typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
|
||||||
|
|
||||||
|
// Compare the output of two backends
|
||||||
|
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
|
||||||
|
|
||||||
|
// Tensor initialization
|
||||||
|
GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
|
||||||
|
GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
|
||||||
|
|
||||||
|
// CPU buffer types are always available
|
||||||
|
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
|
||||||
|
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
25
vall_e.cpp/include/ggml-blas.h
Normal file
25
vall_e.cpp/include/ggml-blas.h
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// backend API
|
||||||
|
GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void);
|
||||||
|
|
||||||
|
GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend);
|
||||||
|
|
||||||
|
// number of threads used for conversion to float
|
||||||
|
// for openblas and blis, this will also set the number of threads used for blas operations
|
||||||
|
GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void);
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
123
vall_e.cpp/include/ggml-cann.h
Normal file
123
vall_e.cpp/include/ggml-cann.h
Normal file
|
@ -0,0 +1,123 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2023-2024 The ggml authors
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to
|
||||||
|
* deal in the Software without restriction, including without limitation the
|
||||||
|
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||||
|
* sell copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
#include "ggml.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Maximum number of CANN devices supported.
|
||||||
|
*/
|
||||||
|
#define GGML_CANN_MAX_DEVICES 16
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cann_reg(void);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Initializes the CANN backend for a specified device.
|
||||||
|
*
|
||||||
|
* This function initializes the CANN backend for the given device.
|
||||||
|
* It verifies the device index, allocates a context, and creates a backend
|
||||||
|
* instance.
|
||||||
|
*
|
||||||
|
* @param device The index of the device to initialize.
|
||||||
|
* @return A pointer to the initialized backend instance, or nullptr on failure.
|
||||||
|
*/
|
||||||
|
GGML_BACKEND_API ggml_backend_t ggml_backend_cann_init(int32_t device);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Checks if a given backend is a CANN backend.
|
||||||
|
*
|
||||||
|
* This function verifies if the provided backend is a CANN backend by comparing
|
||||||
|
* its GUID with the CANN backend's GUID.
|
||||||
|
*
|
||||||
|
* @param backend The backend instance to check.
|
||||||
|
* @return True if the backend is a CANN backend, false otherwise.
|
||||||
|
*/
|
||||||
|
GGML_BACKEND_API bool ggml_backend_is_cann(ggml_backend_t backend);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Retrieves the CANN buffer type for a specified device.
|
||||||
|
*
|
||||||
|
* This function initializes and returns the buffer type interface associated
|
||||||
|
* with the given device. It ensures thread-safe access using a mutex.
|
||||||
|
*
|
||||||
|
* @param device The device index for which to retrieve the buffer type.
|
||||||
|
* @return A pointer to the buffer type interface for the specified device, or
|
||||||
|
* nullptr if the device index is out of range.
|
||||||
|
*/
|
||||||
|
GGML_BACKEND_API ggml_backend_buffer_type_t
|
||||||
|
ggml_backend_cann_buffer_type(int32_t device);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Retrieves the number of CANN devices available.
|
||||||
|
*
|
||||||
|
* This function returns the number of CANN devices available based on
|
||||||
|
* information obtained from `ggml_cann_info()`.
|
||||||
|
*
|
||||||
|
* @return The number of CANN devices available.
|
||||||
|
*/
|
||||||
|
GGML_BACKEND_API int32_t ggml_backend_cann_get_device_count(void);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief pinned host buffer for use with the CPU backend for faster copies between CPU and NPU.
|
||||||
|
*
|
||||||
|
* @return A pointer to the host buffer type interface.
|
||||||
|
*/
|
||||||
|
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Retrieves the description of a specific CANN device.
|
||||||
|
*
|
||||||
|
* This function sets the specified device, retrieves the SoC name,
|
||||||
|
* and writes it into the provided description buffer.
|
||||||
|
*
|
||||||
|
* @param device The device index to retrieve the description for.
|
||||||
|
* @param description Pointer to a buffer where the description will be written.
|
||||||
|
* @param description_size Size of the description buffer.
|
||||||
|
*/
|
||||||
|
GGML_BACKEND_API void ggml_backend_cann_get_device_description(
|
||||||
|
int32_t device, char* description, size_t description_size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Retrieves the memory information of a specific CANN device.
|
||||||
|
*
|
||||||
|
* This function sets the specified device, retrieves the free and total
|
||||||
|
* memory information of the specified type (ACL_HBM_MEM), and stores them
|
||||||
|
* in the provided pointers.
|
||||||
|
*
|
||||||
|
* @param device The device index to retrieve memory information for.
|
||||||
|
* @param free Pointer to a variable where the free memory size will be stored.
|
||||||
|
* @param total Pointer to a variable where the total memory size will be
|
||||||
|
* stored.
|
||||||
|
*/
|
||||||
|
GGML_BACKEND_API void ggml_backend_cann_get_device_memory(int32_t device,
|
||||||
|
size_t* free,
|
||||||
|
size_t* total);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
38
vall_e.cpp/include/ggml-cpp.h
Normal file
38
vall_e.cpp/include/ggml-cpp.h
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef __cplusplus
|
||||||
|
#error "This header is for C++ only"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-alloc.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
// Smart pointers for ggml types
|
||||||
|
|
||||||
|
// ggml
|
||||||
|
|
||||||
|
struct ggml_context_deleter { void operator()(ggml_context * ctx) { ggml_free(ctx); } };
|
||||||
|
struct gguf_context_deleter { void operator()(gguf_context * ctx) { gguf_free(ctx); } };
|
||||||
|
|
||||||
|
typedef std::unique_ptr<ggml_context, ggml_context_deleter> ggml_context_ptr;
|
||||||
|
typedef std::unique_ptr<gguf_context, gguf_context_deleter> gguf_context_ptr;
|
||||||
|
|
||||||
|
// ggml-alloc
|
||||||
|
|
||||||
|
struct ggml_gallocr_deleter { void operator()(ggml_gallocr_t galloc) { ggml_gallocr_free(galloc); } };
|
||||||
|
|
||||||
|
typedef std::unique_ptr<ggml_gallocr_t, ggml_gallocr_deleter> ggml_gallocr_ptr;
|
||||||
|
|
||||||
|
// ggml-backend
|
||||||
|
|
||||||
|
struct ggml_backend_deleter { void operator()(ggml_backend_t backend) { ggml_backend_free(backend); } };
|
||||||
|
struct ggml_backend_buffer_deleter { void operator()(ggml_backend_buffer_t buffer) { ggml_backend_buffer_free(buffer); } };
|
||||||
|
struct ggml_backend_event_deleter { void operator()(ggml_backend_event_t event) { ggml_backend_event_free(event); } };
|
||||||
|
struct ggml_backend_sched_deleter { void operator()(ggml_backend_sched_t sched) { ggml_backend_sched_free(sched); } };
|
||||||
|
|
||||||
|
typedef std::unique_ptr<ggml_backend, ggml_backend_deleter> ggml_backend_ptr;
|
||||||
|
typedef std::unique_ptr<ggml_backend_buffer, ggml_backend_buffer_deleter> ggml_backend_buffer_ptr;
|
||||||
|
typedef std::unique_ptr<ggml_backend_event, ggml_backend_event_deleter> ggml_backend_event_ptr;
|
||||||
|
typedef std::unique_ptr<ggml_backend_sched, ggml_backend_sched_deleter> ggml_backend_sched_ptr;
|
135
vall_e.cpp/include/ggml-cpu.h
Normal file
135
vall_e.cpp/include/ggml-cpu.h
Normal file
|
@ -0,0 +1,135 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// the compute plan that needs to be prepared for ggml_graph_compute()
|
||||||
|
// since https://github.com/ggerganov/ggml/issues/287
|
||||||
|
struct ggml_cplan {
    // Work buffer. `work_size` is calculated by `ggml_graph_plan()`; the caller is expected
    // to allocate `work_data` before calling `ggml_graph_compute()`.
    size_t    work_size;
    uint8_t * work_data;

    int                      n_threads;
    struct ggml_threadpool * threadpool;

    // abort ggml_graph_compute when true
    ggml_abort_callback abort_callback;
    void *              abort_callback_data;
};
|
||||||
|
|
||||||
|
// numa strategies
|
||||||
|
enum ggml_numa_strategy {
    GGML_NUMA_STRATEGY_DISABLED   = 0,
    GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
    GGML_NUMA_STRATEGY_ISOLATE    = 2,
    GGML_NUMA_STRATEGY_NUMACTL    = 3,
    GGML_NUMA_STRATEGY_MIRROR     = 4,
    GGML_NUMA_STRATEGY_COUNT      // not a strategy: one past the last explicit value
};
|
||||||
|
|
||||||
|
GGML_BACKEND_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
|
||||||
|
GGML_BACKEND_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
|
||||||
|
|
||||||
|
GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
|
||||||
|
GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
|
||||||
|
|
||||||
|
GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
|
||||||
|
GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
|
||||||
|
|
||||||
|
GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
|
||||||
|
GGML_BACKEND_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
|
||||||
|
|
||||||
|
GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
|
||||||
|
GGML_BACKEND_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
|
||||||
|
|
||||||
|
GGML_BACKEND_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
|
||||||
|
GGML_BACKEND_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
|
||||||
|
|
||||||
|
GGML_BACKEND_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
|
||||||
|
GGML_BACKEND_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
|
||||||
|
|
||||||
|
GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
|
||||||
|
GGML_BACKEND_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
|
||||||
|
GGML_BACKEND_API int ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
|
||||||
|
GGML_BACKEND_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
|
||||||
|
GGML_BACKEND_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
|
||||||
|
|
||||||
|
// ggml_graph_plan() has to be called before ggml_graph_compute()
|
||||||
|
// when plan.work_size > 0, caller must allocate memory for plan.work_data
|
||||||
|
GGML_BACKEND_API struct ggml_cplan ggml_graph_plan(
|
||||||
|
const struct ggml_cgraph * cgraph,
|
||||||
|
int n_threads, /* = GGML_DEFAULT_N_THREADS */
|
||||||
|
struct ggml_threadpool * threadpool /* = NULL */ );
|
||||||
|
GGML_BACKEND_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
|
||||||
|
|
||||||
|
// same as ggml_graph_compute() but the work data is allocated as a part of the context
|
||||||
|
// note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
|
||||||
|
GGML_BACKEND_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
|
||||||
|
|
||||||
|
//
|
||||||
|
// system info
|
||||||
|
//
|
||||||
|
|
||||||
|
// x86
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_sse3 (void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_ssse3 (void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_avx (void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_f16c (void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_fma (void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_amx_int8 (void);
|
||||||
|
// ARM
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_neon (void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_arm_fma (void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_fp16_va (void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_dotprod (void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_sve (void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes
|
||||||
|
// other
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_vsx (void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
|
||||||
|
|
||||||
|
// Internal types and functions exposed for tests and benchmarks
|
||||||
|
|
||||||
|
typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
|
||||||
|
const void * GGML_RESTRICT y, size_t by, int nrc);
|
||||||
|
|
||||||
|
// Per-type CPU routines and settings; looked up with ggml_get_type_traits_cpu().
struct ggml_type_traits_cpu {
    ggml_from_float_t from_float;
    ggml_vec_dot_t    vec_dot;
    enum ggml_type    vec_dot_type;
    int64_t           nrows; // number of rows to process simultaneously
};
|
||||||
|
|
||||||
|
GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);
|
||||||
|
|
||||||
|
GGML_BACKEND_API void ggml_cpu_init(void);
|
||||||
|
|
||||||
|
//
|
||||||
|
// CPU backend
|
||||||
|
//
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void);
|
||||||
|
|
||||||
|
GGML_BACKEND_API bool ggml_backend_is_cpu (ggml_backend_t backend);
|
||||||
|
GGML_BACKEND_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
|
||||||
|
GGML_BACKEND_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
|
||||||
|
GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
47
vall_e.cpp/include/ggml-cuda.h
Normal file
47
vall_e.cpp/include/ggml-cuda.h
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef GGML_USE_HIP
|
||||||
|
#define GGML_CUDA_NAME "ROCm"
|
||||||
|
#define GGML_CUBLAS_NAME "hipBLAS"
|
||||||
|
#elif defined(GGML_USE_MUSA)
|
||||||
|
#define GGML_CUDA_NAME "MUSA"
|
||||||
|
#define GGML_CUBLAS_NAME "muBLAS"
|
||||||
|
#else
|
||||||
|
#define GGML_CUDA_NAME "CUDA"
|
||||||
|
#define GGML_CUBLAS_NAME "cuBLAS"
|
||||||
|
#endif
|
||||||
|
#define GGML_CUDA_MAX_DEVICES 16
|
||||||
|
|
||||||
|
// backend API
|
||||||
|
GGML_BACKEND_API ggml_backend_t ggml_backend_cuda_init(int device);
|
||||||
|
|
||||||
|
GGML_BACKEND_API bool ggml_backend_is_cuda(ggml_backend_t backend);
|
||||||
|
|
||||||
|
// device buffer
|
||||||
|
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
|
||||||
|
|
||||||
|
// split tensor buffer that splits matrices by rows across multiple devices
|
||||||
|
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(int main_device, const float * tensor_split);
|
||||||
|
|
||||||
|
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
||||||
|
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
|
||||||
|
|
||||||
|
GGML_BACKEND_API int ggml_backend_cuda_get_device_count(void);
|
||||||
|
GGML_BACKEND_API void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
|
||||||
|
GGML_BACKEND_API void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
|
||||||
|
|
||||||
|
GGML_BACKEND_API bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
|
||||||
|
GGML_BACKEND_API void ggml_backend_cuda_unregister_host_buffer(void * buffer);
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cuda_reg(void);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
50
vall_e.cpp/include/ggml-kompute.h
Normal file
50
vall_e.cpp/include/ggml-kompute.h
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define GGML_KOMPUTE_MAX_DEVICES 16
|
||||||
|
|
||||||
|
// Description of one device as reported by the Kompute backend queries.
struct ggml_vk_device {
    int          index;
    int          type;            // same as VkPhysicalDeviceType
    size_t       heapSize;
    const char * name;
    const char * vendor;
    int          subgroupSize;
    uint64_t     bufferAlignment;
    uint64_t     maxAlloc;
};
|
||||||
|
|
||||||
|
struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
|
||||||
|
bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name);
|
||||||
|
bool ggml_vk_has_vulkan(void);
|
||||||
|
bool ggml_vk_has_device(void);
|
||||||
|
struct ggml_vk_device ggml_vk_current_device(void);
|
||||||
|
|
||||||
|
//
|
||||||
|
// backend API
|
||||||
|
//
|
||||||
|
|
||||||
|
// forward declaration
|
||||||
|
typedef struct ggml_backend * ggml_backend_t;
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_t ggml_backend_kompute_init(int device);
|
||||||
|
|
||||||
|
GGML_BACKEND_API bool ggml_backend_is_kompute(ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_kompute_reg(void);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
66
vall_e.cpp/include/ggml-metal.h
Normal file
66
vall_e.cpp/include/ggml-metal.h
Normal file
|
@ -0,0 +1,66 @@
|
||||||
|
// Note: this description is outdated
|
||||||
|
//
|
||||||
|
// An interface allowing to compute ggml_cgraph with Metal
|
||||||
|
//
|
||||||
|
// This is a fully functional interface that extends ggml with GPU support for Apple devices.
|
||||||
|
// A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, etc.)
|
||||||
|
//
|
||||||
|
// How it works?
|
||||||
|
//
|
||||||
|
// As long as your program can create and evaluate a ggml_cgraph on the CPU, you can use this
|
||||||
|
// interface to evaluate the same graph on the GPU. Instead of using ggml_graph_compute(), you
|
||||||
|
// use ggml_metal_graph_compute() (or ggml_vulkan_graph_compute(), etc.)
|
||||||
|
//
|
||||||
|
// You only need to make sure that all memory buffers that you used during the graph creation
|
||||||
|
// are mapped to the device memory with the ggml_metal_add_buffer() function. This mapping is
|
||||||
|
// used during the graph evaluation to determine the arguments of the compute kernels.
|
||||||
|
//
|
||||||
|
// Synchronization between device and host memory (for example for input and output tensors)
|
||||||
|
// is done with the ggml_metal_set_tensor() and ggml_metal_get_tensor() functions.
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
|
||||||
|
struct ggml_tensor;
|
||||||
|
struct ggml_cgraph;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//
|
||||||
|
// backend API
|
||||||
|
// user-code should use only these functions
|
||||||
|
//
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_t ggml_backend_metal_init(void);
|
||||||
|
|
||||||
|
GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_DEPRECATED(
|
||||||
|
GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
|
||||||
|
"obsoleted by the new device interface - https://github.com/ggerganov/llama.cpp/pull/9713");
|
||||||
|
|
||||||
|
GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
|
||||||
|
|
||||||
|
// helper to check if the device supports a specific family
|
||||||
|
// ideally, the user code should be doing these checks
|
||||||
|
// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
|
||||||
|
GGML_BACKEND_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
|
||||||
|
|
||||||
|
// capture all command buffers committed the next time `ggml_backend_graph_compute` is called
|
||||||
|
GGML_BACKEND_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_metal_reg(void);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
26
vall_e.cpp/include/ggml-opencl.h
Normal file
26
vall_e.cpp/include/ggml-opencl.h
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
#ifndef GGML_OPENCL_H
|
||||||
|
#define GGML_OPENCL_H
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//
|
||||||
|
// backend API
|
||||||
|
//
|
||||||
|
GGML_BACKEND_API ggml_backend_t ggml_backend_opencl_init(void);
|
||||||
|
GGML_BACKEND_API bool ggml_backend_is_opencl(ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void);
|
||||||
|
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void);
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_opencl_reg(void);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // GGML_OPENCL_H
|
216
vall_e.cpp/include/ggml-opt.h
Normal file
216
vall_e.cpp/include/ggml-opt.h
Normal file
|
@ -0,0 +1,216 @@
|
||||||
|
// This file contains functionality for training models using GGML.
|
||||||
|
// It is not strictly needed vs. just vanilla GGML but it provides a more high-level interface for common needs such as datasets.
|
||||||
|
// In particular, the bottom of this file contains relatively high-level functions that are suitable for use or adaptation in user code.
|
||||||
|
//
|
||||||
|
// Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct ggml_opt_dataset;
|
||||||
|
struct ggml_opt_context;
|
||||||
|
struct ggml_opt_result;
|
||||||
|
|
||||||
|
typedef struct ggml_opt_dataset * ggml_opt_dataset_t;
|
||||||
|
typedef struct ggml_opt_context * ggml_opt_context_t;
|
||||||
|
typedef struct ggml_opt_result * ggml_opt_result_t;
|
||||||
|
|
||||||
|
// ====== Loss ======
|
||||||
|
|
||||||
|
// built-in loss types, i.e. the built-in quantities minimized by the optimizer
|
||||||
|
// custom loss types can be defined via mean or sum which simply reduce the outputs for all datapoints to a single value
|
||||||
|
enum ggml_opt_loss_type {
|
||||||
|
GGML_OPT_LOSS_TYPE_MEAN,
|
||||||
|
GGML_OPT_LOSS_TYPE_SUM,
|
||||||
|
GGML_OPT_LOSS_TYPE_CROSS_ENTROPY,
|
||||||
|
GGML_OPT_LOSS_TYPE_MEAN_SQUARED_ERROR,
|
||||||
|
};
|
||||||
|
|
||||||
|
// ====== Dataset ======
|
||||||
|
|
||||||
|
GGML_API ggml_opt_dataset_t ggml_opt_dataset_init(
|
||||||
|
int64_t ne_datapoint, // number of elements per datapoint
|
||||||
|
int64_t ne_label, // number of elements per label
|
||||||
|
int64_t ndata, // total number of datapoints/labels
|
||||||
|
int64_t ndata_shard); // number of datapoints/labels per shard (unit at which the dataset is shuffled/copied)
|
||||||
|
GGML_API void ggml_opt_dataset_free(ggml_opt_dataset_t dataset);
|
||||||
|
|
||||||
|
// get underlying tensors that store the data
|
||||||
|
GGML_API struct ggml_tensor * ggml_opt_dataset_data (ggml_opt_dataset_t dataset); // shape = [ne_datapoint, ndata]
|
||||||
|
GGML_API struct ggml_tensor * ggml_opt_dataset_labels(ggml_opt_dataset_t dataset); // shape = [ne_label, ndata]
|
||||||
|
|
||||||
|
// shuffle idata first datapoints from dataset with RNG from opt_ctx, shuffle all datapoints if idata is negative
|
||||||
|
GGML_API void ggml_opt_dataset_shuffle(ggml_opt_context_t opt_ctx, ggml_opt_dataset_t dataset, int64_t idata);
|
||||||
|
|
||||||
|
// get batch at position ibatch from dataset and copy the data to data_batch and labels_batch
|
||||||
|
GGML_API void ggml_opt_dataset_get_batch(
|
||||||
|
ggml_opt_dataset_t dataset,
|
||||||
|
struct ggml_tensor * data_batch, // shape = [ne_datapoint, ndata_batch]
|
||||||
|
struct ggml_tensor * labels_batch, // shape = [ne_label, ndata_batch]
|
||||||
|
int64_t ibatch);
|
||||||
|
|
||||||
|
// ====== Model / Context ======
|
||||||
|
|
||||||
|
enum ggml_opt_build_type {
|
||||||
|
GGML_OPT_BUILD_TYPE_FORWARD,
|
||||||
|
GGML_OPT_BUILD_TYPE_GRAD,
|
||||||
|
GGML_OPT_BUILD_TYPE_OPT,
|
||||||
|
};
|
||||||
|
|
||||||
|
// parameters that control which optimizer is used and how said optimizer tries to find the minimal loss
|
||||||
|
struct ggml_opt_optimizer_params {
|
||||||
|
// AdamW optimizer parameters
|
||||||
|
struct {
|
||||||
|
float alpha; // learning rate
|
||||||
|
float beta1;
|
||||||
|
float beta2;
|
||||||
|
float eps; // epsilon for numerical stability
|
||||||
|
float wd; // weight decay for AdamW, use 0.0f to disable
|
||||||
|
} adamw;
|
||||||
|
};
|
||||||
|
|
||||||
|
// callback to calculate optimizer parameters prior to a backward pass
|
||||||
|
// userdata can be used to pass arbitrary data
|
||||||
|
typedef struct ggml_opt_optimizer_params (*ggml_opt_get_optimizer_params)(void * userdata);
|
||||||
|
|
||||||
|
// returns the default optimizer params (constant)
|
||||||
|
// userdata is not used
|
||||||
|
GGML_API struct ggml_opt_optimizer_params ggml_opt_get_default_optimizer_params(void * userdata);
|
||||||
|
|
||||||
|
// parameters for initializing a new optimization context
|
||||||
|
struct ggml_opt_params {
|
||||||
|
ggml_backend_sched_t backend_sched; // defines which backends are used to construct the compute graphs
|
||||||
|
|
||||||
|
struct ggml_context * ctx_compute; // created in user code, holds non-static tensors
|
||||||
|
|
||||||
|
// the forward graph is defined by inputs and outputs
|
||||||
|
// those tensors and all tensors in between are not intended to be reusable between multiple optimization contexts
|
||||||
|
struct ggml_tensor * inputs;
|
||||||
|
struct ggml_tensor * outputs;
|
||||||
|
|
||||||
|
enum ggml_opt_loss_type loss_type;
|
||||||
|
enum ggml_opt_build_type build_type;
|
||||||
|
|
||||||
|
int32_t opt_period; // after how many gradient accumulation steps an optimizer step should be done
|
||||||
|
|
||||||
|
ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
|
||||||
|
void * get_opt_pars_ud; // userdata for calculating optimizer parameters
|
||||||
|
};
|
||||||
|
|
||||||
|
// get parameters for an optimization context with defaults set where possible
|
||||||
|
// parameters for which no sensible defaults exist are supplied as arguments to this function
|
||||||
|
GGML_API ggml_opt_params ggml_opt_default_params(
|
||||||
|
ggml_backend_sched_t backend_sched,
|
||||||
|
struct ggml_context * ctx_compute,
|
||||||
|
struct ggml_tensor * inputs,
|
||||||
|
struct ggml_tensor * outputs,
|
||||||
|
enum ggml_opt_loss_type loss_type);
|
||||||
|
|
||||||
|
GGML_API ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params);
|
||||||
|
GGML_API void ggml_opt_free(ggml_opt_context_t opt_ctx);
|
||||||
|
|
||||||
|
// set gradients to zero, initialize loss, and optionally reset the optimizer
|
||||||
|
GGML_API void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer);
|
||||||
|
|
||||||
|
// get underlying tensors that store data
|
||||||
|
GGML_API struct ggml_tensor * ggml_opt_inputs( ggml_opt_context_t opt_ctx); // forward graph input tensor
|
||||||
|
GGML_API struct ggml_tensor * ggml_opt_outputs( ggml_opt_context_t opt_ctx); // forward graph output tensor
|
||||||
|
GGML_API struct ggml_tensor * ggml_opt_labels( ggml_opt_context_t opt_ctx); // labels to compare outputs against
|
||||||
|
GGML_API struct ggml_tensor * ggml_opt_loss( ggml_opt_context_t opt_ctx); // scalar tensor that contains the loss
|
||||||
|
GGML_API struct ggml_tensor * ggml_opt_pred( ggml_opt_context_t opt_ctx); // predictions made by outputs
|
||||||
|
GGML_API struct ggml_tensor * ggml_opt_ncorrect(ggml_opt_context_t opt_ctx); // number of matching predictions between outputs and labels
|
||||||
|
|
||||||
|
GGML_API struct ggml_tensor * ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, struct ggml_tensor * node);
|
||||||
|
|
||||||
|
// ====== Optimization Result ======
|
||||||
|
|
||||||
|
// Takes no arguments. Declared with (void) because, inside extern "C", an empty parameter
// list is not a prototype for C callers.
GGML_API ggml_opt_result_t ggml_opt_result_init(void);
|
||||||
|
GGML_API void ggml_opt_result_free(ggml_opt_result_t result);
|
||||||
|
GGML_API void ggml_opt_result_reset(ggml_opt_result_t result);
|
||||||
|
|
||||||
|
// get data from result, uncertainties are optional and can be ignored by passing NULL
|
||||||
|
GGML_API void ggml_opt_result_ndata( ggml_opt_result_t result, int64_t * ndata); // writes 1 value, number of datapoints
|
||||||
|
GGML_API void ggml_opt_result_loss( ggml_opt_result_t result, double * loss, double * unc); // writes 1 value
|
||||||
|
GGML_API void ggml_opt_result_pred( ggml_opt_result_t result, int32_t * pred); // writes ndata values
|
||||||
|
GGML_API void ggml_opt_result_accuracy(ggml_opt_result_t result, double * accuracy, double * unc); // writes 1 value
|
||||||
|
|
||||||
|
// ====== Computation ======
|
||||||
|
|
||||||
|
// do forward pass, increment result if not NULL
|
||||||
|
GGML_API void ggml_opt_forward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
|
||||||
|
|
||||||
|
// do forward pass, increment result if not NULL, do backward pass
|
||||||
|
GGML_API void ggml_opt_forward_backward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
|
||||||
|
|
||||||
|
// ############################################################################
|
||||||
|
// ## The high-level functions start here. They do not depend on any private ##
|
||||||
|
// ## functions or structs and can be copied to and adapted for user code. ##
|
||||||
|
// ############################################################################
|
||||||
|
|
||||||
|
// ====== Intended Usage ======
|
||||||
|
//
|
||||||
|
// 1. Select the appropriate loss for your problem.
|
||||||
|
// 2. Create a dataset and set the data for the "data" tensor. Also set the "labels" tensor if your loss needs them.
|
||||||
|
// Setting the shard size to 1 will be fine, it's the granularity with which data is shuffled/loaded (bigger values are faster).
|
||||||
|
// 3. Create a GGML graph for your model with no_alloc == true. Use two separate contexts for the tensors.
|
||||||
|
// The first context should contain the model parameters and inputs and be allocated statically in user code.
|
||||||
|
// The second context should contain all other tensors and will be (re)allocated automatically.
|
||||||
|
// Due to this automated allocation the data of the second context is not defined when accessed in user code.
|
||||||
|
// Note that the second dimension of the inputs/outputs are interpreted as the number of datapoints in those tensors.
|
||||||
|
// 4. Call ggml_opt_fit. If you need more control you can use ggml_opt_epoch instead.
|
||||||
|
|
||||||
|
// signature for a callback while evaluating opt_ctx on dataset, called after an evaluation
|
||||||
|
typedef void (*ggml_opt_epoch_callback)(
|
||||||
|
bool train, // true after training evaluation, false after validation evaluation
|
||||||
|
ggml_opt_context_t opt_ctx,
|
||||||
|
ggml_opt_dataset_t dataset,
|
||||||
|
ggml_opt_result_t result, // result associated with the dataset subsection
|
||||||
|
int64_t ibatch, // number of batches that have been evaluated so far
|
||||||
|
int64_t ibatch_max, // total number of batches in this dataset subsection
|
||||||
|
int64_t t_start_us); // time at which the evaluation on the dataset subsection was started
|
||||||
|
|
||||||
|
// do training on front of dataset, do evaluation only on back of dataset
|
||||||
|
GGML_API void ggml_opt_epoch(
|
||||||
|
ggml_opt_context_t opt_ctx,
|
||||||
|
ggml_opt_dataset_t dataset,
|
||||||
|
ggml_opt_result_t result_train, // result to increment during training, ignored if NULL
|
||||||
|
ggml_opt_result_t result_eval, // result to increment during evaluation, ignored if NULL
|
||||||
|
int64_t idata_split, // data index at which to split training and evaluation
|
||||||
|
ggml_opt_epoch_callback callback_train,
|
||||||
|
ggml_opt_epoch_callback callback_eval);
|
||||||
|
|
||||||
|
// callback that prints a progress bar on stderr
|
||||||
|
GGML_API void ggml_opt_epoch_callback_progress_bar(
|
||||||
|
bool train,
|
||||||
|
ggml_opt_context_t opt_ctx,
|
||||||
|
ggml_opt_dataset_t dataset,
|
||||||
|
ggml_opt_result_t result,
|
||||||
|
int64_t ibatch,
|
||||||
|
int64_t ibatch_max,
|
||||||
|
int64_t t_start_us);
|
||||||
|
|
||||||
|
// fit model defined by inputs and outputs to dataset
|
||||||
|
// Each argument is documented inline. The context and tensor arguments carry explicit
// `struct` tags so the prototype is also valid when this header is included from C
// (the declaration sits inside the extern "C" section).
GGML_API void ggml_opt_fit(
        ggml_backend_sched_t            backend_sched,  // backend scheduler for constructing the compute graphs
        struct ggml_context           * ctx_compute,    // context with temporarily allocated tensors to calculate the outputs
        struct ggml_tensor            * inputs,         // input tensor with shape [ne_datapoint, ndata_batch]
        struct ggml_tensor            * outputs,        // output tensor, must have shape [ne_label, ndata_batch] if labels are used
        ggml_opt_dataset_t              dataset,        // dataset with data and optionally also labels
        enum ggml_opt_loss_type         loss_type,      // loss to minimize
        ggml_opt_get_optimizer_params   get_opt_pars,   // callback to get optimizer params, userdata is pointer to epoch (of type int64_t)
        int64_t                         nepoch,         // how many times the dataset should be iterated over
        int64_t                         nbatch_logical, // datapoints per optimizer step, must be a multiple of ndata_batch in inputs/outputs
        float                           val_split,      // fraction of the dataset to use for validation, must be in [0.0f, 1.0f)
        bool                            silent);        // whether or not info prints to stderr should be suppressed
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
28
vall_e.cpp/include/ggml-rpc.h
Normal file
28
vall_e.cpp/include/ggml-rpc.h
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define GGML_RPC_MAX_SERVERS 16
|
||||||
|
|
||||||
|
// backend API
|
||||||
|
GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint);
|
||||||
|
GGML_BACKEND_API bool ggml_backend_is_rpc(ggml_backend_t backend);
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint);
|
||||||
|
|
||||||
|
GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
|
||||||
|
|
||||||
|
GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint, size_t free_mem, size_t total_mem);
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void);
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
49
vall_e.cpp/include/ggml-sycl.h
Normal file
49
vall_e.cpp/include/ggml-sycl.h
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
//
|
||||||
|
// MIT license
|
||||||
|
// Copyright (C) 2024 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#define GGML_SYCL_NAME "SYCL"
|
||||||
|
#define GGML_SYCL_MAX_DEVICES 48
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// backend API
|
||||||
|
GGML_BACKEND_API ggml_backend_t ggml_backend_sycl_init(int device);
|
||||||
|
|
||||||
|
GGML_BACKEND_API bool ggml_backend_is_sycl(ggml_backend_t backend);
|
||||||
|
|
||||||
|
// device buffer
|
||||||
|
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
|
||||||
|
|
||||||
|
// split tensor buffer that splits matrices by rows across multiple devices
|
||||||
|
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
|
||||||
|
|
||||||
|
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
||||||
|
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
|
||||||
|
|
||||||
|
GGML_BACKEND_API void ggml_backend_sycl_print_sycl_devices(void);
|
||||||
|
GGML_BACKEND_API void ggml_backend_sycl_get_gpu_list(int *id_list, int max_len);
|
||||||
|
GGML_BACKEND_API void ggml_backend_sycl_get_device_description(int device,
|
||||||
|
char *description,
|
||||||
|
size_t description_size);
|
||||||
|
// Declared with (void), like the other zero-argument prototypes in this header: inside
// extern "C", an empty parameter list is not a prototype for C callers.
GGML_BACKEND_API int ggml_backend_sycl_get_device_count(void);
|
||||||
|
GGML_BACKEND_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
|
||||||
|
|
||||||
|
// SYCL doesn't support registering host memory, keep here for reference
|
||||||
|
// GGML_BACKEND_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
|
||||||
|
// GGML_BACKEND_API void ggml_backend_sycl_unregister_host_buffer(void * buffer);
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_sycl_reg(void);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
31
vall_e.cpp/include/ggml-vulkan.h
Normal file
31
vall_e.cpp/include/ggml-vulkan.h
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define GGML_VK_NAME "Vulkan"
|
||||||
|
#define GGML_VK_MAX_DEVICES 16
|
||||||
|
|
||||||
|
GGML_BACKEND_API void ggml_vk_instance_init(void);
|
||||||
|
|
||||||
|
// backend API
|
||||||
|
GGML_BACKEND_API ggml_backend_t ggml_backend_vk_init(size_t dev_num);
|
||||||
|
|
||||||
|
GGML_BACKEND_API bool ggml_backend_is_vk(ggml_backend_t backend);
|
||||||
|
GGML_BACKEND_API int ggml_backend_vk_get_device_count(void);
|
||||||
|
GGML_BACKEND_API void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size);
|
||||||
|
GGML_BACKEND_API void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total);
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num);
|
||||||
|
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
||||||
|
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
|
||||||
|
|
||||||
|
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_vk_reg(void);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
2302
vall_e.cpp/include/ggml.h
Normal file
2302
vall_e.cpp/include/ggml.h
Normal file
File diff suppressed because it is too large
Load Diff
25
vall_e.cpp/include/llama-cpp.h
Normal file
25
vall_e.cpp/include/llama-cpp.h
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef __cplusplus
|
||||||
|
#error "This header is for C++ only"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "llama.h"
|
||||||
|
|
||||||
|
struct llama_model_deleter {
|
||||||
|
void operator()(llama_model * model) { llama_free_model(model); }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct llama_context_deleter {
|
||||||
|
void operator()(llama_context * context) { llama_free(context); }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct llama_sampler_deleter {
|
||||||
|
void operator()(llama_sampler * sampler) { llama_sampler_free(sampler); }
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef std::unique_ptr<llama_model, llama_model_deleter> llama_model_ptr;
|
||||||
|
typedef std::unique_ptr<llama_context, llama_context_deleter> llama_context_ptr;
|
||||||
|
typedef std::unique_ptr<llama_sampler, llama_sampler_deleter> llama_sampler_ptr;
|
181
vall_e.cpp/include/llama-impl.h
Normal file
181
vall_e.cpp/include/llama-impl.h
Normal file
|
@ -0,0 +1,181 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "llama.h"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
#ifdef __GNUC__
|
||||||
|
#ifdef __MINGW32__
|
||||||
|
#define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
|
||||||
|
#else
|
||||||
|
#define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#define LLAMA_ATTRIBUTE_FORMAT(...)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//
|
||||||
|
// logging
|
||||||
|
//
|
||||||
|
|
||||||
|
LLAMA_ATTRIBUTE_FORMAT(2, 3)
|
||||||
|
void llama_log_internal (ggml_log_level level, const char * format, ...);
|
||||||
|
void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
|
||||||
|
|
||||||
|
#define LLAMA_LOG(...) llama_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
|
||||||
|
#define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
|
||||||
|
#define LLAMA_LOG_WARN(...) llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
|
||||||
|
#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
|
||||||
|
#define LLAMA_LOG_DEBUG(...) llama_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
|
||||||
|
#define LLAMA_LOG_CONT(...) llama_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)
|
||||||
|
|
||||||
|
//
|
||||||
|
// helpers
|
||||||
|
//
|
||||||
|
|
||||||
|
struct time_meas {
|
||||||
|
time_meas(int64_t & t_acc, bool disable = false) : t_start_us(disable ? -1 : ggml_time_us()), t_acc(t_acc) {}
|
||||||
|
|
||||||
|
~time_meas() {
|
||||||
|
if (t_start_us >= 0) {
|
||||||
|
t_acc += ggml_time_us() - t_start_us;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const int64_t t_start_us;
|
||||||
|
|
||||||
|
int64_t & t_acc;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Replaces every non-overlapping occurrence of `search` in `s` with `replace`,
// scanning left to right. Inserted text is never rescanned, so a replacement
// that contains `search` does not recurse. An empty `search` leaves `s` as is.
static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
    if (search.empty()) {
        return;
    }

    std::string out;
    out.reserve(s.length());

    std::string::size_type cursor = 0;
    for (;;) {
        const std::string::size_type hit = s.find(search, cursor);
        if (hit == std::string::npos) {
            break;
        }
        // copy the untouched text before the match, then the replacement
        out.append(s, cursor, hit - cursor);
        out.append(replace);
        cursor = hit + search.length();
    }

    // tail after the last match (the whole string when nothing matched)
    out.append(s, cursor, std::string::npos);
    s = std::move(out);
}
|
||||||
|
|
||||||
|
const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal_get_tensor_map(
|
||||||
|
struct llama_context * ctx
|
||||||
|
);
|
||||||
|
|
||||||
|
// Fixed-capacity FIFO ring buffer; works similarly to std::deque, but once
// `capacity` elements are stored each push_back() overwrites the oldest one.
//
//   first - index of the oldest stored element
//   pos   - index of the slot the next push_back() will write to
//   sz    - number of stored elements (<= capacity)
//
// Accessors throw std::runtime_error on an empty buffer or out-of-range index.
template<typename T>
struct ring_buffer {
    ring_buffer(size_t cap) : capacity(cap), data(cap) {}

    // oldest element
    T & front() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[first];
    }

    const T & front() const {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[first];
    }

    // newest element
    T & back() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        // `pos` is the *next* write slot, so the newest element sits one slot
        // before it (wrapping around); sz > 0 guarantees capacity > 0
        return data[(pos + capacity - 1) % capacity];
    }

    const T & back() const {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[(pos + capacity - 1) % capacity];
    }

    // appends a copy of `value`, dropping the oldest element when full
    void push_back(const T & value) {
        if (capacity == 0) {
            throw std::runtime_error("ring buffer: capacity is zero");
        }

        if (sz == capacity) {
            // advance the start when buffer is full
            first = (first + 1) % capacity;
        } else {
            sz++;
        }
        data[pos] = value;
        pos = (pos + 1) % capacity;
    }

    // removes and returns the oldest element
    T pop_front() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        T value = data[first];
        first = (first + 1) % capacity;
        sz--;
        return value;
    }

    //T & operator[](size_t i) {
    //    if (i >= sz) {
    //        throw std::runtime_error("ring buffer: index out of bounds");
    //    }
    //    return data[(first + i) % capacity];
    //}

    //const T & at(size_t i) const {
    //    if (i >= sz) {
    //        throw std::runtime_error("ring buffer: index out of bounds");
    //    }
    //    return data[(first + i) % capacity];
    //}

    // reverse access: rat(0) is the newest element, rat(sz - 1) the oldest
    const T & rat(size_t i) const {
        if (i >= sz) {
            throw std::runtime_error("ring buffer: index out of bounds");
        }
        return data[(first + sz - i - 1) % capacity];
    }

    // copies the stored elements, oldest first
    std::vector<T> to_vector() const {
        std::vector<T> result;
        result.reserve(sz);
        for (size_t i = 0; i < sz; i++) {
            result.push_back(data[(first + i) % capacity]);
        }
        return result;
    }

    void clear() {
        // here only reset the status of the buffer
        sz = 0;
        first = 0;
        pos = 0;
    }

    bool empty() const {
        return sz == 0;
    }

    size_t size() const {
        return sz;
    }

    size_t capacity = 0;
    size_t sz = 0;
    size_t first = 0;
    size_t pos = 0;
    std::vector<T> data;
};
|
170
vall_e.cpp/include/llama-vocab.h
Normal file
170
vall_e.cpp/include/llama-vocab.h
Normal file
|
@ -0,0 +1,170 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "llama-impl.h"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <map>
|
||||||
|
#include <set>
|
||||||
|
|
||||||
|
struct llm_tokenizer;
|
||||||
|
|
||||||
|
struct llama_vocab {
|
||||||
|
using id = llama_token;
|
||||||
|
using token = std::string;
|
||||||
|
using tattr = llama_token_attr;
|
||||||
|
|
||||||
|
struct token_data {
|
||||||
|
token text;
|
||||||
|
float score;
|
||||||
|
tattr attr;
|
||||||
|
};
|
||||||
|
|
||||||
|
uint32_t n_vocab = 0; // TODO: not great because has to keep in sync with hparams.n_vocab
|
||||||
|
|
||||||
|
enum llama_vocab_type type = LLAMA_VOCAB_TYPE_SPM;
|
||||||
|
enum llama_vocab_pre_type type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
|
||||||
|
|
||||||
|
int max_token_len = 0; // used for optimizing longest token search
|
||||||
|
|
||||||
|
std::unordered_map<token, id> token_to_id;
|
||||||
|
std::vector<token_data> id_to_token;
|
||||||
|
|
||||||
|
std::vector<id> cache_special_tokens;
|
||||||
|
std::vector<token> cache_token_to_piece; // llama_token_to_piece(special = true);
|
||||||
|
|
||||||
|
std::map<std::pair<std::string, std::string>, int> bpe_ranks;
|
||||||
|
|
||||||
|
// default LLaMA special tokens
|
||||||
|
// TODO: should we set all of these to LLAMA_TOKEN_NULL?
|
||||||
|
id special_bos_id = 1;
|
||||||
|
id special_eos_id = 2;
|
||||||
|
id special_eot_id = LLAMA_TOKEN_NULL;
|
||||||
|
id special_eom_id = LLAMA_TOKEN_NULL;
|
||||||
|
id special_unk_id = 0;
|
||||||
|
id special_sep_id = LLAMA_TOKEN_NULL;
|
||||||
|
id special_pad_id = LLAMA_TOKEN_NULL;
|
||||||
|
id special_cls_id = LLAMA_TOKEN_NULL;
|
||||||
|
id special_mask_id = LLAMA_TOKEN_NULL;
|
||||||
|
|
||||||
|
id linefeed_id = 13;
|
||||||
|
|
||||||
|
// fim tokens
|
||||||
|
id special_fim_pre_id = LLAMA_TOKEN_NULL;
|
||||||
|
id special_fim_suf_id = LLAMA_TOKEN_NULL;
|
||||||
|
id special_fim_mid_id = LLAMA_TOKEN_NULL;
|
||||||
|
id special_fim_pad_id = LLAMA_TOKEN_NULL;
|
||||||
|
id special_fim_rep_id = LLAMA_TOKEN_NULL; // repo
|
||||||
|
id special_fim_sep_id = LLAMA_TOKEN_NULL; // file separator
|
||||||
|
|
||||||
|
// set of all tokens that cause "end of generation"
|
||||||
|
std::set<id> special_eog_ids;
|
||||||
|
|
||||||
|
// tokenizer flags
|
||||||
|
bool tokenizer_add_space_prefix = false;
|
||||||
|
bool tokenizer_add_bos = false;
|
||||||
|
bool tokenizer_add_eos = false;
|
||||||
|
bool tokenizer_ignore_merges = false;
|
||||||
|
bool tokenizer_clean_spaces = false; // clean_up_tokenization_spaces
|
||||||
|
bool tokenizer_remove_extra_whitespaces = false;
|
||||||
|
bool tokenizer_escape_whitespaces = true;
|
||||||
|
bool tokenizer_treat_whitespace_as_suffix = false;
|
||||||
|
|
||||||
|
std::vector<char> precompiled_charsmap;
|
||||||
|
|
||||||
|
llm_tokenizer * tokenizer = nullptr;
|
||||||
|
|
||||||
|
llama_vocab() = default;
|
||||||
|
~llama_vocab();
|
||||||
|
|
||||||
|
int find_bpe_rank(const std::string & token_left, const std::string & token_right) const;
|
||||||
|
|
||||||
|
void init_tokenizer();
|
||||||
|
};
|
||||||
|
|
||||||
|
//
|
||||||
|
// internal API
|
||||||
|
//
|
||||||
|
|
||||||
|
// TODO: rename to llama_tokenize_impl
|
||||||
|
// TODO: This should probably be in llama.h
|
||||||
|
std::vector<llama_vocab::id> llama_tokenize_internal(
|
||||||
|
const llama_vocab & vocab,
|
||||||
|
std::string raw_text,
|
||||||
|
bool add_special,
|
||||||
|
bool parse_special = false);
|
||||||
|
|
||||||
|
// TODO: move the API below as member functions of llama_vocab
|
||||||
|
llama_token llama_byte_to_token_impl(const llama_vocab & vocab, uint8_t ch);
|
||||||
|
|
||||||
|
const char * llama_token_get_text_impl(const struct llama_vocab & vocab, llama_token token);
|
||||||
|
|
||||||
|
float llama_token_get_score_impl(const struct llama_vocab & vocab, llama_token token);
|
||||||
|
|
||||||
|
llama_token_attr llama_token_get_attr_impl(const struct llama_vocab & vocab, llama_token token);
|
||||||
|
|
||||||
|
bool llama_token_is_eog_impl(const struct llama_vocab & vocab, llama_token token);
|
||||||
|
|
||||||
|
bool llama_token_is_control_impl(const struct llama_vocab & vocab, llama_token token);
|
||||||
|
|
||||||
|
llama_token llama_token_bos_impl(const struct llama_vocab & vocab);
|
||||||
|
llama_token llama_token_eos_impl(const struct llama_vocab & vocab);
|
||||||
|
llama_token llama_token_eot_impl(const struct llama_vocab & vocab);
|
||||||
|
llama_token llama_token_eom_impl(const struct llama_vocab & vocab);
|
||||||
|
llama_token llama_token_cls_impl(const struct llama_vocab & vocab);
|
||||||
|
llama_token llama_token_sep_impl(const struct llama_vocab & vocab);
|
||||||
|
llama_token llama_token_nl_impl (const struct llama_vocab & vocab);
|
||||||
|
llama_token llama_token_pad_impl(const struct llama_vocab & vocab);
|
||||||
|
|
||||||
|
llama_token llama_token_prefix_impl(const struct llama_vocab & vocab);
|
||||||
|
llama_token llama_token_middle_impl(const struct llama_vocab & vocab);
|
||||||
|
llama_token llama_token_suffix_impl(const struct llama_vocab & vocab);
|
||||||
|
|
||||||
|
llama_token llama_token_fim_pre_impl(const struct llama_vocab & vocab);
|
||||||
|
llama_token llama_token_fim_suf_impl(const struct llama_vocab & vocab);
|
||||||
|
llama_token llama_token_fim_mid_impl(const struct llama_vocab & vocab);
|
||||||
|
llama_token llama_token_fim_pad_impl(const struct llama_vocab & vocab);
|
||||||
|
llama_token llama_token_fim_rep_impl(const struct llama_vocab & vocab);
|
||||||
|
llama_token llama_token_fim_sep_impl(const struct llama_vocab & vocab);
|
||||||
|
|
||||||
|
bool llama_add_bos_token_impl(const struct llama_vocab & vocab);
|
||||||
|
bool llama_add_eos_token_impl(const struct llama_vocab & vocab);
|
||||||
|
|
||||||
|
int32_t llama_tokenize_impl(
|
||||||
|
const struct llama_vocab & vocab,
|
||||||
|
const char * text,
|
||||||
|
int32_t text_len,
|
||||||
|
llama_token * tokens,
|
||||||
|
int32_t n_tokens_max,
|
||||||
|
bool add_special,
|
||||||
|
bool parse_special);
|
||||||
|
|
||||||
|
// does not write null-terminator to buf
|
||||||
|
int32_t llama_token_to_piece_impl(
|
||||||
|
const struct llama_vocab & vocab,
|
||||||
|
llama_token token,
|
||||||
|
char * buf,
|
||||||
|
int32_t length,
|
||||||
|
int32_t lstrip,
|
||||||
|
bool special);
|
||||||
|
|
||||||
|
// check if token0 is contained as a prefix in token1
|
||||||
|
bool llama_token_is_prefix_impl(
|
||||||
|
const struct llama_vocab & vocab,
|
||||||
|
llama_token token0,
|
||||||
|
llama_token token1);
|
||||||
|
|
||||||
|
int32_t llama_detokenize_impl(
|
||||||
|
const struct llama_vocab & vocab,
|
||||||
|
const llama_token * tokens,
|
||||||
|
int32_t n_tokens,
|
||||||
|
char * text,
|
||||||
|
int32_t text_len_max,
|
||||||
|
bool remove_special,
|
||||||
|
bool unparse_special);
|
||||||
|
|
||||||
|
std::string llama_detokenize(
|
||||||
|
const struct llama_vocab & vocab,
|
||||||
|
const std::vector<llama_token> & tokens,
|
||||||
|
bool special);
|
1258
vall_e.cpp/include/llama.h
Normal file
1258
vall_e.cpp/include/llama.h
Normal file
File diff suppressed because it is too large
Load Diff
1277
vall_e.cpp/include/llama.modified.h
Normal file
1277
vall_e.cpp/include/llama.modified.h
Normal file
File diff suppressed because it is too large
Load Diff
1258
vall_e.cpp/include/llama.vanilla.h
Normal file
1258
vall_e.cpp/include/llama.vanilla.h
Normal file
File diff suppressed because it is too large
Load Diff
78
vall_e.cpp/include/lstm.h
Normal file
78
vall_e.cpp/include/lstm.h
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-alloc.h"
|
||||||
|
|
||||||
|
#include "ops.h"
|
||||||
|
|
||||||
|
struct encodec_lstm {
|
||||||
|
struct ggml_tensor *l0_ih_w;
|
||||||
|
struct ggml_tensor *l0_hh_w;
|
||||||
|
|
||||||
|
struct ggml_tensor *l0_ih_b;
|
||||||
|
struct ggml_tensor *l0_hh_b;
|
||||||
|
|
||||||
|
struct ggml_tensor *l1_ih_w;
|
||||||
|
struct ggml_tensor *l1_hh_w;
|
||||||
|
|
||||||
|
struct ggml_tensor *l1_ih_b;
|
||||||
|
struct ggml_tensor *l1_hh_b;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ggml_tensor *forward_pass_lstm_unilayer(struct ggml_context *ctx0,
|
||||||
|
struct ggml_tensor *inp,
|
||||||
|
struct ggml_tensor *weight_ih,
|
||||||
|
struct ggml_tensor *weight_hh,
|
||||||
|
struct ggml_tensor *bias_ih,
|
||||||
|
struct ggml_tensor *bias_hh,
|
||||||
|
char *prefix) {
|
||||||
|
const int seq_length = inp->ne[0];
|
||||||
|
const int input_dim = inp->ne[1];
|
||||||
|
const int hidden_dim = weight_ih->ne[1] / 4;
|
||||||
|
|
||||||
|
char ct_name[10];
|
||||||
|
char ht_name[10];
|
||||||
|
|
||||||
|
snprintf(ct_name, 10, "%s_ct", prefix);
|
||||||
|
snprintf(ht_name, 10, "%s_ht", prefix);
|
||||||
|
|
||||||
|
struct ggml_tensor *hs = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, hidden_dim, seq_length);
|
||||||
|
ggml_set_input(hs);
|
||||||
|
|
||||||
|
struct ggml_tensor *c_t = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hidden_dim);
|
||||||
|
ggml_set_input(c_t);
|
||||||
|
ggml_set_name(c_t, ct_name);
|
||||||
|
|
||||||
|
struct ggml_tensor *h_t = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hidden_dim);
|
||||||
|
ggml_set_input(h_t);
|
||||||
|
ggml_set_name(h_t, ht_name);
|
||||||
|
|
||||||
|
struct ggml_tensor *current = ggml_cont(ctx0, ggml_transpose(ctx0, inp));
|
||||||
|
|
||||||
|
for (int t = 0; t < seq_length; t++) {
|
||||||
|
struct ggml_tensor *x_t = ggml_view_1d(ctx0, current, input_dim, t * current->nb[1]);
|
||||||
|
|
||||||
|
struct ggml_tensor *inp_gates = ggml_mul_mat(ctx0, weight_ih, x_t);
|
||||||
|
inp_gates = ggml_add(ctx0, inp_gates, bias_ih);
|
||||||
|
|
||||||
|
struct ggml_tensor *hid_gates = ggml_mul_mat(ctx0, weight_hh, h_t);
|
||||||
|
hid_gates = ggml_add(ctx0, hid_gates, bias_hh);
|
||||||
|
|
||||||
|
struct ggml_tensor *out_gates = ggml_add(ctx0, inp_gates, hid_gates);
|
||||||
|
|
||||||
|
struct ggml_tensor *i_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 0 * sizeof(float) * hidden_dim));
|
||||||
|
struct ggml_tensor *f_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 1 * sizeof(float) * hidden_dim));
|
||||||
|
struct ggml_tensor *g_t = ggml_tanh(ctx0 , ggml_view_1d(ctx0, out_gates, hidden_dim, 2 * sizeof(float) * hidden_dim));
|
||||||
|
struct ggml_tensor *o_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 3 * sizeof(float) * hidden_dim));
|
||||||
|
|
||||||
|
c_t = ggml_add(ctx0, ggml_mul(ctx0, f_t, c_t), ggml_mul(ctx0, i_t, g_t));
|
||||||
|
|
||||||
|
h_t = ggml_mul(ctx0, o_t, ggml_tanh(ctx0, c_t));
|
||||||
|
|
||||||
|
hs = ggml_set_1d(ctx0, hs, h_t, t * hs->nb[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
hs = ggml_cont(ctx0, ggml_transpose(ctx0, hs));
|
||||||
|
|
||||||
|
return hs;
|
||||||
|
}
|
17
vall_e.cpp/include/ops.h
Normal file
17
vall_e.cpp/include/ops.h
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
|
||||||
|
struct ggml_tensor *pad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp,
|
||||||
|
int padding_left, int padding_right);
|
||||||
|
|
||||||
|
struct ggml_tensor *unpad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp,
|
||||||
|
int padding_left, int padding_right);
|
||||||
|
|
||||||
|
struct ggml_tensor *strided_conv_1d(struct ggml_context *ctx0, struct ggml_tensor *inp,
|
||||||
|
struct ggml_tensor *conv_w, struct ggml_tensor *conv_b,
|
||||||
|
int stride);
|
||||||
|
|
||||||
|
struct ggml_tensor *strided_conv_transpose_1d(struct ggml_context *ctx0, struct ggml_tensor *inp,
|
||||||
|
struct ggml_tensor *conv_w, struct ggml_tensor *conv_b,
|
||||||
|
int stride);
|
111
vall_e.cpp/include/quantizer.h
Normal file
111
vall_e.cpp/include/quantizer.h
Normal file
|
@ -0,0 +1,111 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "ggml.h"
|
||||||
|
#include "ggml-alloc.h"
|
||||||
|
#include "ggml-backend.h"
|
||||||
|
|
||||||
|
#include "utils.h"
|
||||||
|
|
||||||
|
struct encodec_quant_block {
|
||||||
|
struct ggml_tensor *embed;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct encodec_quantizer {
|
||||||
|
std::vector<encodec_quant_block> blocks;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ggml_tensor *encodec_forward_quantizer_encode(
|
||||||
|
const struct encodec_quantizer *quantizer, struct ggml_context *ctx0,
|
||||||
|
struct ggml_tensor *encoded_inp, const int n_bins, const int sr, const int bandwidth,
|
||||||
|
const int hop_length) {
|
||||||
|
|
||||||
|
if (!encoded_inp) {
|
||||||
|
fprintf(stderr, "%s: null input tensor\n", __func__);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int frame_rate = (int)ceilf(sr / hop_length);
|
||||||
|
const int n_q = get_num_quantizers_for_bandwidth(n_bins, frame_rate, bandwidth);
|
||||||
|
|
||||||
|
const int seq_length = encoded_inp->ne[0];
|
||||||
|
|
||||||
|
struct ggml_tensor *codes = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, seq_length, n_q);
|
||||||
|
ggml_set_input(codes);
|
||||||
|
|
||||||
|
struct ggml_tensor *inpL = ggml_cont(ctx0, ggml_transpose(ctx0, encoded_inp));
|
||||||
|
struct ggml_tensor *residual = inpL;
|
||||||
|
struct ggml_tensor *indices;
|
||||||
|
|
||||||
|
for (int i = 0; i < n_q; i++) {
|
||||||
|
encodec_quant_block block = quantizer->blocks[i];
|
||||||
|
|
||||||
|
// compute distance
|
||||||
|
// [seq_length, n_bins]
|
||||||
|
struct ggml_tensor *dp = ggml_scale(
|
||||||
|
ctx0, ggml_mul_mat(ctx0, block.embed, residual), -2.0f);
|
||||||
|
|
||||||
|
// [n_bins]
|
||||||
|
struct ggml_tensor *sqr_embed = ggml_sqr(ctx0, block.embed);
|
||||||
|
struct ggml_tensor *sqr_embed_nrm = ggml_sum_rows(ctx0, sqr_embed);
|
||||||
|
|
||||||
|
// [seq_length]
|
||||||
|
struct ggml_tensor *sqr_inp = ggml_sqr(ctx0, residual);
|
||||||
|
struct ggml_tensor *sqr_inp_nrm = ggml_sum_rows(ctx0, sqr_inp);
|
||||||
|
|
||||||
|
// [seq_length, n_bins]
|
||||||
|
struct ggml_tensor *dist = ggml_add(ctx0, ggml_repeat(ctx0, sqr_inp_nrm, dp), dp);
|
||||||
|
dist = ggml_add(ctx0, ggml_repeat(ctx0, ggml_transpose(ctx0, sqr_embed_nrm), dist), dist);
|
||||||
|
dist = ggml_neg(ctx0, dist);
|
||||||
|
|
||||||
|
// take the argmax over the column dimension
|
||||||
|
// [seq_length]
|
||||||
|
indices = ggml_argmax(ctx0, dist);
|
||||||
|
|
||||||
|
// look up in embedding table
|
||||||
|
struct ggml_tensor *quantized = ggml_get_rows(ctx0, block.embed, indices);
|
||||||
|
|
||||||
|
residual = ggml_sub(ctx0, residual, quantized);
|
||||||
|
|
||||||
|
codes = ggml_set_1d(ctx0, codes, indices, i * codes->nb[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return codes;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ggml_tensor *encodec_forward_quantizer_decode(
|
||||||
|
const struct encodec_quantizer *quantizer, struct ggml_context *ctx0,
|
||||||
|
struct ggml_tensor *codes, const int hidden_dim, const int n_bins, const int sr, const int bandwidth,
|
||||||
|
const int hop_length) {
|
||||||
|
|
||||||
|
if (!codes) {
|
||||||
|
fprintf(stderr, "%s: null input tensor\n", __func__);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int seq_length = codes->ne[0];
|
||||||
|
|
||||||
|
const int frame_rate = (int)ceilf(sr / hop_length);
|
||||||
|
const int n_q = get_num_quantizers_for_bandwidth(n_bins, frame_rate, bandwidth);
|
||||||
|
|
||||||
|
assert(n_q == codes->ne[1]);
|
||||||
|
|
||||||
|
struct ggml_tensor *quantized_out = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, hidden_dim, seq_length);
|
||||||
|
ggml_set_input(quantized_out);
|
||||||
|
ggml_set_name(quantized_out, "quantized_out");
|
||||||
|
|
||||||
|
for (int i = 0; i < n_q; i++) {
|
||||||
|
encodec_quant_block block = quantizer->blocks[i];
|
||||||
|
|
||||||
|
struct ggml_tensor *indices = ggml_view_1d(ctx0, codes, seq_length, i * codes->nb[1]);
|
||||||
|
struct ggml_tensor *quantized = ggml_get_rows(ctx0, block.embed, indices);
|
||||||
|
|
||||||
|
quantized_out = ggml_add(ctx0, quantized_out, quantized);
|
||||||
|
}
|
||||||
|
|
||||||
|
quantized_out = ggml_cont(ctx0, ggml_transpose(ctx0, quantized_out));
|
||||||
|
|
||||||
|
return quantized_out;
|
||||||
|
}
|
30
vall_e.cpp/include/utils.h
Normal file
30
vall_e.cpp/include/utils.h
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <fstream>
|
||||||
|
|
||||||
|
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||||
|
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||||
|
|
||||||
|
const size_t MB = 1024 * 1024;
|
||||||
|
|
||||||
|
// Reads sizeof(T) raw bytes from `infile` straight into `dest`.
// The stream state is not checked here: after a failed or short read the
// stream's fail/eof bits are set and it is up to the caller to test them.
template <typename T>
void read_safe(std::ifstream &infile, T &dest) {
    char *raw = reinterpret_cast<char *>(&dest);
    infile.read(raw, sizeof(dest));
}
|
||||||
|
|
||||||
|
// Returns the number of codebooks (n_q) needed for `bandwidth` (in kbps),
// given the hop length and sample rate that define the frame rate.
//
// `inline` because the definition lives in a header: without it, including
// the header from two translation units is a multiple-definition error.
inline int32_t get_num_codebooks(float bandwidth, int hop_length, float sample_rate) {
    // The number of codebooks is determined by the bandwidth selected.
    // Supported bandwidths are 1.5kbps (n_q = 2), 3 kbps (n_q = 4), 6 kbps (n_q = 8),
    // 12 kbps (n_q = 16) and 24kbps (n_q = 32).
    return (int32_t)ceilf(1000 * bandwidth / (ceilf(sample_rate / hop_length) * 10));
}
|
||||||
|
|
||||||
|
// Bandwidth contributed by one quantizer stage: log2(bins) bits per frame
// times `frame_rate` frames per second. The float result is truncated to
// int32_t by the return type.
//
// `inline` because the definition lives in a header: without it, including
// the header from two translation units is a multiple-definition error.
inline int32_t get_bandwidth_per_quantizer(int bins, float frame_rate) {
    return log2f((float)bins) * frame_rate;
}
|
||||||
|
|
||||||
|
int32_t get_num_quantizers_for_bandwidth(int bins, float frame_rate, float bandwidth) {
|
||||||
|
float bw_per_q = get_bandwidth_per_quantizer(bins, frame_rate);
|
||||||
|
int32_t n_q = MAX(1, floorf(bandwidth * 1000 / bw_per_q));
|
||||||
|
return n_q;
|
||||||
|
}
|
|
@ -6,7 +6,10 @@
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <regex>
|
||||||
|
#include <codecvt>
|
||||||
|
|
||||||
|
// this technically can be used to initialize the map directly
|
||||||
io_t io_ranges[] = {
|
io_t io_ranges[] = {
|
||||||
{ "text", 0, 256, 9, },
|
{ "text", 0, 256, 9, },
|
||||||
{ "rvq_l", 256, 264, -1, },
|
{ "rvq_l", 256, 264, -1, },
|
||||||
|
@ -36,6 +39,18 @@ io_t io_ranges[] = {
|
||||||
{ "resps|NAR:0:0", 16677, 17702, 8 },
|
{ "resps|NAR:0:0", 16677, 17702, 8 },
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// stored here because I tokenize the merges
|
||||||
|
// I can't be assed to figure out the tokenizer right now
|
||||||
|
// u32string because encoding agony
|
||||||
|
std::unordered_map<std::u32string, token_t> vocab = {
|
||||||
|
{U"<unk>",0},{U"<bos>",1},{U"</eos>",2},{U"<mask>",3},{U" ",4},{U"ᵝ",4},{U"!",5},{U"\"",6},{U"(",7},{U"{",7},{U"[",7},{U")",8},{U"}",8},{U"]",8},{U",",9},{U"-",10},{U".",11},{U"1",211},{U"—",10},{U"“",6},{U"”",81},{U"ˇ",6},{U"ˉ",12},{U"ˊ",79},{U"ˋ",80},{U"_",81},{U":",13},{U";",14},{U"?",15},{U"a",16},{U"ä",16},{U"ɒ",16},{U"b",17},{U"c",18},{U"d",19},{U"e",20},{U"f",21},{U"h",22},{U"i",23},{U"ĩ",23},{U"j",24},{U"k",25},{U"l",26},{U"m",27},{U"n",28},{U"ɴ",28},{U"ɲ",28},{U"o",29},{U"̞",29},{U"p",30},{U"ɸ",30},{U"q",31},{U"r",32},{U"ɽ",32},{U"ʁ",32},{U"s",33},{U"t",34},{U"u",35},{U"ø",35},{U"œ",35},{U"y",35},{U"ɣ",35},{U"ũ",35},{U"v",36},{U"w",37},{U"ʍ",37},{U"x",38},{U"z",39},{U"¡",40},{U"«",41},{U"»",42},{U"¿",43},{U"æ",44},{U"ç",45},{U"ð",46},{U"ŋ",47},{U"ɐ",48},{U"ɑ",49},{U"ɔ",50},{U"ɕ",51},{U"ə",52},{U"ɚ",53},{U"ɛ",54},{U"ɜ",55},{U"ɟ",56},{U"ɡ",57},{U"ɪ",58},{U"ɬ",59},{U"ɯ",60},{U"ɹ",61},{U"ɾ",62},{U"ʃ",63},{U"ʈ",64},{U"ʊ",65},{U"ʋ",66},{U"ʌ",67},{U"ʑ",68},{U"ʒ",69},{U"ʔ",70},{U"ʲ",71},{U"ˈ",72},{U"ˌ",73},{U"ː",74},{U"̃",75},{U"̩",76},{U"θ",77},{U"ᵻ",78},{U"…",82},{U"ˈɛ",83},{U"iː",84},{U"aɪ",85},{U"nd",86},{U"ˈɪ",87},{U"eɪ",88},{U"ˈæ",89},{U"ðə",90},{U"oʊ",91},{U"ɑː",92},{U"ˈeɪ",93},{U"ən",94},{U"uː",95},{U"ˈʌ",96},{U"ˈaɪ",97},{U"st",98},{U"ˈɔ",99},{U"ˈoʊ",100},{U"ˈiː",101},{U"ˈɑː",102},{U"ænd",103},{U"ːɹ",104},{U"ɪŋ",105},{U"ɜː",106},{U"ɪn",107},{U"tə",108},{U"ʌv",109},{U"aʊ",110},{U"əl",111},{U"ˈuː",112},{U"tʃ",113},{U"ɪz",114},{U"ˈɜː",115},{U"ˌʌ",116},{U"æt",117},{U"dʒ",118},{U"ˈɔː",119},{U"ɪt",120},{U"ˈaʊ",121},{U"ɚɹ",122},{U"ˈɛn",123},{U"wʌ",124},{U"li",125},{U"hiː",126},{U"ˌɛ",127},{U"wɪ",128},{U"wʌz",129},{U"ðæt",130},{U"juː",131},{U"oːɹ",132},{U"ðɪ",133},{U"sˈɛ",134},{U"ˌɪ",135},{U"ˈɑːɹ",136},{U"nt",137},{U"ˈʊ",138},{U"ənt",139},{U"hɪz",140},{U"ˌɑː",141},{U"hæ",142},{U"ɔːɹ",143},{U"ˈɛɹ",144},{U"wɪð",145},{U"ᵻd",146},{U"ˈoːɹ",147},{U"pɹ",148},{U"ˈɔːl",149},{U"mˌ",150},{U"ʃən",151},{U"kt",152},{U"ˌoʊ",153},{U"ˈɔːɹ",154},{U"fɹ",155},{U"æz",156},{U"ˌʌt",157},
{U"ʃiː",158},{U"ˈɛl",159},{U"ˌaʊ",160},{U"ˈʌn",161},{U"əs",162},{U"hɜː",163},{U"lˈaɪ",164},{U"ˈæn",165},{U"ˈɪɹ",166},{U"ʊd",167},{U"ɹᵻ",168},{U"ld",169},{U"bˌʌt",170},{U"ks",171},{U"nˈoʊ",172},{U"hæd",173},{U"ɾɚ",174},{U"ɛɹ",175},{U"ˈɪŋ",176},{U"ɡɹ",177},{U"nˌɑː",178},{U"ɔn",179},{U"vɚ",180},{U"maɪ",181},{U"fɔːɹ",182},{U"ðɚ",183},{U"tʊ",184},{U"ðɛɹ",185},{U"nˌɑːt",186},{U"ˈʌm",187},{U"tɹ",188},{U"sˈiː",189},{U"ʌvðə",190},{U"mˈɪ",191},{U"hˈæ",192},{U"ˌɪm",193},{U"lˈeɪ",194},{U"ɪk",195},{U"sp",196},{U"hˌɪm",197},{U"ɐn",198},{U"ðeɪ",199},{U"lˈɪ",200},{U"ɾi",201},{U"lˈɛ",202},{U"bɹ",203},{U"kɹ",204},{U"lˈæ",205},{U"ˈɪl",206},{U"jˈuː",207},{U"ʌm",208},{U"mˌiː",209},{U"bᵻ",210},{U"wˈʌn",211},{U"ˌɪn",212},{U"ˈɪn",213},{U"ˈoʊn",214},{U"sˈɛd",215},{U"biː",216},{U"ˈɛd",217},{U"ˈaɪt",218},{U"baɪ",219},{U"fɹʌm",220},{U"ɪs",221},{U"ɚz",222},{U"ðɪs",223},{U"əns",224},{U"bəl",225},{U"ɪf",226},{U"ɪnðə",227},{U"əm",228},{U"ᵻz",229},{U"ˌuː",230},{U"wˈeɪ",231},{U"ft",232},{U"wiː",233},{U"stɹ",234},{U"lˈiː",235},{U"iːz",236},{U"pt",237},{U"jʊ",238},{U"ɚd",239},{U"ˌaɪ",240},{U"kw",241},{U"ˌɔn",242},{U"ˈaɪd",243},{U"ɪm",244},{U"ˈʌst",245},{U"ˈoʊld",246},{U"ts",247},{U"ˌɪtʃ",248},{U"sˌoʊ",249},{U"dˈɪ",250},{U"ɑːɹ",251},{U"hɐ",252},{U"sˈeɪ",253},{U"ɾᵻd",254},{U"wˌɪtʃ",255},
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<merge_entry_t> vocab_merges = {
|
||||||
|
{U"ˈ", U"ɛ"},{U"i", U"ː"},{U"a", U"ɪ"},{U"n", U"d"},{U"ˈ", U"ɪ"},{U"e", U"ɪ"},{U"ˈ", U"æ"},{U"ð", U"ə"},{U"o", U"ʊ"},{U"ɑ", U"ː"},{U"ˈ", U"eɪ"},{U"ə", U"n"},{U"u", U"ː"},{U"ˈ", U"ʌ"},{U"ˈ", U"aɪ"},{U"s", U"t"},{U"ˈ", U"ɔ"},{U"ˈ", U"oʊ"},{U"ˈ", U"iː"},{U"ˈ", U"ɑː"},{U"æ", U"nd"},{U"ː", U"ɹ"},{U"ɪ", U"ŋ"},{U"ɜ", U"ː"},{U"ɪ", U"n"},{U"t", U"ə"},{U"ʌ", U"v"},{U"a", U"ʊ"},{U"ə", U"l"},{U"ˈ", U"uː"},{U"t", U"ʃ"},{U"ɪ", U"z"},{U"ˈ", U"ɜː"},{U"ˌ", U"ʌ"},{U"æ", U"t"},{U"d", U"ʒ"},{U"ˈɔ", U"ː"},{U"ɪ", U"t"},{U"ˈ", U"aʊ"},{U"ɚ", U"ɹ"},{U"ˈɛ", U"n"},{U"w", U"ʌ"},{U"l", U"i"},{U"h", U"iː"},{U"ˌ", U"ɛ"},{U"w", U"ɪ"},{U"wʌ", U"z"},{U"ð", U"æt"},{U"j", U"uː"},{U"o", U"ːɹ"},{U"ð", U"ɪ"},{U"s", U"ˈɛ"},{U"ˌ", U"ɪ"},{U"ˈɑː", U"ɹ"},{U"n", U"t"},{U"ˈ", U"ʊ"},{U"ən", U"t"},{U"h", U"ɪz"},{U"ˌ", U"ɑː"},{U"h", U"æ"},{U"ɔ", U"ːɹ"},{U"ˈɛ", U"ɹ"},{U"wɪ", U"ð"},{U"ᵻ", U"d"},{U"ˈ", U"oːɹ"},{U"p", U"ɹ"},{U"ˈɔː", U"l"},{U"m", U"ˌ"},{U"ʃ", U"ən"},{U"k", U"t"},{U"ˌ", U"oʊ"},{U"ˈɔ", U"ːɹ"},{U"f", U"ɹ"},{U"æ", U"z"},{U"ˌʌ", U"t"},{U"ʃ", U"iː"},{U"ˈɛ", U"l"},{U"ˌ", U"aʊ"},{U"ˈʌ", U"n"},{U"ə", U"s"},{U"h", U"ɜː"},{U"l", U"ˈaɪ"},{U"ˈæ", U"n"},{U"ˈɪ", U"ɹ"},{U"ʊ", U"d"},{U"ɹ", U"ᵻ"},{U"l", U"d"},{U"b", U"ˌʌt"},{U"k", U"s"},{U"n", U"ˈoʊ"},{U"hæ", U"d"},{U"ɾ", U"ɚ"},{U"ɛ", U"ɹ"},{U"ˈɪ", U"ŋ"},{U"ɡ", U"ɹ"},{U"n", U"ˌɑː"},{U"ɔ", U"n"},{U"v", U"ɚ"},{U"m", U"aɪ"},{U"f", U"ɔːɹ"},{U"ð", U"ɚ"},{U"t", U"ʊ"},{U"ð", U"ɛɹ"},{U"nˌɑː", U"t"},{U"ˈʌ", U"m"},{U"t", U"ɹ"},{U"s", U"ˈiː"},{U"ʌv", U"ðə"},{U"m", U"ˈɪ"},{U"h", U"ˈæ"},{U"ˌɪ", U"m"},{U"l", U"ˈeɪ"},{U"ɪ", U"k"},{U"s", U"p"},{U"h", U"ˌɪm"},{U"ɐ", U"n"},{U"ð", U"eɪ"},{U"l", U"ˈɪ"},{U"ɾ", U"i"},{U"l", U"ˈɛ"},{U"b", U"ɹ"},{U"k", U"ɹ"},{U"l", U"ˈæ"},{U"ˈɪ", U"l"},{U"j", U"ˈuː"},{U"ʌ", U"m"},{U"mˌ", U"iː"},{U"b", U"ᵻ"},{U"w", U"ˈʌn"},{U"ˌ", U"ɪn"},{U"ˈɪ", U"n"},{U"ˈoʊ", U"n"},{U"sˈɛ", U"d"},{U"b", U"iː"},{U"ˈɛ", U"d"},{U"ˈaɪ", U"t"},{U"b", U"aɪ"},{U"fɹ", U"ʌm"},{U"ɪ", U"s"},{U"ɚ", U"z"},{U"ðɪ", U"s"},{U"ən", U"s"},{U"b", U"əl"},{U"ɪ", U"f"},{U"ɪn", U"ðə"},{U"ə", 
U"m"},{U"ᵻ", U"z"},{U"ˌ", U"uː"},{U"w", U"ˈeɪ"},{U"f", U"t"},{U"w", U"iː"},{U"st", U"ɹ"},{U"l", U"ˈiː"},{U"iː", U"z"},{U"p", U"t"},{U"j", U"ʊ"},{U"ɚ", U"d"},{U"ˌ", U"aɪ"},{U"k", U"w"},{U"ˌ", U"ɔn"},{U"ˈaɪ", U"d"},{U"ɪ", U"m"},{U"ˈʌ", U"st"},{U"ˈoʊ", U"ld"},{U"t", U"s"},{U"ˌɪ", U"tʃ"},{U"s", U"ˌoʊ"},{U"d", U"ˈɪ"},{U"ɑː", U"ɹ"},{U"h", U"ɐ"},{U"s", U"ˈeɪ"},{U"ɾ", U"ᵻd"},{U"w", U"ˌɪtʃ"},
|
||||||
|
};
|
||||||
|
std::unordered_map<std::string, merge_entry_t> vocab_merge_map = {};
|
||||||
|
|
||||||
std::vector<float> VALL_E_API read_2d_tensor( struct ggml_tensor* tensor ) {
|
std::vector<float> VALL_E_API read_2d_tensor( struct ggml_tensor* tensor ) {
|
||||||
size_t size = tensor->ne[0] * tensor->ne[1];
|
size_t size = tensor->ne[0] * tensor->ne[1];
|
||||||
std::vector<float> res( size );
|
std::vector<float> res( size );
|
||||||
|
@ -109,11 +124,11 @@ void VALL_E_API vall_e_inputs_map_init( io_map_t& io_map, llama_model* model ) {
|
||||||
|
|
||||||
int32_t ctx_size = 24 * 2 * ggml_tensor_overhead(); // 24 embeddings + 24 output heads (generous) (should only really need to do this for output heads since we manually handle embeddings)
|
int32_t ctx_size = 24 * 2 * ggml_tensor_overhead(); // 24 embeddings + 24 output heads (generous) (should only really need to do this for output heads since we manually handle embeddings)
|
||||||
struct ggml_init_params params = {
|
struct ggml_init_params params = {
|
||||||
/*.mem_size =*/ ctx_size,
|
/*.mem_size =*/ ctx_size,
|
||||||
/*.mem_buffer =*/ NULL,
|
/*.mem_buffer =*/ NULL,
|
||||||
/*.no_alloc =*/ true,
|
/*.no_alloc =*/ true,
|
||||||
};
|
};
|
||||||
io_map.ctx = ggml_init(params);
|
io_map.ctx = ggml_init(params);
|
||||||
|
|
||||||
// to-do: figure a nicer way to do this
|
// to-do: figure a nicer way to do this
|
||||||
#if LLAMA_CPP_USE_VALL_E_ARCH
|
#if LLAMA_CPP_USE_VALL_E_ARCH
|
||||||
|
@ -207,72 +222,72 @@ void VALL_E_API batch_add( llama_batch& batch, token_t id, int n_embd, const flo
|
||||||
// reads a 24KHz wav file from disk as a mono float waveform
// * `path` is handed to dr_wav as-is
// * returns an empty vector if the file cannot be read or is not sampled at 24KHz
// * multi-channel files are downmixed to mono by averaging the channels of each frame
std::vector<float> VALL_E_API read_audio_from_disk( const std::string& path ) {
	std::vector<float> res;

	uint32_t channels = 0;
	uint32_t sample_rate = 0;
	drwav_uint64 total_frame_count = 0;

	float* raw_audio = drwav_open_file_and_read_pcm_frames_f32(path.c_str(), &channels, &sample_rate, &total_frame_count, NULL);

	if (raw_audio == NULL) {
		fprintf(stderr, "%s: could not read wav file\n", __func__);
		return res;
	}

	if (sample_rate != 24000) {
		fprintf(stderr, "%s: wav file is wrong sample rate\n", __func__);
		// the buffer was allocated by dr_wav and must be released on every exit path
		drwav_free(raw_audio, NULL);
		return res;
	}

	// the frame count is unsigned, so print it as such
	fprintf(stderr, "\n%s: Number of frames read = %llu.\n", __func__, static_cast<unsigned long long>(total_frame_count));

	res.resize(total_frame_count);
	if ( channels <= 1 ) {
		memcpy(res.data(), raw_audio, total_frame_count * sizeof(float));
	} else {
		// dr_wav returns interleaved samples (frames * channels floats), so collapse each frame into one mono sample
		for ( size_t frame = 0; frame < res.size(); ++frame ) {
			float sum = 0.0f;
			for ( uint32_t channel = 0; channel < channels; ++channel ) {
				sum += raw_audio[frame * channels + channel];
			}
			res[frame] = sum / channels;
		}
	}

	drwav_free(raw_audio, NULL);

	return res;
}
|
||||||
// writes a waveform to disk
|
// writes a waveform to disk
|
||||||
void VALL_E_API write_audio_to_disk( const std::vector<float>& wavform, const std::string& path ) {
|
void VALL_E_API write_audio_to_disk( const std::vector<float>& wavform, const std::string& path ) {
|
||||||
drwav_data_format format;
|
drwav_data_format format;
|
||||||
format.bitsPerSample = 32;
|
format.bitsPerSample = 32;
|
||||||
format.sampleRate = 24000;
|
format.sampleRate = 24000;
|
||||||
format.container = drwav_container_riff;
|
format.container = drwav_container_riff;
|
||||||
format.channels = 1;
|
format.channels = 1;
|
||||||
format.format = DR_WAVE_FORMAT_IEEE_FLOAT;
|
format.format = DR_WAVE_FORMAT_IEEE_FLOAT;
|
||||||
|
|
||||||
drwav wav;
|
drwav wav;
|
||||||
drwav_init_file_write(&wav, path.c_str(), &format, NULL);
|
drwav_init_file_write(&wav, path.c_str(), &format, NULL);
|
||||||
drwav_uint64 frames = drwav_write_pcm_frames(&wav, wavform.size(), wavform.data());
|
drwav_uint64 frames = drwav_write_pcm_frames(&wav, wavform.size(), wavform.data());
|
||||||
drwav_uninit(&wav);
|
drwav_uninit(&wav);
|
||||||
|
|
||||||
fprintf(stderr, "%s: Number of frames written = %lld.\n", __func__, frames);
|
fprintf(stderr, "%s: Number of frames written = %lld.\n", __func__, frames);
|
||||||
}
|
}
|
||||||
// reads a waveform from disk then encodes it
|
// reads a waveform from disk then encodes it
|
||||||
std::vector<std::vector<int32_t>> VALL_E_API encode_audio( struct encodec_context* ectx, const std::vector<float>& wavform ) {
|
std::vector<std::vector<int32_t>> VALL_E_API encode_audio( struct encodec_context* ectx, const std::vector<float>& wavform ) {
|
||||||
// compress audio
|
// compress audio
|
||||||
if (!encodec_compress_audio(ectx, wavform.data(), wavform.size(), 1)) {
|
if (!encodec_compress_audio(ectx, wavform.data(), wavform.size(), 1)) {
|
||||||
fprintf(stderr, "%s: error during compression \n", __func__);
|
fprintf(stderr, "%s: error during compression \n", __func__);
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t* codes_data = encodec_get_codes( ectx );
|
int32_t* codes_data = encodec_get_codes( ectx );
|
||||||
int n_codes = encodec_get_codes_size( ectx );
|
int n_codes = encodec_get_codes_size( ectx );
|
||||||
int n_codebooks = 8;
|
int n_codebooks = 8;
|
||||||
int n_frames = n_codes / n_codebooks;
|
int n_frames = n_codes / n_codebooks;
|
||||||
|
|
||||||
std::vector<std::vector<int32_t>> res(n_codebooks);
|
std::vector<std::vector<int32_t>> res(n_codebooks);
|
||||||
|
|
||||||
for ( auto l = 0; l < n_codebooks; ++l ) {
|
for ( auto l = 0; l < n_codebooks; ++l ) {
|
||||||
res[l].insert( res[l].end(), codes_data + (l * n_frames), codes_data + ((l+1) * n_frames) );
|
res[l].insert( res[l].end(), codes_data + (l * n_frames), codes_data + ((l+1) * n_frames) );
|
||||||
}
|
}
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
// decodes a 2D codebook into a waveform
|
// decodes a 2D codebook into a waveform
|
||||||
std::vector<float> VALL_E_API decode_audio( struct encodec_context* ectx, const std::vector<std::vector<int32_t>>& codes ) {
|
std::vector<float> VALL_E_API decode_audio( struct encodec_context* ectx, const std::vector<std::vector<int32_t>>& codes ) {
|
||||||
int n_codebooks = codes.size();
|
int n_codebooks = codes.size();
|
||||||
int n_frames = codes[0].size();
|
int n_frames = codes[0].size();
|
||||||
|
|
||||||
|
|
||||||
std::vector<int32_t> res;
|
std::vector<int32_t> res;
|
||||||
|
@ -282,16 +297,16 @@ std::vector<float> VALL_E_API decode_audio( struct encodec_context* ectx, const
|
||||||
res.insert( res.end(), codes[l].begin(), codes[l].end() );
|
res.insert( res.end(), codes[l].begin(), codes[l].end() );
|
||||||
}
|
}
|
||||||
|
|
||||||
// decompress audio
|
// decompress audio
|
||||||
if (!encodec_decompress_audio(ectx, res.data(), res.size(), N_THREADS)) {
|
if (!encodec_decompress_audio(ectx, res.data(), res.size(), N_THREADS)) {
|
||||||
fprintf(stderr, "%s: error during decompression\n", __func__);
|
fprintf(stderr, "%s: error during decompression\n", __func__);
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
// write reconstructed audio on disk
|
// write reconstructed audio on disk
|
||||||
const float* audio_data = encodec_get_audio(ectx);
|
const float* audio_data = encodec_get_audio(ectx);
|
||||||
const int audio_size = encodec_get_audio_size(ectx);
|
const int audio_size = encodec_get_audio_size(ectx);
|
||||||
return std::vector<float>(audio_data, audio_data + audio_size);
|
return std::vector<float>(audio_data, audio_data + audio_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
// sums embeddings over a 2D "tensor"
|
// sums embeddings over a 2D "tensor"
|
||||||
|
@ -484,7 +499,7 @@ std::vector<token_t> VALL_E_API generate( vall_e_context_t* ctx, vall_e_inputs_t
|
||||||
// to-do: figure this out......
|
// to-do: figure this out......
|
||||||
{
|
{
|
||||||
llama_set_causal_attn( ctx->llama.ctx, causal ); // to-do: fix GGML_ASSERT(mask->ne[0] == a->ne[0])
|
llama_set_causal_attn( ctx->llama.ctx, causal ); // to-do: fix GGML_ASSERT(mask->ne[0] == a->ne[0])
|
||||||
// *const_cast<bool*>(&model->hparams.causal_attn) = true; // force set this
|
// *const_cast<bool*>(&model->hparams.causal_attn) = true; // force set this
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<token_t> output_tokens;
|
std::vector<token_t> output_tokens;
|
||||||
|
@ -702,20 +717,78 @@ std::vector<token_t> VALL_E_API generate( vall_e_context_t* ctx, vall_e_inputs_t
|
||||||
return output_tokens;
|
return output_tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// returns a copy of `string` with every non-overlapping occurrence of `search` replaced by `replace`
// * matching resumes after the inserted text, so a `replace` that contains `search` cannot loop forever
// * an empty `search` matches everywhere, so the input is returned unchanged
std::string string_replace( const std::string& string, const std::string& search, const std::string& replace ) {
	if ( search.empty() ) return string;

	std::string res = string;
	size_t pos = 0;
	while ( (pos = res.find(search, pos)) != std::string::npos ) {
		res.replace(pos, search.length(), replace);
		// skip past what was just inserted rather than rescanning from the start
		pos += replace.length();
	}
	return res;
}
|
||||||
|
|
||||||
// phonemizes `text` with espeak-ng and tokenizes the resulting IPA string
// * `ctx` is currently unused (only the commented-out llama_tokenize path below would need it)
// * `language` selects the espeak voice: "en" => "en-us", "fr" => "fr-fr", "zh" => "cmn-latn-pinyin", anything else => "en"
// * returns <bos> (1), one token per phoneme codepoint with known merges folded in, then <eos> (2); codepoints missing from `vocab` become <unk> (0)
// * note: std::wstring_convert::from_bytes throws std::range_error if espeak hands back malformed UTF-8
std::vector<token_t> VALL_E_API phonemize( vall_e_context_t* ctx, const std::string& text, const std::string& language ) {
	std::vector<token_t> tokens;

	// pick the espeak voice for the requested language
	std::string espeak_language = "en";
	if ( language == "en" ) espeak_language = "en-us";
	else if ( language == "fr" ) espeak_language = "fr-fr";
	else if ( language == "zh" ) espeak_language = "cmn-latn-pinyin";
	if ( espeak_SetVoiceByName(espeak_language.c_str()) != EE_OK ) {
		// keep going with whatever voice espeak currently has selected, but make the failure visible
		fprintf(stderr, "%s: failed to set espeak voice: %s\n", __func__, espeak_language.c_str());
	}

	// phonemize text
	// espeak_TextToPhonemes translates one clause per call and advances the text pointer (NULL once the text is exhausted),
	// so keep calling it until the whole input has been consumed
	// NOTE(review): clauses are joined with a single space; confirm this matches how the training text was phonemized
	std::string phonemes;
	const void* text_ptr = text.c_str();
	while ( text_ptr != NULL ) {
		const void* before = text_ptr;
		const char* clause = espeak_TextToPhonemes(&text_ptr, espeakCHARS_UTF8, espeakPHONEMES_IPA);
		if ( clause != NULL && *clause != '\0' ) {
			if ( !phonemes.empty() ) phonemes += ' ';
			phonemes += clause;
		}
		// stop on failure, or if espeak made no progress, to avoid spinning forever
		if ( clause == NULL || text_ptr == before ) break;
	}

	// decode UTF-8 into codepoints so every phoneme symbol is looked up as a whole character
	std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> conv_utf8_utf32;
	const std::u32string unicode_phonemes = conv_utf8_utf32.from_bytes(phonemes);

	// manual tokenization because llama tokenizer isn't cooperating
	tokens.reserve( unicode_phonemes.size() + 2 );
	tokens.emplace_back(1); // <bos>
	for ( const char32_t phone : unicode_phonemes ) {
		const auto found = vocab.find( std::u32string(1, phone) );
		// fall back to <unk> when the codepoint is not in the vocab
		tokens.emplace_back( found != vocab.end() ? found->second : 0 );
	}

	// handle merges (skip <bos>)
	// NOTE(review): merges are applied greedily left-to-right, not by merge priority; confirm this matches the reference tokenizer
	for ( size_t i = 1; i + 1 < tokens.size(); ) {
		const std::string key = std::to_string(tokens[i]) + ":" + std::to_string(tokens[i+1]);
		const auto merge = vocab_merge_map.find(key);
		// not a merge
		if ( merge == vocab_merge_map.end() ) {
			++i;
			continue;
		}
		// update with merged token and drop the token it absorbed
		tokens[i] = merge->second.resolved_token;
		tokens.erase(tokens.begin() + i + 1);
		// stay on the same index to check whether the merged token merges again with its new neighbour
	}
	tokens.emplace_back(2); // <eos>

	/*
	// to-do: fix terminate called after throwing an instance of 'std::out_of_range'
	// deduce token count
	const int n_tokens = -llama_tokenize(ctx->llama.model, phonemes.c_str(), phonemes.size(), NULL, 0, true, true);
	tokens.resize(n_tokens);
	// tokenize
	if ( llama_tokenize(ctx->llama.model, phonemes.c_str(), phonemes.size(), tokens.data(), tokens.size(), true, true) < 0 ) {
		fprintf(stderr, "%s: error: failed to tokenize: %s\n", __func__, phonemes.c_str());
		return tokens;
	}
	*/

	return tokens;
}
|
||||||
|
|
||||||
vall_e_context_t* VALL_E_API vall_e_load( const vall_e_context_params_t& params ) {
|
vall_e_context_t* VALL_E_API vall_e_load( const vall_e_context_params_t& params ) {
|
||||||
|
@ -751,7 +824,7 @@ vall_e_context_t* VALL_E_API vall_e_load( const vall_e_context_params_t& params
|
||||||
return ctx;
|
return ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
// setup encodec.cpp
|
// setup encodec.cpp
|
||||||
ctx->encodec.ctx = encodec_load_model(params.encodec_path.c_str(), 0, params.gpu_layers);
|
ctx->encodec.ctx = encodec_load_model(params.encodec_path.c_str(), 0, params.gpu_layers);
|
||||||
if ( !ctx->encodec.ctx ) {
|
if ( !ctx->encodec.ctx ) {
|
||||||
fprintf(stderr, "%s: error during loading model\n", __func__);
|
fprintf(stderr, "%s: error during loading model\n", __func__);
|
||||||
|
@ -760,9 +833,24 @@ vall_e_context_t* VALL_E_API vall_e_load( const vall_e_context_params_t& params
|
||||||
encodec_set_target_bandwidth(ctx->encodec.ctx, 6);
|
encodec_set_target_bandwidth(ctx->encodec.ctx, 6);
|
||||||
encodec_set_sample_rate(ctx->encodec.ctx, 24000);
|
encodec_set_sample_rate(ctx->encodec.ctx, 24000);
|
||||||
|
|
||||||
|
// setup espeak
|
||||||
|
espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 0, NULL, 0);
|
||||||
|
|
||||||
// setup vall_e.cpp
|
// setup vall_e.cpp
|
||||||
vall_e_inputs_map_init( ctx->io_map, ctx->llama.model );
|
vall_e_inputs_map_init( ctx->io_map, ctx->llama.model );
|
||||||
|
|
||||||
|
// setup vocab things
|
||||||
|
for ( auto& entry : vocab_merges ) {
|
||||||
|
entry.resolved = entry.pre+entry.post;
|
||||||
|
|
||||||
|
entry.pre_token = vocab[entry.pre];
|
||||||
|
entry.post_token = vocab[entry.post];
|
||||||
|
entry.resolved_token = vocab[entry.resolved];
|
||||||
|
|
||||||
|
std::string key = std::to_string(entry.pre_token) + ":" + std::to_string(entry.post_token);
|
||||||
|
vocab_merge_map[key] = entry;
|
||||||
|
}
|
||||||
|
|
||||||
return ctx;
|
return ctx;
|
||||||
}
|
}
|
||||||
vall_e_inputs_t vall_e_prepare_inputs( vall_e_context_t* ctx, const std::string& text, const std::string& prompt_path, const std::string& language ) {
|
vall_e_inputs_t vall_e_prepare_inputs( vall_e_context_t* ctx, const std::string& text, const std::string& prompt_path, const std::string& language ) {
|
||||||
|
@ -785,7 +873,7 @@ vall_e_audio_codes_t vall_e_generate( vall_e_context_t* ctx, vall_e_inputs_t& in
|
||||||
std::vector<token_t> output_tokens;
|
std::vector<token_t> output_tokens;
|
||||||
if ( modality == MODALITY_NAR_LEN ) {
|
if ( modality == MODALITY_NAR_LEN ) {
|
||||||
// inference len
|
// inference len
|
||||||
int len = 75;
|
int len = 0;
|
||||||
if ( !len ) {
|
if ( !len ) {
|
||||||
inputs.task = "len";
|
inputs.task = "len";
|
||||||
output_tokens = generate( ctx, inputs, 5, INFERENCE_MODE_LEN );
|
output_tokens = generate( ctx, inputs, 5, INFERENCE_MODE_LEN );
|
||||||
|
@ -826,6 +914,7 @@ vall_e_audio_codes_t vall_e_generate( vall_e_context_t* ctx, vall_e_inputs_t& in
|
||||||
return inputs.resp;
|
return inputs.resp;
|
||||||
}
|
}
|
||||||
void VALL_E_API vall_e_free( vall_e_context_t* ctx ) {
|
void VALL_E_API vall_e_free( vall_e_context_t* ctx ) {
|
||||||
|
espeak_Terminate();
|
||||||
encodec_free(ctx->encodec.ctx);
|
encodec_free(ctx->encodec.ctx);
|
||||||
llama_free(ctx->llama.ctx);
|
llama_free(ctx->llama.ctx);
|
||||||
llama_free_model(ctx->llama.model);
|
llama_free_model(ctx->llama.model);
|
||||||
|
@ -843,12 +932,13 @@ int main( int argc, char** argv ) {
|
||||||
params.cpu_threads = N_THREADS;
|
params.cpu_threads = N_THREADS;
|
||||||
vall_e_context_t* ctx = vall_e_load( params );
|
vall_e_context_t* ctx = vall_e_load( params );
|
||||||
|
|
||||||
|
std::string text = "Hello world.";
|
||||||
std::string prompt_path = "./data/prom.wav";
|
std::string prompt_path = "./data/prom.wav";
|
||||||
std::string output_path = "./data/resp.wav";
|
std::string output_path = "./data/resp.wav";
|
||||||
std::string language = "en";
|
std::string language = "en";
|
||||||
int modality = MODALITY_NAR_LEN;
|
int modality = MODALITY_NAR_LEN;
|
||||||
|
|
||||||
auto inputs = vall_e_prepare_inputs( ctx, "Hello world.", prompt_path, language );
|
auto inputs = vall_e_prepare_inputs( ctx, text, prompt_path, language );
|
||||||
auto output_audio_codes = vall_e_generate( ctx, inputs, modality );
|
auto output_audio_codes = vall_e_generate( ctx, inputs, modality );
|
||||||
write_audio_to_disk( decode_audio( ctx->encodec.ctx, output_audio_codes ), output_path );
|
write_audio_to_disk( decode_audio( ctx->encodec.ctx, output_audio_codes ), output_path );
|
||||||
|
|
||||||
|
|
|
@ -1,14 +1,16 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "llama.h"
|
// C++ deps
|
||||||
#include "encodec.h"
|
|
||||||
|
|
||||||
#include "dr_wav.h"
|
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
|
// external deps
|
||||||
|
#include <llama.h>
|
||||||
|
#include <encodec.h>
|
||||||
|
#include <dr_wav.h>
|
||||||
|
#include <espeak-ng/speak_lib.h>
|
||||||
|
|
||||||
// to-do: copy over import/export stuff from engine project (because I don't remember how I set it up in <uf/config.h>)
|
// to-do: copy over import/export stuff from engine project (because I don't remember how I set it up in <uf/config.h>)
|
||||||
#define VALL_E_API
|
#define VALL_E_API
|
||||||
|
|
||||||
|
@ -73,6 +75,16 @@ struct score_t {
|
||||||
bool operator<( const score_t& that ) const { return this->value < that.value; }
|
bool operator<( const score_t& that ) const { return this->value < that.value; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct merge_entry_t {
|
||||||
|
std::u32string pre;
|
||||||
|
std::u32string post;
|
||||||
|
std::u32string resolved;
|
||||||
|
|
||||||
|
token_t pre_token;
|
||||||
|
token_t post_token;
|
||||||
|
token_t resolved_token;
|
||||||
|
};
|
||||||
|
|
||||||
struct vall_e_context_params_t {
|
struct vall_e_context_params_t {
|
||||||
std::string model_path;
|
std::string model_path;
|
||||||
std::string encodec_path;
|
std::string encodec_path;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user