crammed in vall_e.cpp support to finally justify creating it (and a bunch of other things)

2025-08-02 23:02:49 -05:00 · 2025-08-02 23:02:49 -05:00 · 73ca9bb168
commit 73ca9bb168
parent 4f1ce314a5
52 changed files with 13972 additions and 37 deletions
--- a/.gitignore
+++ b/.gitignore
@ -55,3 +55,5 @@
 *.otf
 *.bin
 models/
+llm/
+tmp/
--- a/11
+++ b/11
@ -62,10 +62,10 @@ LIBS 					+= -L$(ENGINE_LIB_DIR) -L$(LIB_DIR)/$(PREFIX_PATH) -L$(LIB_DIR)/$(ARCH
 	
 LINKS 					+= $(UF_LIBS) $(EXT_LIBS) $(DEPS)
 DEPS 					+= 
-FLAGS 					+=
+FLAGS 					+= # -DUF_DEBUG

 ifneq (,$(findstring -DUF_DEBUG,$(FLAGS)))
-	REQ_DEPS 			+= meshoptimizer toml xatlas curl ffx:fsr cpptrace # ncurses openvr draco discord bullet ultralight-ux
+	REQ_DEPS 			+= meshoptimizer toml xatlas curl ffx:fsr cpptrace vall_e # ncurses openvr draco discord bullet ultralight-ux
 	FLAGS 				+= -g
 endif
 ifneq (,$(findstring win64,$(ARCH)))
@ -215,7 +215,7 @@ ifneq (,$(findstring bullet,$(REQ_DEPS)))
 		DEPS 				+= -lbulletdynamics -lbulletcollision -lbulletlinearmath
 	else
 		DEPS 				+= -lBulletDynamics -lBulletCollision -lLinearMath
-		INCS 				+= -I./dep/bullet/
+		INCS 				+= -I./dep/include/bullet/
 	endif
 endif
 ifneq (,$(findstring reactphysics,$(REQ_DEPS)))
@ -248,6 +248,11 @@ endif
 ifneq (,$(findstring toml,$(REQ_DEPS)))
 	FLAGS 				+= -DUF_USE_TOML
 endif
+ifneq (,$(findstring vall_e,$(REQ_DEPS)))
+	FLAGS 				+= -DUF_USE_VALL_E
+	INCS 				+= -I./dep/include/vall_e.cpp/
+	DEPS 				+= -lvall_e
+endif

 # SRCS_DLL 				+= $(wildcard $(ENGINE_SRC_DIR)/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*/*/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*/*/*/*.cpp)
 #SRCS_DLL 				+= $(wildcard $(ENGINE_SRC_DIR)/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*/*/*.cpp) $(wildcard $(ENGINE_SRC_DIR)/*/*/*/*/*.cpp) $(wildcard $(EXT_SRC_DIR)/*.cpp) $(wildcard $(EXT_SRC_DIR)/*/*.cpp) $(wildcard $(EXT_SRC_DIR)/*/*/*.cpp) $(wildcard $(EXT_SRC_DIR)/*/*/*/*.cpp) $(wildcard $(EXT_SRC_DIR)/*/*/*/*/*.cpp)
--- a/README.md
+++ b/README.md
@ -18,7 +18,7 @@ To compile, run `make`. The outputted libraries and executables will be placed i

 ## Run

-Currently, assets are not provided due to size (but mostly due to being test assets).
+Currently, a barebones setup is provided via the [`.zip` bundle](https://github.com/e-c-k-e-r/engine/releases/tag/bundle).

 *If* adequate assets are provided, run `./program.sh` or `make run`. This ensures the path to the required libraries are added to the PATH.

--- a/bin/data/config.json
+++ b/bin/data/config.json
@ -288,6 +288,9 @@
 				"encoding": "msgpack",
 				"compression": "gz"
 			},
+			"vall_e": {
+				"enabled": true
+			},
 			"imgui": {
 				"enabled": true
 			},
--- a/dep/include/vall_e.cpp/decoder.h
+++ b/dep/include/vall_e.cpp/decoder.h
@ -0,0 +1,113 @@
+#pragma once
+
+#include <vector>
+
+#include "ggml.h"
+#include "ggml-alloc.h"
+#include "ggml-backend.h"
+
+#include "lstm.h"
+#include "utils.h"
+
+
+// Weight/bias tensors for one decoder stage: a transposed-convolution
+// upsampler followed by a two-convolution residual unit with a convolutional
+// shortcut (see encodec_forward_decoder for how they are applied).
+struct encodec_decoder_block {
+    // upsampler (fed to strided_conv_transpose_1d)
+    ggml_tensor *us_conv_w;
+    ggml_tensor *us_conv_b;
+
+    // first convolution of the residual unit
+    ggml_tensor *conv_1_w;
+    ggml_tensor *conv_1_b;
+
+    // second convolution of the residual unit
+    ggml_tensor *conv_2_w;
+    ggml_tensor *conv_2_b;
+
+    // shortcut convolution, summed with the conv_2 output
+    ggml_tensor *conv_sc_w;
+    ggml_tensor *conv_sc_b;
+};
+
+// All decoder weights, in the order the forward pass consumes them:
+// initial convolution, LSTM, per-stage blocks, final convolution.
+struct encodec_decoder {
+    // initial convolution applied to the incoming tensor
+    ggml_tensor *init_conv_w;
+    ggml_tensor *init_conv_b;
+
+    // weights of the two stacked LSTM layers
+    encodec_lstm lstm;
+
+    // convolution that produces the decoder output
+    ggml_tensor *final_conv_w;
+    ggml_tensor *final_conv_b;
+
+    // one entry per upsampling stage
+    std::vector<encodec_decoder_block> blocks;
+};
+
+/**
+ * Builds the decoder forward pass out of ggml ops created through `ctx0`.
+ *
+ * Order of operations, as implemented below: initial convolution, two stacked
+ * LSTM layers whose output is added back onto the convolution output, four
+ * upsampling residual blocks, then ELU and the final convolution.
+ *
+ * Declared `inline` because the definition lives in a header: without it,
+ * including this header from more than one translation unit yields
+ * duplicate-symbol link errors.
+ *
+ * @param decoder          Decoder weights; `blocks` must hold at least four entries.
+ * @param ctx0             ggml context forwarded to every op created here.
+ * @param quantized_out    Input tensor fed to the initial convolution.
+ * @param ratios           Per-block stride for the transposed convolutions; indices 0..3 are read.
+ * @param kernel_size      Not used by this function; kept so existing callers compile.
+ * @param res_kernel_size  Not used by this function; kept so existing callers compile.
+ * @param stride           Stride forwarded to every regular (non-transposed) convolution.
+ * @return The output tensor of the final convolution, or NULL when an argument
+ *         is missing or the decoder holds fewer than four blocks.
+ */
+inline struct ggml_tensor *encodec_forward_decoder(
+    const struct encodec_decoder *decoder, struct ggml_context *ctx0,
+    struct ggml_tensor *quantized_out, const int *ratios, const int kernel_size, const int res_kernel_size,
+    const int stride) {
+
+    // Number of upsampling blocks this forward pass walks through.
+    const size_t n_blocks = 4;
+
+    // Part of the public signature but not needed to build the graph.
+    (void) kernel_size;
+    (void) res_kernel_size;
+
+    if (!quantized_out) {
+        fprintf(stderr, "%s: null input tensor\n", __func__);
+        return NULL;
+    }
+
+    if (!decoder || !ratios) {
+        fprintf(stderr, "%s: null decoder or ratios\n", __func__);
+        return NULL;
+    }
+
+    // Guard the fixed-count loop below against indexing past the end of `blocks`.
+    if (decoder->blocks.size() < n_blocks) {
+        fprintf(stderr, "%s: expected at least %d decoder blocks, got %d\n",
+                __func__, static_cast<int>(n_blocks), static_cast<int>(decoder->blocks.size()));
+        return NULL;
+    }
+
+    struct ggml_tensor *inpL = strided_conv_1d(
+        ctx0, quantized_out, decoder->init_conv_w, decoder->init_conv_b, stride);
+
+    // lstm
+    {
+        struct ggml_tensor *cur = inpL;
+
+        // Bind by reference: the weights are only read, no need to copy the struct.
+        const encodec_lstm &lstm = decoder->lstm;
+
+        // first lstm layer
+        char l0_prefix[7] = "dec_l0";
+        struct ggml_tensor *hs1 = forward_pass_lstm_unilayer(
+            ctx0, cur, lstm.l0_ih_w, lstm.l0_hh_w, lstm.l0_ih_b, lstm.l0_hh_b, l0_prefix);
+
+        // second lstm layer
+        char l1_prefix[7] = "dec_l1";
+        struct ggml_tensor *out = forward_pass_lstm_unilayer(
+            ctx0, hs1, lstm.l1_ih_w, lstm.l1_hh_w, lstm.l1_ih_b, lstm.l1_hh_b, l1_prefix);
+
+        // skip connection around the LSTM stack
+        inpL = ggml_add(ctx0, inpL, out);
+    }
+
+    for (size_t layer_ix = 0; layer_ix < n_blocks; layer_ix++) {
+        const encodec_decoder_block &block = decoder->blocks[layer_ix];
+
+        // upsampling layers
+        inpL = ggml_elu(ctx0, inpL);
+
+        inpL = strided_conv_transpose_1d(
+            ctx0, inpL, block.us_conv_w, block.us_conv_b, ratios[layer_ix]);
+
+        struct ggml_tensor *current = inpL;
+
+        // shortcut
+        struct ggml_tensor *shortcut = strided_conv_1d(
+            ctx0, inpL, block.conv_sc_w, block.conv_sc_b, stride);
+
+        // conv1
+        current = ggml_elu(ctx0, current);
+
+        current = strided_conv_1d(
+            ctx0, current, block.conv_1_w, block.conv_1_b, stride);
+
+        // conv2
+        current = ggml_elu(ctx0, current);
+
+        current = strided_conv_1d(
+            ctx0, current, block.conv_2_w, block.conv_2_b, stride);
+
+        // residual connection
+        inpL = ggml_add(ctx0, current, shortcut);
+    }
+
+    // final conv
+    inpL = ggml_elu(ctx0, inpL);
+
+    struct ggml_tensor *decoded_inp = strided_conv_1d(
+        ctx0, inpL, decoder->final_conv_w, decoder->final_conv_b, stride);
+
+    return decoded_inp;
+}
--- a/dep/include/vall_e.cpp/dr_wav.h
+++ b/dep/include/vall_e.cpp/dr_wav.h
--- a/dep/include/vall_e.cpp/encodec.h
+++ b/dep/include/vall_e.cpp/encodec.h
@ -0,0 +1,184 @@
+/*
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2024 Pierre-Antoine Bannier                                        │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+/*
+ * This file contains the declarations of the structs and functions used in the encodec library.
+ * The library provides functionality for audio compression and decompression using a custom model.
+ * The model consists of an encoder, a quantizer and a decoder, each with their own set of parameters.
+ * The library also provides functions for loading and freeing the model, as well as compressing and decompressing audio data.
+ *
+ */
+#pragma once
+
+#include "ggml-alloc.h"
+#include "ggml-backend.h"
+#include "ggml.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+    struct encodec_context;
+
+    struct encodec_statistics {
+        // The time taken to load the model.
+        int64_t t_load_us;
+        // The time taken to compute the model.
+        int64_t t_compute_us;
+    };
+
+    /**
+     * Loads an encodec model from the specified file path.
+     *
+     * @param model_path The file path to the encodec model.
+     * @param offset The offset (in bytes) to the start of the model in the file.
+     * @param n_gpu_layers The number of GPU layers to use.
+     * @return A pointer to the encodec context struct.
+     */
+    struct encodec_context *encodec_load_model(
+        const char *model_path,
+        const int offset,
+        int n_gpu_layers);
+
+    /**
+     * Sets the target bandwidth for the given encodec context.
+     *
+     * @param ectx The encodec context to set the target bandwidth for.
+     * @param bandwidth The target bandwidth to set, in bits per second.
+     */
+    void encodec_set_target_bandwidth(
+        struct encodec_context *ectx,
+        int bandwidth);
+
+    /**
+     * Sets the sample rate for the given encodec context.
+     *
+     * @param ectx The encodec context to set the sample rate for.
+     * @param sample_rate The sample rate to set.
+     */
+    void encodec_set_sample_rate(
+        struct encodec_context *ectx,
+        int sample_rate);
+
+    /**
+     * Reconstructs audio from raw audio data using the specified encodec context.
+     *
+     * @param ectx The encodec context to use for reconstruction.
+     * @param raw_audio The raw audio data to reconstruct.
+     * @param n_samples The number of samples in the raw audio buffer.
+     * @param n_threads The number of threads to use for reconstruction.
+     * @return True if the reconstruction was successful, false otherwise.
+     */
+    bool encodec_reconstruct_audio(
+        struct encodec_context *ectx,
+        const float *raw_audio,
+        const int n_samples,
+        int n_threads);
+
+    /**
+     * Compresses audio data using the specified encodec context.
+     *
+     * @param ectx The encodec context to use for compression.
+     * @param raw_audio The raw audio data to compress.
+     * @param n_samples The number of samples in the raw audio buffer.
+     * @param n_threads The number of threads to use for compression.
+     * @return True if the compression was successful, false otherwise.
+     */
+    bool encodec_compress_audio(
+        struct encodec_context *ectx,
+        const float *raw_audio,
+        const int n_samples,
+        int n_threads);
+
+    /**
+     * Decompresses audio data using the specified encodec context.
+     *
+     * @param ectx The encodec context to use for decompression.
+     * @param codes The compressed audio data to decompress.
+     * @param n_codes The number of codes in the codes buffer.
+     * @param n_threads The number of threads to use for decompression.
+     * @return True if the audio data was successfully decompressed, false otherwise.
+     */
+    bool encodec_decompress_audio(
+        struct encodec_context *ectx,
+        const int32_t *codes,
+        const int n_codes,
+        int n_threads);
+
+    /**
+     * Gets the audio data from the given encodec context.
+     *
+     * @param ectx The encodec context to get the audio data from.
+     * @return A pointer to the audio data.
+    */
+    float * encodec_get_audio(
+        struct encodec_context *ectx);
+
+    /**
+     * Gets the size of the audio data from the given encodec context.
+     *
+     * @param ectx The encodec context to get the audio size from.
+     * @return The size of the audio data.
+    */
+    int encodec_get_audio_size(
+        struct encodec_context *ectx);
+
+    /**
+     * Gets the code data from the given encodec context.
+     *
+     * @param ectx The encodec context to get the code data from.
+     * @return A pointer to the code data.
+    */
+    int32_t * encodec_get_codes(
+        struct encodec_context *ectx);
+
+    /**
+     * Gets the size of the code data from the given encodec context.
+     *
+     * @param ectx The encodec context to get the code size from.
+     * @return The size of the code data.
+    */
+    int encodec_get_codes_size(
+        struct encodec_context *ectx);
+
+    /**
+     * Gets the statistics for the given encodec context.
+     *
+     * @param ectx The encodec context to get the statistics for.
+     * @return A pointer to the statistics struct.
+    */
+    const struct encodec_statistics* encodec_get_statistics(
+        struct encodec_context *ectx);
+
+    /**
+     * Reset the statistics for the given encodec context.
+     *
+     * @param ectx The encodec context to reset the statistics for.
+    */
+   void encodec_reset_statistics(
+        struct encodec_context *ectx);
+
+    /**
+     * @brief Frees the memory allocated for an encodec context.
+     *
+     * @param ectx The encodec context to free.
+     */
+    void encodec_free(
+        struct encodec_context *ectx);
+
+#ifdef __cplusplus
+}
+#endif
--- a/dep/include/vall_e.cpp/encoder.h
+++ b/dep/include/vall_e.cpp/encoder.h
@ -0,0 +1,109 @@
+#pragma once
+
+#include <vector>
+
+#include "ggml.h"
+#include "lstm.h"
+
+// res + downsample block at some ratio
+// Weight/bias tensors for one encoder stage: a two-convolution residual unit
+// with a convolutional shortcut, followed by a strided downsampling convolution
+// (see encodec_forward_encoder for how they are applied).
+struct encodec_encoder_block {
+    // first convolution of the residual unit
+    ggml_tensor *conv_1_w;
+    ggml_tensor *conv_1_b;
+
+    // second convolution of the residual unit
+    ggml_tensor *conv_2_w;
+    ggml_tensor *conv_2_b;
+
+    // shortcut convolution, summed with the conv_2 output
+    ggml_tensor *conv_sc_w;
+    ggml_tensor *conv_sc_b;
+
+    // downsampling convolution applied after the residual sum
+    ggml_tensor *ds_conv_w;
+    ggml_tensor *ds_conv_b;
+};
+
+// All encoder weights: initial convolution, per-stage blocks, LSTM and the
+// final convolution.
+struct encodec_encoder {
+    // initial convolution applied to the incoming tensor
+    ggml_tensor *init_conv_w;
+    ggml_tensor *init_conv_b;
+
+    // weights of the two stacked LSTM layers
+    encodec_lstm lstm;
+
+    // convolution that produces the encoder output
+    ggml_tensor *final_conv_w;
+    ggml_tensor *final_conv_b;
+
+    // one entry per downsampling stage
+    std::vector<encodec_encoder_block> blocks;
+};
+
+/**
+ * Builds the encoder forward pass out of ggml ops created through `ctx0`.
+ *
+ * Order of operations, as implemented below: initial convolution, four
+ * residual + downsampling blocks, two stacked LSTM layers whose output is
+ * added back onto their input, then ELU and the final convolution.
+ *
+ * Declared `inline` because the definition lives in a header: without it,
+ * including this header from more than one translation unit yields
+ * duplicate-symbol link errors.
+ *
+ * @param encoder          Encoder weights; `blocks` must hold at least four entries.
+ * @param ctx0             ggml context forwarded to every op created here.
+ * @param inp              Input tensor fed to the initial convolution.
+ * @param ratios           Downsampling strides; indices 0..3 are read in reverse
+ *                         order (block 0 uses ratios[3], block 3 uses ratios[0]).
+ * @param kernel_size      Not used by this function; kept so existing callers compile.
+ * @param res_kernel_size  Not used by this function; kept so existing callers compile.
+ * @param stride           Stride forwarded to every non-downsampling convolution.
+ * @return The output tensor of the final convolution, or NULL when an argument
+ *         is missing or the encoder holds fewer than four blocks.
+ */
+inline struct ggml_tensor *encodec_forward_encoder(
+    const struct encodec_encoder *encoder, struct ggml_context *ctx0,
+    struct ggml_tensor *inp, const int * ratios, const int kernel_size, const int res_kernel_size,
+    const int stride) {
+
+    // Number of downsampling blocks this forward pass walks through.
+    const size_t n_blocks = 4;
+
+    // Part of the public signature but not needed to build the graph.
+    (void) kernel_size;
+    (void) res_kernel_size;
+
+    if (!inp) {
+        fprintf(stderr, "%s: null input tensor\n", __func__);
+        return NULL;
+    }
+
+    if (!encoder || !ratios) {
+        fprintf(stderr, "%s: null encoder or ratios\n", __func__);
+        return NULL;
+    }
+
+    // Guard the fixed-count loop below against indexing past the end of `blocks`.
+    if (encoder->blocks.size() < n_blocks) {
+        fprintf(stderr, "%s: expected at least %d encoder blocks, got %d\n",
+                __func__, static_cast<int>(n_blocks), static_cast<int>(encoder->blocks.size()));
+        return NULL;
+    }
+
+    struct ggml_tensor *inpL = strided_conv_1d(
+        ctx0, inp, encoder->init_conv_w, encoder->init_conv_b, stride);
+
+    for (size_t layer_ix = 0; layer_ix < n_blocks; layer_ix++) {
+        // Bind by reference: the weights are only read, no need to copy the struct.
+        const encodec_encoder_block &block = encoder->blocks[layer_ix];
+
+        struct ggml_tensor *current = inpL;
+
+        // shortcut
+        struct ggml_tensor *shortcut = strided_conv_1d(
+            ctx0, inpL, block.conv_sc_w, block.conv_sc_b, stride);
+
+        // conv1
+        current = ggml_elu(ctx0, current);
+
+        current = strided_conv_1d(
+            ctx0, current, block.conv_1_w, block.conv_1_b, stride);
+
+        // conv2
+        current = ggml_elu(ctx0, current);
+
+        current = strided_conv_1d(
+            ctx0, current, block.conv_2_w, block.conv_2_b, stride);
+
+        // residual connection
+        inpL = ggml_add(ctx0, current, shortcut);
+
+        // downsampling layers
+        inpL = ggml_elu(ctx0, inpL);
+
+        // ratios are consumed back to front
+        inpL = strided_conv_1d(
+            ctx0, inpL, block.ds_conv_w, block.ds_conv_b, ratios[n_blocks - 1 - layer_ix]);
+    }
+
+    // lstm
+    {
+        struct ggml_tensor *cur = inpL;
+
+        const encodec_lstm &lstm = encoder->lstm;
+
+        // first lstm layer
+        char l0_prefix[7] = "enc_l0";
+        struct ggml_tensor *hs1 = forward_pass_lstm_unilayer(
+            ctx0, cur, lstm.l0_ih_w, lstm.l0_hh_w, lstm.l0_ih_b, lstm.l0_hh_b, l0_prefix);
+
+        // second lstm layer
+        char l1_prefix[7] = "enc_l1";
+        struct ggml_tensor *out = forward_pass_lstm_unilayer(
+            ctx0, hs1, lstm.l1_ih_w, lstm.l1_hh_w, lstm.l1_ih_b, lstm.l1_hh_b, l1_prefix);
+
+        // skip connection around the LSTM stack
+        inpL = ggml_add(ctx0, inpL, out);
+    }
+
+    // final conv
+    inpL = ggml_elu(ctx0, inpL);
+
+    struct ggml_tensor *encoded_inp = strided_conv_1d(
+        ctx0, inpL, encoder->final_conv_w, encoder->final_conv_b, stride);
+
+    return encoded_inp;
+}
--- a/dep/include/vall_e.cpp/espeak-ng/encoding.h
+++ b/dep/include/vall_e.cpp/espeak-ng/encoding.h
@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2017 Reece H. Dunn
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see: <http://www.gnu.org/licenses/>.
+ */
+#ifndef ESPEAK_NG_ENCODING_H
+#define ESPEAK_NG_ENCODING_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+typedef enum
+{
+	ESPEAKNG_ENCODING_UNKNOWN,
+	ESPEAKNG_ENCODING_US_ASCII,
+	ESPEAKNG_ENCODING_ISO_8859_1,
+	ESPEAKNG_ENCODING_ISO_8859_2,
+	ESPEAKNG_ENCODING_ISO_8859_3,
+	ESPEAKNG_ENCODING_ISO_8859_4,
+	ESPEAKNG_ENCODING_ISO_8859_5,
+	ESPEAKNG_ENCODING_ISO_8859_6,
+	ESPEAKNG_ENCODING_ISO_8859_7,
+	ESPEAKNG_ENCODING_ISO_8859_8,
+	ESPEAKNG_ENCODING_ISO_8859_9,
+	ESPEAKNG_ENCODING_ISO_8859_10,
+	ESPEAKNG_ENCODING_ISO_8859_11,
+	// ISO-8859-12 is not a valid encoding.
+	ESPEAKNG_ENCODING_ISO_8859_13,
+	ESPEAKNG_ENCODING_ISO_8859_14,
+	ESPEAKNG_ENCODING_ISO_8859_15,
+	ESPEAKNG_ENCODING_ISO_8859_16,
+	ESPEAKNG_ENCODING_KOI8_R,
+	ESPEAKNG_ENCODING_ISCII,
+	ESPEAKNG_ENCODING_UTF_8,
+	ESPEAKNG_ENCODING_ISO_10646_UCS_2,
+} espeak_ng_ENCODING;
+
+ESPEAK_NG_API espeak_ng_ENCODING
+espeak_ng_EncodingFromName(const char *encoding);
+
+typedef struct espeak_ng_TEXT_DECODER_ espeak_ng_TEXT_DECODER;
+
+ESPEAK_NG_API espeak_ng_TEXT_DECODER *
+create_text_decoder(void);
+
+ESPEAK_NG_API void
+destroy_text_decoder(espeak_ng_TEXT_DECODER *decoder);
+
+ESPEAK_NG_API espeak_ng_STATUS
+text_decoder_decode_string(espeak_ng_TEXT_DECODER *decoder,
+                           const char *string,
+                           int length,
+                           espeak_ng_ENCODING encoding);
+
+ESPEAK_NG_API espeak_ng_STATUS
+text_decoder_decode_string_auto(espeak_ng_TEXT_DECODER *decoder,
+                                const char *string,
+                                int length,
+                                espeak_ng_ENCODING encoding);
+
+ESPEAK_NG_API espeak_ng_STATUS
+text_decoder_decode_wstring(espeak_ng_TEXT_DECODER *decoder,
+                            const wchar_t *string,
+                            int length);
+
+ESPEAK_NG_API espeak_ng_STATUS
+text_decoder_decode_string_multibyte(espeak_ng_TEXT_DECODER *decoder,
+                                     const void *input,
+                                     espeak_ng_ENCODING encoding,
+                                     int flags);
+
+ESPEAK_NG_API int
+text_decoder_eof(espeak_ng_TEXT_DECODER *decoder);
+
+ESPEAK_NG_API uint32_t
+text_decoder_getc(espeak_ng_TEXT_DECODER *decoder);
+
+ESPEAK_NG_API uint32_t
+text_decoder_peekc(espeak_ng_TEXT_DECODER *decoder);
+
+ESPEAK_NG_API const void *
+text_decoder_get_buffer(espeak_ng_TEXT_DECODER *decoder);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/dep/include/vall_e.cpp/espeak-ng/espeak_ng.h
+++ b/dep/include/vall_e.cpp/espeak-ng/espeak_ng.h
@ -0,0 +1,223 @@
+/* eSpeak NG API.
+ *
+ * Copyright (C) 2015-2017 Reece H. Dunn
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef ESPEAK_NG_H
+#define ESPEAK_NG_H
+
+#include <espeak-ng/speak_lib.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#if defined(_WIN32) || defined(_WIN64)
+#ifdef LIBESPEAK_NG_EXPORT
+#define ESPEAK_NG_API __declspec(dllexport)
+#else
+#define ESPEAK_NG_API __declspec(dllimport)
+#endif
+#else
+#define ESPEAK_NG_API
+#endif
+
+#define ESPEAKNG_DEFAULT_VOICE "en"
+
+typedef enum {
+	ENS_GROUP_MASK               = 0x70000000,
+	ENS_GROUP_ERRNO              = 0x00000000, /* Values 0-255 map to errno error codes. */
+	ENS_GROUP_ESPEAK_NG          = 0x10000000, /* eSpeak NG error codes. */
+
+	/* eSpeak NG 1.49.0 */
+	ENS_OK                       = 0,
+	ENS_COMPILE_ERROR            = 0x100001FF,
+	ENS_VERSION_MISMATCH         = 0x100002FF,
+	ENS_FIFO_BUFFER_FULL         = 0x100003FF,
+	ENS_NOT_INITIALIZED          = 0x100004FF,
+	ENS_AUDIO_ERROR              = 0x100005FF,
+	ENS_VOICE_NOT_FOUND          = 0x100006FF,
+	ENS_MBROLA_NOT_FOUND         = 0x100007FF,
+	ENS_MBROLA_VOICE_NOT_FOUND   = 0x100008FF,
+	ENS_EVENT_BUFFER_FULL        = 0x100009FF,
+	ENS_NOT_SUPPORTED            = 0x10000AFF,
+	ENS_UNSUPPORTED_PHON_FORMAT  = 0x10000BFF,
+	ENS_NO_SPECT_FRAMES          = 0x10000CFF,
+	ENS_EMPTY_PHONEME_MANIFEST   = 0x10000DFF,
+	ENS_SPEECH_STOPPED           = 0x10000EFF,
+
+	/* eSpeak NG 1.49.2 */
+	ENS_UNKNOWN_PHONEME_FEATURE  = 0x10000FFF,
+	ENS_UNKNOWN_TEXT_ENCODING    = 0x100010FF,
+} espeak_ng_STATUS;
+
+typedef enum {
+	ENOUTPUT_MODE_SYNCHRONOUS = 0x0001,
+	ENOUTPUT_MODE_SPEAK_AUDIO = 0x0002,
+} espeak_ng_OUTPUT_MODE;
+
+typedef enum {
+	ENGENDER_UNKNOWN = 0,
+	ENGENDER_MALE = 1,
+	ENGENDER_FEMALE = 2,
+	ENGENDER_NEUTRAL = 3,
+} espeak_ng_VOICE_GENDER;
+
+typedef struct
+{
+  void (*outputPhoSymbol)(char* pho_code,int pho_type);
+  void (*outputSilence)(short echo_tail);
+  void (*outputVoiced)(short sample);
+  void (*outputUnvoiced)(short sample);
+} espeak_ng_OUTPUT_HOOKS;
+
+/* eSpeak NG 1.49.0 */
+
+typedef struct espeak_ng_ERROR_CONTEXT_ *espeak_ng_ERROR_CONTEXT;
+
+ESPEAK_NG_API void
+espeak_ng_ClearErrorContext(espeak_ng_ERROR_CONTEXT *context);
+
+ESPEAK_NG_API void
+espeak_ng_GetStatusCodeMessage(espeak_ng_STATUS status,
+                               char *buffer,
+                               size_t length);
+
+ESPEAK_NG_API void
+espeak_ng_PrintStatusCodeMessage(espeak_ng_STATUS status,
+                                 FILE *out,
+                                 espeak_ng_ERROR_CONTEXT context);
+
+ESPEAK_NG_API void
+espeak_ng_InitializePath(const char *path);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_Initialize(espeak_ng_ERROR_CONTEXT *context);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_InitializeOutput(espeak_ng_OUTPUT_MODE output_mode,
+                           int buffer_length,
+                           const char *device);
+
+ESPEAK_NG_API int
+espeak_ng_GetSampleRate(void);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_SetParameter(espeak_PARAMETER parameter,
+                       int value,
+                       int relative);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_SetPhonemeEvents(int enable, int ipa);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_SetPunctuationList(const wchar_t *punctlist);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_SetVoiceByName(const char *name);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_SetVoiceByFile(const char *filename);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_SetVoiceByProperties(espeak_VOICE *voice_selector);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_Synthesize(const void *text,
+                     size_t size,
+                     unsigned int position,
+                     espeak_POSITION_TYPE position_type,
+                     unsigned int end_position,
+                     unsigned int flags,
+                     unsigned int *unique_identifier,
+                     void *user_data);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_SynthesizeMark(const void *text,
+                         size_t size,
+                         const char *index_mark,
+                         unsigned int end_position,
+                         unsigned int flags,
+                         unsigned int *unique_identifier,
+                         void *user_data);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_SpeakKeyName(const char *key_name);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_SpeakCharacter(wchar_t character);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_Cancel(void);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_Synchronize(void);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_Terminate(void);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_CompileDictionary(const char *dsource,
+                            const char *dict_name,
+                            FILE *log,
+                            int flags,
+                            espeak_ng_ERROR_CONTEXT *context);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_CompileMbrolaVoice(const char *path,
+                             FILE *log,
+                             espeak_ng_ERROR_CONTEXT *context);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_CompilePhonemeData(long rate,
+                             FILE *log,
+                             espeak_ng_ERROR_CONTEXT *context);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_CompileIntonation(FILE *log,
+                            espeak_ng_ERROR_CONTEXT *context);
+
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_CompileIntonationPath(const char *source_path,
+                                const char *destination_path,
+                                FILE *log,
+                                espeak_ng_ERROR_CONTEXT *context);
+
+/* eSpeak NG 1.49.1 */
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_CompilePhonemeDataPath(long rate,
+                                 const char *source_path,
+                                 const char *destination_path,
+                                 FILE *log,
+                                 espeak_ng_ERROR_CONTEXT *context);
+                                 
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_SetOutputHooks(espeak_ng_OUTPUT_HOOKS* hooks);
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_SetConstF0(int f0);
+
+ESPEAK_NG_API espeak_ng_STATUS
+espeak_ng_SetRandSeed(long seed);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/dep/include/vall_e.cpp/espeak-ng/speak_lib.h
+++ b/dep/include/vall_e.cpp/espeak-ng/speak_lib.h
@ -0,0 +1,709 @@
+#ifndef SPEAK_LIB_H
+#define SPEAK_LIB_H
+/***************************************************************************
+ *   Copyright (C) 2005 to 2012 by Jonathan Duddington                     *
+ *   email: jonsd@users.sourceforge.net                                    *
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 3 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+ *   GNU General Public License for more details.                          *
+ *                                                                         *
+ *   You should have received a copy of the GNU General Public License     *
+ *   along with this program; if not, see:                                 *
+ *               <http://www.gnu.org/licenses/>.                           *
+ ***************************************************************************/
+
+
+/*************************************************************/
+/* This is the header file for the library version of espeak */
+/*                                                           */
+/*************************************************************/
+
+#include <stdio.h>
+#include <stddef.h>
+
+#if defined(_WIN32) || defined(_WIN64)
+#ifdef LIBESPEAK_NG_EXPORT
+#define ESPEAK_API __declspec(dllexport)
+#else
+#define ESPEAK_API __declspec(dllimport)
+#endif
+#else
+#define ESPEAK_API
+#endif
+
+#define ESPEAK_API_REVISION  12
+/*
+Revision 2
+   Added parameter "options" to espeak_Initialize()
+
+Revision 3
+   Added espeakWORDGAP to  espeak_PARAMETER
+
+Revision 4
+   Added flags parameter to espeak_CompileDictionary()
+
+Revision 5
+   Added espeakCHARS_16BIT
+
+Revision 6
+  Added macros: espeakRATE_MINIMUM, espeakRATE_MAXIMUM, espeakRATE_NORMAL
+
+Revision 7  24.Dec.2011
+  Changed espeak_EVENT structure to add id.string[] for phoneme mnemonics.
+  Added espeakINITIALIZE_PHONEME_IPA option for espeak_Initialize() to report phonemes as IPA names.
+
+Revision 8  26.Apr.2013
+  Added function espeak_TextToPhonemes().
+
+Revision 9  30.May.2013
+  Changed function espeak_TextToPhonemes().
+
+Revision 10 29.Aug.2014
+  Changed phonememode parameter to espeak_TextToPhonemes() and espeak_SetPhonemeTrace
+
+Revision 11 (espeak-ng)
+  Made ESPEAK_API import/export symbols correctly on Windows.
+
+Revision 12 (espeak-ng)
+  Exposed espeak_SetPhonemeCallback. This is available in eSpeak, but was not exposed in this header.
+
+*/
+         /********************/
+         /*  Initialization  */
+         /********************/
+
+// values for 'value' in espeak_SetParameter(espeakRATE, value, 0), nominally in words-per-minute
+#define espeakRATE_MINIMUM  80
+#define espeakRATE_MAXIMUM  450
+#define espeakRATE_NORMAL   175
+
+
+typedef enum {
+  espeakEVENT_LIST_TERMINATED = 0, // Retrieval mode: terminates the event list.
+  espeakEVENT_WORD = 1,            // Start of word
+  espeakEVENT_SENTENCE = 2,        // Start of sentence
+  espeakEVENT_MARK = 3,            // Mark
+  espeakEVENT_PLAY = 4,            // Audio element
+  espeakEVENT_END = 5,             // End of sentence or clause
+  espeakEVENT_MSG_TERMINATED = 6,  // End of message
+  espeakEVENT_PHONEME = 7,         // Phoneme, if enabled in espeak_Initialize()
+  espeakEVENT_SAMPLERATE = 8       // Set sample rate
+} espeak_EVENT_TYPE;
+
+
+
+typedef struct {
+	espeak_EVENT_TYPE type;
+	unsigned int unique_identifier; // message identifier (or 0 for key or character)
+	int text_position;    // the number of characters from the start of the text
+	int length;           // word length, in characters (for espeakEVENT_WORD)
+	int audio_position;   // the time in mS within the generated speech output data
+	int sample;           // sample id (internal use)
+	void* user_data;      // pointer supplied by the calling program
+	union {
+		int number;        // used for WORD and SENTENCE events.
+		const char *name;  // used for MARK and PLAY events.  UTF8 string
+		char string[8];    // used for phoneme names (UTF8). Terminated by a zero byte unless the name needs the full 8 bytes.
+	} id;
+} espeak_EVENT;
+/*
+   When a message is supplied to espeak_Synth, the request is buffered and espeak_Synth returns. When the message is really processed, the callback function will be repeatedly called.
+
+
+   In RETRIEVAL mode, the callback function supplies to the calling program the audio data and an event list terminated by 0 (LIST_TERMINATED).
+
+   In PLAYBACK mode, the callback function is called as soon as an event happens.
+
+   For example suppose that the following message is supplied to espeak_Synth:
+   "hello, hello."
+
+
+   * Once processed in RETRIEVAL mode, it could lead to 3 calls of the callback function :
+
+   ** Block 1:
+   <audio data> +
+   List of events: SENTENCE + WORD + LIST_TERMINATED
+
+   ** Block 2:
+   <audio data> +
+   List of events: WORD + END + LIST_TERMINATED
+
+   ** Block 3:
+   no audio data
+   List of events: MSG_TERMINATED + LIST_TERMINATED
+
+
+   * Once processed in PLAYBACK mode, it could lead to 5 calls of the callback function:
+
+   ** SENTENCE
+   ** WORD (call when the sounds are actually played)
+   ** WORD
+   ** END (call when the end of sentence is actually played.)
+   ** MSG_TERMINATED
+
+
+   The MSG_TERMINATED event is the last event. It can inform the calling program to clear the user data related to the message.
+   So if the synthesis must be stopped, the callback function is called for each pending message with the MSG_TERMINATED event.
+
+   A MARK event indicates a <mark> element in the text.
+   A PLAY event indicates an <audio> element in the text, for which the calling program should play the named sound file.
+*/
+
+
+
+typedef enum {
+	POS_CHARACTER = 1,
+	POS_WORD,
+	POS_SENTENCE
+} espeak_POSITION_TYPE;
+
+
+typedef enum {
+	/* PLAYBACK mode: plays the audio data, supplies events to the calling program*/
+	AUDIO_OUTPUT_PLAYBACK,
+
+	/* RETRIEVAL mode: supplies audio data and events to the calling program */
+	AUDIO_OUTPUT_RETRIEVAL,
+
+	/* SYNCHRONOUS mode: as RETRIEVAL but doesn't return until synthesis is completed */
+	AUDIO_OUTPUT_SYNCHRONOUS,
+
+	/* Synchronous playback */
+	AUDIO_OUTPUT_SYNCH_PLAYBACK
+
+} espeak_AUDIO_OUTPUT;
+
+
+typedef enum {
+	EE_OK=0,
+	EE_INTERNAL_ERROR=-1,
+	EE_BUFFER_FULL=1,
+	EE_NOT_FOUND=2
+} espeak_ERROR;
+
+#define espeakINITIALIZE_PHONEME_EVENTS 0x0001
+#define espeakINITIALIZE_PHONEME_IPA   0x0002
+#define espeakINITIALIZE_DONT_EXIT     0x8000
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API int espeak_Initialize(espeak_AUDIO_OUTPUT output, int buflength, const char *path, int options);
+/* Must be called before any synthesis functions are called.
+   output: the audio data can either be played by eSpeak or passed back by the SynthCallback function.
+
+   buflength:  The length in mS of sound buffers passed to the SynthCallback function.
+            Value=0 gives a default of 60mS.
+            This parameter is only used for AUDIO_OUTPUT_RETRIEVAL and AUDIO_OUTPUT_SYNCHRONOUS modes.
+
+   path: The directory which contains the espeak-ng-data directory, or NULL for the default location.
+
+   options: bit 0:  1=allow espeakEVENT_PHONEME events.
+            bit 1:  1= espeakEVENT_PHONEME events give IPA phoneme names, not eSpeak phoneme names
+            bit 15: 1=don't exit if espeak_data is not found (used for --help)
+
+   Returns: sample rate in Hz, or -1 (EE_INTERNAL_ERROR).
+*/
+
+typedef int (t_espeak_callback)(short*, int, espeak_EVENT*);
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API void espeak_SetSynthCallback(t_espeak_callback* SynthCallback);
+/* Must be called before any synthesis functions are called.
+   This specifies a function in the calling program which is called when a buffer of
+   speech sound data has been produced.
+
+
+   The callback function is of the form:
+
+int SynthCallback(short *wav, int numsamples, espeak_EVENT *events);
+
+   wav:  is the speech sound data which has been produced.
+      NULL indicates that the synthesis has been completed.
+
+   numsamples: is the number of entries in wav.  This number may vary, may be less than
+      the value implied by the buflength parameter given in espeak_Initialize, and may
+      sometimes be zero (which does NOT indicate end of synthesis).
+
+   events: an array of espeak_EVENT items which indicate word and sentence events, and
+      also the occurrence of <mark> and <audio> elements within the text.  The list of
+      events is terminated by an event of type = 0.
+
+
+   Callback returns: 0=continue synthesis,  1=abort synthesis.
+*/
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API void espeak_SetUriCallback(int (*UriCallback)(int, const char*, const char*));
+/* This function may be called before synthesis functions are used, in order to deal with
+   <audio> tags.  It specifies a callback function which is called when an <audio> element is
+   encountered and allows the calling program to indicate whether the sound file which
+   is specified in the <audio> element is available and is to be played.
+
+   The callback function is of the form:
+
+int UriCallback(int type, const char *uri, const char *base);
+
+   type:  type of callback event.  Currently only 1= <audio> element
+
+   uri:   the "src" attribute from the <audio> element
+
+   base:  the "xml:base" attribute (if any) from the <speak> element
+
+   Return: 1=don't play the sound, but speak the text alternative.
+           0=place a PLAY event in the event list at the point where the <audio> element
+             occurs.  The calling program can then play the sound at that point.
+*/
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API void espeak_SetPhonemeCallback(int (*PhonemeCallback)(const char *));
+
+
+         /********************/
+         /*    Synthesis     */
+         /********************/
+
+
+#define espeakCHARS_AUTO   0
+#define espeakCHARS_UTF8   1
+#define espeakCHARS_8BIT   2
+#define espeakCHARS_WCHAR  3
+#define espeakCHARS_16BIT  4
+
+#define espeakSSML        0x10
+#define espeakPHONEMES    0x100
+#define espeakENDPAUSE    0x1000
+#define espeakKEEP_NAMEDATA 0x2000
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API espeak_ERROR espeak_Synth(const void *text,
+	size_t size,
+	unsigned int position,
+	espeak_POSITION_TYPE position_type,
+	unsigned int end_position,
+	unsigned int flags,
+	unsigned int* unique_identifier,
+	void* user_data);
+/* Synthesize speech for the specified text.  The speech sound data is passed to the calling
+   program in buffers by means of the callback function specified by espeak_SetSynthCallback(). The command is asynchronous: it is internally buffered and returns as soon as possible. If espeak_Initialize was previously called with AUDIO_OUTPUT_PLAYBACK as argument, the sound data are played by eSpeak.
+
+   text: The text to be spoken, terminated by a zero character. It may be either 8-bit characters,
+      wide characters (wchar_t), or UTF8 encoding.  Which of these is determined by the "flags"
+      parameter.
+
+   size: Equal to (or greater than) the size of the text data, in bytes.  This is used in order
+      to allocate internal storage space for the text.  This value is not used for
+      AUDIO_OUTPUT_SYNCHRONOUS mode.
+
+   position:  The position in the text where speaking starts. Zero indicates speak from the
+      start of the text.
+
+   position_type:  Determines whether "position" is a number of characters, words, or sentences.
+      Values: POS_CHARACTER, POS_WORD, POS_SENTENCE (see espeak_POSITION_TYPE).
+
+   end_position:  If set, this gives a character position at which speaking will stop.  A value
+      of zero indicates no end position.
+
+   flags:  These may be OR'd together:
+      Type of character codes, one of:
+         espeakCHARS_UTF8     UTF8 encoding
+         espeakCHARS_8BIT     The 8 bit ISO-8859 character set for the particular language.
+         espeakCHARS_AUTO     8 bit or UTF8  (this is the default)
+         espeakCHARS_WCHAR    Wide characters (wchar_t)
+         espeakCHARS_16BIT    16 bit characters.
+
+      espeakSSML   Elements within < > are treated as SSML elements, or if not recognised are ignored.
+
+      espeakPHONEMES  Text within [[ ]] is treated as phonemes codes (in espeak's Kirshenbaum encoding).
+
+      espeakENDPAUSE  If set then a sentence pause is added at the end of the text.  If not set then
+         this pause is suppressed.
+
+   unique_identifier: This must be either NULL, or point to an integer variable to
+       which eSpeak writes a message identifier number.
+       eSpeak includes this number in espeak_EVENT messages which are the result of
+       this call of espeak_Synth().
+
+   user_data: a pointer (or NULL) which will be passed to the callback function in
+       espeak_EVENT messages.
+
+   Return: EE_OK: operation achieved
+           EE_BUFFER_FULL: the command can not be buffered;
+             you may try after a while to call the function again.
+	   EE_INTERNAL_ERROR.
+*/
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API espeak_ERROR espeak_Synth_Mark(const void *text,
+	size_t size,
+	const char *index_mark,
+	unsigned int end_position,
+	unsigned int flags,
+	unsigned int* unique_identifier,
+	void* user_data);
+/* Synthesize speech for the specified text.  Similar to espeak_Synth() but the start position is
+   specified by the name of a <mark> element in the text.
+
+   index_mark:  The "name" attribute of a <mark> element within the text which specified the
+      point at which synthesis starts.  UTF8 string.
+
+   For the other parameters, see espeak_Synth()
+
+   Return: EE_OK: operation achieved
+           EE_BUFFER_FULL: the command can not be buffered;
+             you may try after a while to call the function again.
+	   EE_INTERNAL_ERROR.
+*/
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API espeak_ERROR espeak_Key(const char *key_name);
+/* Speak the name of a keyboard key.
+   If key_name is a single character, it speaks the name of the character.
+   Otherwise, it speaks key_name as a text string.
+
+   Return: EE_OK: operation achieved
+           EE_BUFFER_FULL: the command can not be buffered;
+             you may try after a while to call the function again.
+	   EE_INTERNAL_ERROR.
+*/
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API espeak_ERROR espeak_Char(wchar_t character);
+/* Speak the name of the given character
+
+   Return: EE_OK: operation achieved
+           EE_BUFFER_FULL: the command can not be buffered;
+             you may try after a while to call the function again.
+	   EE_INTERNAL_ERROR.
+*/
+
+
+
+
+         /***********************/
+         /*  Speech Parameters  */
+         /***********************/
+
+typedef enum {
+  espeakSILENCE=0, /* internal use */
+  espeakRATE=1,
+  espeakVOLUME=2,
+  espeakPITCH=3,
+  espeakRANGE=4,
+  espeakPUNCTUATION=5,
+  espeakCAPITALS=6,
+  espeakWORDGAP=7,
+  espeakOPTIONS=8,   // reserved for misc. options.  not yet used
+  espeakINTONATION=9,
+  espeakSSML_BREAK_MUL=10,
+
+  espeakRESERVED2=11,
+  espeakEMPHASIS,   /* internal use */
+  espeakLINELENGTH, /* internal use */
+  espeakVOICETYPE,  // internal, 1=mbrola
+  N_SPEECH_PARAM    /* last enum */
+} espeak_PARAMETER;
+
+typedef enum {
+  espeakPUNCT_NONE=0,
+  espeakPUNCT_ALL=1,
+  espeakPUNCT_SOME=2
+} espeak_PUNCT_TYPE;
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API espeak_ERROR espeak_SetParameter(espeak_PARAMETER parameter, int value, int relative);
+/* Sets the value of the specified parameter.
+   relative=0   Sets the absolute value of the parameter.
+   relative=1   Sets a relative value of the parameter.
+
+   parameter:
+      espeakRATE:    speaking speed in words per minute.  Values 80 to 450.
+
+      espeakVOLUME:  volume in range 0-200 or more.
+                     0=silence, 100=normal full volume, greater values may produce amplitude compression or distortion
+
+      espeakPITCH:   base pitch, range 0-100.  50=normal
+
+      espeakRANGE:   pitch range, range 0-100. 0=monotone, 50=normal
+
+      espeakPUNCTUATION:  which punctuation characters to announce:
+         value in espeak_PUNCT_TYPE (none, all, some),
+         see espeak_SetPunctuationList() to specify which characters are announced.
+
+      espeakCAPITALS: announce capital letters by:
+         0=none,
+         1=sound icon,
+         2=spelling,
+         3 or higher, by raising pitch.  This value gives the amount in Hz by which the pitch
+            of a word is raised to indicate it has a capital letter.
+
+      espeakWORDGAP:  pause between words, units of 10mS (at the default speed)
+
+   Return: EE_OK: operation achieved
+           EE_BUFFER_FULL: the command can not be buffered;
+             you may try after a while to call the function again.
+	   EE_INTERNAL_ERROR.
+*/
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API int espeak_GetParameter(espeak_PARAMETER parameter, int current);
+/* current=0  Returns the default value of the specified parameter.
+   current=1  Returns the current value of the specified parameter, as set by SetParameter()
+*/
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API espeak_ERROR espeak_SetPunctuationList(const wchar_t *punctlist);
+/* Specifies a list of punctuation characters whose names are to be spoken when the
+   value of the Punctuation parameter is set to "some".
+
+   punctlist:  A list of character codes, terminated by a zero character.
+
+   Return: EE_OK: operation achieved
+           EE_BUFFER_FULL: the command can not be buffered;
+             you may try after a while to call the function again.
+	   EE_INTERNAL_ERROR.
+*/
+
+#define espeakPHONEMES_SHOW    0x01
+#define espeakPHONEMES_IPA     0x02
+#define espeakPHONEMES_TRACE   0x08
+#define espeakPHONEMES_MBROLA  0x10
+#define espeakPHONEMES_TIE     0x80
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API void espeak_SetPhonemeTrace(int phonememode, FILE *stream);
+/* phonememode:  Controls the output of phoneme symbols for the text
+      bits 0-2:
+         value=0  No phoneme output (default)
+         value=1  Output the translated phoneme symbols for the text
+         value=2  as (1), but produces IPA phoneme names rather than ascii
+      bit 3:   output a trace of how the translation was done (showing the matching rules and list entries)
+      bit 4:   produce pho data for mbrola
+      bit 7:   use (bits 8-23) as a tie within multi-letter phoneme names
+      bits 8-23:  separator character, between phoneme names
+
+   stream   output stream for the phoneme symbols (and trace).  If stream=NULL then it uses stdout.
+*/
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API const char *espeak_TextToPhonemes(const void **textptr, int textmode, int phonememode);
+/* Translates text into phonemes.  Call espeak_SetVoiceByName() first, to select a language.
+
+   It returns a pointer to a character string which contains the phonemes for the text up to
+   end of a sentence, or comma, semicolon, colon, or similar punctuation.
+
+   textptr: The address of a pointer to the input text which is terminated by a zero character.
+      On return, the pointer has been advanced past the text which has been translated, or else set
+      to NULL to indicate that the end of the text has been reached.
+
+   textmode: Type of character codes, one of:
+         espeakCHARS_UTF8     UTF8 encoding
+         espeakCHARS_8BIT     The 8 bit ISO-8859 character set for the particular language.
+         espeakCHARS_AUTO     8 bit or UTF8  (this is the default)
+         espeakCHARS_WCHAR    Wide characters (wchar_t)
+         espeakCHARS_16BIT    16 bit characters.
+
+   phonememode:
+        bit 1:   0=eSpeak's ascii phoneme names, 1= International Phonetic Alphabet (as UTF-8 characters).
+        bit 7:   use (bits 8-23) as a tie within multi-letter phoneme names
+        bits 8-23:  separator character, between phoneme names
+
+*/
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API void espeak_CompileDictionary(const char *path, FILE *log, int flags);
+/* Compile pronunciation dictionary for a language which corresponds to the currently
+   selected voice.  The required voice should be selected before calling this function.
+
+   path:  The directory which contains the language's '_rules' and '_list' files.
+          'path' should end with a path separator character ('/').
+   log:   Stream for error reports and statistics information. If log=NULL then stderr will be used.
+
+   flags:  Bit 0: include source line information for debug purposes (This is displayed with the
+          -X command line option).
+*/
+         /***********************/
+         /*   Voice Selection   */
+         /***********************/
+
+
+// voice table entry: describes a single voice
+typedef struct {
+	const char *name;      // a given name for this voice. UTF8 string.
+	const char *languages;       // list of pairs of (byte) priority + (string) language (and dialect qualifier)
+	const char *identifier;      // the filename for this voice within espeak-ng-data/voices
+	unsigned char gender;  // 0=not specified, 1=male, 2=female
+	unsigned char age;     // 0=not specified, or age in years
+	unsigned char variant; // only used when passed as a parameter to espeak_SetVoiceByProperties
+	unsigned char xx1;     // for internal use
+	int score;       // for internal use
+	void *spare;     // for internal use
+} espeak_VOICE;
+
+/* Note: The espeak_VOICE structure is used for two purposes:
+  1.  To return the details of the available voices.
+  2.  As a parameter to  espeak_SetVoiceByProperties() in order to specify selection criteria.
+
+   In (1), the "languages" field consists of a list of (UTF8) language names for which this voice
+   may be used, each language name in the list is terminated by a zero byte and is also preceded by
+   a single byte which gives a "priority" number.  The list of languages is terminated by an
+   additional zero byte.
+
+   A language name consists of a language code, optionally followed by one or more qualifier (dialect)
+   names separated by hyphens (eg. "en-uk").  A voice might, for example, have languages "en-uk" and
+   "en".  Even without "en" listed, voice would still be selected for the "en" language (because
+   "en-uk" is related) but at a lower priority.
+
+   The priority byte indicates how the voice is preferred for the language. A low number indicates a
+   more preferred voice, a higher number indicates a less preferred voice.
+
+   In (2), the "languages" field consists simply of a single (UTF8) language name, with no preceding
+   priority byte.
+*/
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API const espeak_VOICE **espeak_ListVoices(espeak_VOICE *voice_spec);
+/* Reads the voice files from espeak-ng-data/voices and creates an array of espeak_VOICE pointers.
+   The list is terminated by a NULL pointer
+
+   If voice_spec is NULL then all voices are listed.
+   If voice_spec is given, then only the voices which are compatible with the voice_spec
+   are listed, and they are listed in preference order.
+*/
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API espeak_ERROR espeak_SetVoiceByFile(const char *filename);
+/* Loads a voice given the file path.  Language is not considered.
+   "filename" is a UTF8 string.
+
+   Return: EE_OK: operation achieved
+           EE_BUFFER_FULL: the command can not be buffered;
+             you may try after a while to call the function again.
+	   EE_INTERNAL_ERROR.
+*/
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API espeak_ERROR espeak_SetVoiceByName(const char *name);
+/* Searches for a voice with a matching "name" field.  Language is not considered.
+   "name" is a UTF8 string.
+
+   Return: EE_OK: operation achieved
+           EE_BUFFER_FULL: the command can not be buffered;
+             you may try after a while to call the function again.
+	   EE_INTERNAL_ERROR.
+*/
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API espeak_ERROR espeak_SetVoiceByProperties(espeak_VOICE *voice_spec);
+/* An espeak_VOICE structure is used to pass criteria to select a voice.  Any of the following
+   fields may be set:
+
+   name     NULL, or a voice name
+
+   languages  NULL, or a single language string (with optional dialect), eg. "en-uk", or "en"
+
+   gender   0=not specified, 1=male, 2=female
+
+   age      0=not specified, or an age in years
+
+   variant  After a list of candidates is produced, scored and sorted, "variant" is used to index
+            that list and choose a voice.
+            variant=0 takes the top voice (i.e. best match). variant=1 takes the next voice, etc
+*/
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API espeak_VOICE *espeak_GetCurrentVoice(void);
+/* Returns the espeak_VOICE data for the currently selected voice.
+   This is not affected by temporary voice changes caused by SSML elements such as <voice> and <s>
+*/
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API espeak_ERROR espeak_Cancel(void);
+/* Immediately stops synthesis and audio output of the current text. When this
+   function returns, the audio output is fully stopped and the synthesizer is ready to
+   synthesize a new message.
+
+   Return: EE_OK: operation achieved
+	   EE_INTERNAL_ERROR.
+*/
+
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API int espeak_IsPlaying(void);
+/* Returns 1 if audio is played, 0 otherwise.
+*/
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API espeak_ERROR espeak_Synchronize(void);
+/* This function returns when all data have been spoken.
+   Return: EE_OK: operation achieved
+	   EE_INTERNAL_ERROR.
+*/
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API espeak_ERROR espeak_Terminate(void);
+/* last function to be called.
+   Return: EE_OK: operation achieved
+	   EE_INTERNAL_ERROR.
+*/
+
+
+#ifdef __cplusplus
+extern "C"
+#endif
+ESPEAK_API const char *espeak_Info(const char **path_data);
+/* Returns the version number string.
+   path_data  returns the path to espeak_data
+*/
+#endif
--- a/dep/include/vall_e.cpp/ggml-alloc.h
+++ b/dep/include/vall_e.cpp/ggml-alloc.h
@ -0,0 +1,76 @@
+#pragma once
+
+#include "ggml.h"
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
+typedef struct      ggml_backend_buffer * ggml_backend_buffer_t;
+typedef struct             ggml_backend * ggml_backend_t;
+
+// Tensor allocator
+struct ggml_tallocr {
+    ggml_backend_buffer_t buffer;
+    void * base;
+    size_t alignment;
+    size_t offset;
+};
+
+GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
+GGML_API enum ggml_status    ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
+
+// Graph allocator
+/*
+  Example usage:
+    ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
+
+    // optional: create a worst-case graph and reserve the buffers to avoid reallocations
+    ggml_gallocr_reserve(galloc, build_graph(max_batch));
+
+    // allocate the graph
+    struct ggml_cgraph * graph = build_graph(batch);
+    ggml_gallocr_alloc_graph(galloc, graph);
+
+    printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0));
+
+    // evaluate the graph
+    ggml_backend_graph_compute(backend, graph);
+*/
+
+// special tensor flags for use with the graph allocator:
+//   ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses
+//   ggml_set_output(): output tensors are never freed and never overwritten
+
+typedef struct ggml_gallocr * ggml_gallocr_t;
+
+GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft);
+GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs);
+GGML_API void           ggml_gallocr_free(ggml_gallocr_t galloc);
+
+// pre-allocate buffers from a measure graph - does not allocate or modify the graph
+// call with a worst-case graph to avoid buffer reallocations
+// not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
+// returns false if the buffer allocation failed
+GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
+GGML_API bool ggml_gallocr_reserve_n(
+    ggml_gallocr_t galloc,
+    struct ggml_cgraph * graph,
+    const int * node_buffer_ids,
+    const int * leaf_buffer_ids);
+
+// automatic reallocation if the topology changes when using a single buffer
+// returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers)
+GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
+
+GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id);
+
+// Utils
+// Create a buffer and allocate all the tensors in a ggml_context
+GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
+GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);
+
+#ifdef  __cplusplus
+}
+#endif
--- a/dep/include/vall_e.cpp/ggml-backend.h
+++ b/dep/include/vall_e.cpp/ggml-backend.h
@ -0,0 +1,354 @@
+#pragma once
+
+#include "ggml.h"
+#include "ggml-alloc.h"
+
+// GGML_BACKEND_API: linkage/visibility prefix placed in front of backend declarations.
+//   - GGML_BACKEND_SHARED defined, _WIN32 and not MinGW: __declspec(dllexport) when GGML_BACKEND_BUILD
+//     is defined, __declspec(dllimport) otherwise
+//   - GGML_BACKEND_SHARED defined, any other toolchain: default symbol visibility
+//   - GGML_BACKEND_SHARED not defined: plain extern
+// Note that the declarations in this header itself are prefixed with GGML_API, not GGML_BACKEND_API.
+#ifdef GGML_BACKEND_SHARED
+#    if defined(_WIN32) && !defined(__MINGW32__)
+#        ifdef GGML_BACKEND_BUILD
+#            define GGML_BACKEND_API __declspec(dllexport) extern
+#        else
+#            define GGML_BACKEND_API __declspec(dllimport) extern
+#        endif
+#    else
+#        define GGML_BACKEND_API __attribute__ ((visibility ("default"))) extern
+#    endif
+#else
+#    define GGML_BACKEND_API extern
+#endif
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+    // Handle types used by the rest of this header. Each one is a pointer to a struct that this header
+    // names but does not define, except ggml_backend_graph_plan_t, which is an untyped void *.
+    typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
+    typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
+    typedef struct ggml_backend_event * ggml_backend_event_t;
+    typedef struct ggml_backend * ggml_backend_t;
+    typedef void * ggml_backend_graph_plan_t;
+    typedef struct ggml_backend_reg * ggml_backend_reg_t;
+    typedef struct ggml_backend_device * ggml_backend_dev_t;
+
+
+    //
+    // Backend buffer type
+    //
+
+    GGML_API const char *          ggml_backend_buft_name          (ggml_backend_buffer_type_t buft);
+    GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer  (ggml_backend_buffer_type_t buft, size_t size);
+    GGML_API size_t                ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
+    GGML_API size_t                ggml_backend_buft_get_max_size  (ggml_backend_buffer_type_t buft);
+    GGML_API size_t                ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
+    GGML_API bool                  ggml_backend_buft_is_host       (ggml_backend_buffer_type_t buft);
+    GGML_API ggml_backend_dev_t    ggml_backend_buft_get_device    (ggml_backend_buffer_type_t buft);
+
+    //
+    // Backend buffer
+    //
+
+    // Usage tag of a backend buffer: set with ggml_backend_buffer_set_usage() and read back with
+    // ggml_backend_buffer_get_usage().
+    enum ggml_backend_buffer_usage {
+        GGML_BACKEND_BUFFER_USAGE_ANY = 0,
+        // operations that use tensors allocated in a WEIGHTS buffer are preferably assigned to the same
+        // backend as the buffer (see the backend scheduler example usage later in this header)
+        GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
+        GGML_BACKEND_BUFFER_USAGE_COMPUTE = 2,
+    };
+
+    GGML_API const char *                   ggml_backend_buffer_name          (ggml_backend_buffer_t buffer);
+    GGML_API void                           ggml_backend_buffer_free          (ggml_backend_buffer_t buffer);
+    GGML_API void *                         ggml_backend_buffer_get_base      (ggml_backend_buffer_t buffer);
+    GGML_API size_t                         ggml_backend_buffer_get_size      (ggml_backend_buffer_t buffer);
+    GGML_API enum ggml_status               ggml_backend_buffer_init_tensor   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+    GGML_API size_t                         ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
+    GGML_API size_t                         ggml_backend_buffer_get_max_size  (ggml_backend_buffer_t buffer);
+    GGML_API size_t                         ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+    GGML_API void                           ggml_backend_buffer_clear         (ggml_backend_buffer_t buffer, uint8_t value);
+    GGML_API bool                           ggml_backend_buffer_is_host       (ggml_backend_buffer_t buffer);
+    GGML_API void                           ggml_backend_buffer_set_usage     (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
+    GGML_API enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage     (ggml_backend_buffer_t buffer);
+    GGML_API ggml_backend_buffer_type_t     ggml_backend_buffer_get_type      (ggml_backend_buffer_t buffer);
+    GGML_API void                           ggml_backend_buffer_reset         (ggml_backend_buffer_t buffer);
+
+    // tensor copy between different backends
+    GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
+
+    //
+    // Backend (stream)
+    //
+
+    GGML_API ggml_guid_t  ggml_backend_guid(ggml_backend_t backend);
+    GGML_API const char * ggml_backend_name(ggml_backend_t backend);
+    GGML_API void         ggml_backend_free(ggml_backend_t backend);
+
+    GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
+    GGML_API ggml_backend_buffer_t      ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
+    GGML_API size_t                     ggml_backend_get_alignment(ggml_backend_t backend);
+    GGML_API size_t                     ggml_backend_get_max_size(ggml_backend_t backend);
+
+    GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend,       struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+    GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
+
+    // "offset" refers to the offset in tensor->data for setting/getting data
+    GGML_API void ggml_backend_tensor_set(      struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+    GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
+    GGML_API void ggml_backend_tensor_memset(   struct ggml_tensor * tensor,     uint8_t value, size_t offset, size_t size);
+
+    GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
+
+    GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    GGML_API void                      ggml_backend_graph_plan_free  (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+
+    GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+    GGML_API enum ggml_status ggml_backend_graph_compute      (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+
+    // NOTE: will be removed, use device version instead
+    GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
+    GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
+    GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
+
+    // asynchronous copy
+    // the copy is performed after all the currently queued operations in backend_src
+    // backend_dst will wait for the copy to complete before performing other operations
+    // automatic fallback to sync copy if async is not supported
+    GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
+
+    GGML_API ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend);
+
+    //
+    // Events
+    //
+
+    GGML_API ggml_backend_event_t ggml_backend_event_new(ggml_backend_dev_t device);
+    GGML_API void                 ggml_backend_event_free(ggml_backend_event_t event);
+    GGML_API void                 ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend);
+    GGML_API void                 ggml_backend_event_synchronize(ggml_backend_event_t event);
+    GGML_API void                 ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event);
+
+    //
+    // Backend device
+    //
+
+    // Kind of device behind a ggml_backend_dev_t: returned by ggml_backend_dev_type(), stored in
+    // ggml_backend_dev_props::type, and used as the selector for ggml_backend_dev_by_type() and
+    // ggml_backend_init_by_type().
+    // No explicit values are given, so the enumerators are 0, 1, 2 in declaration order.
+    // The function ggml_backend_dev_type() shares this identifier, so the type is always spelled
+    // `enum ggml_backend_dev_type` in the declarations of this header.
+    enum ggml_backend_dev_type {
+        // CPU device using system memory
+        GGML_BACKEND_DEVICE_TYPE_CPU,
+        // GPU device using dedicated memory
+        GGML_BACKEND_DEVICE_TYPE_GPU,
+        // accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
+        GGML_BACKEND_DEVICE_TYPE_ACCEL
+    };
+
+    // functionality supported by the device
+    // (embedded as the `caps` member of struct ggml_backend_dev_props)
+    struct ggml_backend_dev_caps {
+        // asynchronous operations
+        bool async;
+        // pinned host buffer
+        // NOTE(review): presumably relates to ggml_backend_dev_host_buffer_type() - confirm
+        bool host_buffer;
+        // creating buffers from host ptr
+        // NOTE(review): presumably relates to ggml_backend_dev_buffer_from_host_ptr() - confirm
+        bool buffer_from_host_ptr;
+        // event synchronization
+        // NOTE(review): presumably relates to the ggml_backend_event_* functions - confirm
+        bool events;
+    };
+
+    // all the device properties
+    // ggml_backend_dev_get_props() takes a pointer to this struct (presumably as the output it fills in - confirm).
+    // The fields parallel the individual getters ggml_backend_dev_name(), ggml_backend_dev_description(),
+    // ggml_backend_dev_memory() and ggml_backend_dev_type().
+    struct ggml_backend_dev_props {
+        const char * name;
+        const char * description;
+        // NOTE(review): units are not stated in this header; presumably bytes - confirm
+        size_t memory_free;
+        size_t memory_total;
+        enum ggml_backend_dev_type type;
+        struct ggml_backend_dev_caps caps;
+    };
+
+    GGML_API const char *                  ggml_backend_dev_name(ggml_backend_dev_t device);
+    GGML_API const char *                  ggml_backend_dev_description(ggml_backend_dev_t device);
+    GGML_API void                          ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total);
+    GGML_API enum ggml_backend_dev_type    ggml_backend_dev_type(ggml_backend_dev_t device);
+    GGML_API void                          ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props);
+    GGML_API ggml_backend_reg_t            ggml_backend_dev_backend_reg(ggml_backend_dev_t device);
+    GGML_API ggml_backend_t                ggml_backend_dev_init(ggml_backend_dev_t device, const char * params);
+    GGML_API ggml_backend_buffer_type_t    ggml_backend_dev_buffer_type(ggml_backend_dev_t device);
+    GGML_API ggml_backend_buffer_type_t    ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device);
+    GGML_API ggml_backend_buffer_t         ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
+
+    GGML_API bool                          ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
+    GGML_API bool                          ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft);
+    GGML_API bool                          ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
+
+    //
+    // Backend (reg)
+    //
+
+    GGML_API const char *       ggml_backend_reg_name(ggml_backend_reg_t reg);
+    GGML_API size_t             ggml_backend_reg_dev_count(ggml_backend_reg_t reg);
+    GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index);
+    GGML_API void *             ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name);
+
+    // Common functions that may be obtained using ggml_backend_reg_get_proc_address
+
+    // Split buffer type for tensor parallelism
+    typedef ggml_backend_buffer_type_t   (*ggml_backend_split_buffer_type_t)(int main_device, const float * tensor_split);
+    // Set the number of threads for the backend
+    typedef void                         (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads);
+    // Get additional buffer types provided by the device (returns a NULL-terminated array)
+    typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device);
+    // Set the abort callback for the backend
+    typedef void                         (*ggml_backend_set_abort_callback_t)(ggml_backend_t backend, ggml_abort_callback abort_callback, void * abort_callback_data);
+    // One feature flag, expressed as a name/value pair of C strings
+    struct ggml_backend_feature {
+        const char * name;
+        const char * value;
+    };
+    // Get a list of feature flags supported by the backend (returns a NULL-terminated array)
+    // NOTE(review): the array elements are structs, so how the end is marked (e.g. an entry whose
+    // name is NULL) is not visible in this header - confirm against a backend implementation.
+    typedef struct ggml_backend_feature * (*ggml_backend_get_features_t)(ggml_backend_reg_t reg);
+
+    //
+    // Backend registry
+    //
+
+    GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
+
+    // Backend (reg) enumeration
+    GGML_API size_t             ggml_backend_reg_count(void);
+    GGML_API ggml_backend_reg_t ggml_backend_reg_get(size_t index);
+    GGML_API ggml_backend_reg_t ggml_backend_reg_by_name(const char * name);
+
+    // Device enumeration
+    GGML_API size_t             ggml_backend_dev_count(void);
+    GGML_API ggml_backend_dev_t ggml_backend_dev_get(size_t index);
+    GGML_API ggml_backend_dev_t ggml_backend_dev_by_name(const char * name);
+    GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type);
+
+    // Direct backend (stream) initialization
+    // = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params)
+    GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params);
+    // = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params)
+    GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params);
+    // = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL)
+    GGML_API ggml_backend_t ggml_backend_init_best(void);
+
+    // Load a backend from a dynamic library and register it
+    GGML_API ggml_backend_reg_t ggml_backend_load(const char * path);
+    // Unload a backend if loaded dynamically and unregister it
+    GGML_API void               ggml_backend_unload(ggml_backend_reg_t reg);
+    // Load all known backends from dynamic libraries
+    GGML_API void               ggml_backend_load_all(void);
+    GGML_API void               ggml_backend_load_all_from_path(const char * dir_path);
+
+    //
+    // Backend scheduler
+    //
+
+    // The backend scheduler allows for multiple backend devices to be used together
+    // Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
+    // The backends are selected based on:
+    // - the backend that supports the operation
+    // - the location of the pre-allocated tensors (e.g. the weights)
+    /*
+      Example usage:
+
+        // operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned
+        // preferably to run on the same backend as the buffer
+        ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
+
+        sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false);
+
+        // initialize buffers from a max size graph (optional)
+        reserve_graph = build_graph(sched, max_batch_size);
+
+        // manually assign nodes to a backend (optional, should not be needed in most cases)
+        struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
+        ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu);
+
+        ggml_backend_sched_reserve(sched, reserve_graph);
+
+        // compute
+        graph = build_graph(sched); // the graph and its tensors are single-use in terms of allocation, multi-use in terms of computation
+        for (int i = 0; i < 10; ++i) {
+            ggml_backend_sched_graph_compute(sched, graph); // on the first iteration the graph is allocated automatically
+        }
+
+        // if there are graph inputs:
+        graph = build_graph(sched); // get a new graph that is not allocated (the metadata for the old graph is freed once ggml_free is called)
+        ggml_backend_sched_reset(sched); // clear the allocation of the previous graph
+        ggml_backend_sched_alloc_graph(sched, graph); // explicitly allocate the new graph but do not execute it
+        ggml_backend_tensor_set(input_tensor, ...); // copy data to the newly allocated graph tensors
+        ggml_backend_sched_graph_compute(sched, graph); // execute the graph
+
+        // as an alternative to the above it is also possible to assign the inputs to a dedicated context and
+        // allocate them statically via ggml_backend_alloc_ctx_tensors
+    */
+
+    typedef struct ggml_backend_sched * ggml_backend_sched_t;
+
+    // Evaluation callback for each node in the graph (set with ggml_backend_sched_set_eval_callback)
+    // when ask == true, the scheduler wants to know if the user wants to observe this node
+    // this allows the scheduler to batch nodes together in order to evaluate them in a single call
+    //
+    // when ask == false, the scheduler is passing the node tensor to the user for observation
+    // if the user returns false, the scheduler will cancel the graph compute
+    //
+    typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
+
+    // Initialize a backend scheduler, backends with low index are given priority over backends with high index
+    GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
+    GGML_API void                 ggml_backend_sched_free(ggml_backend_sched_t sched);
+
+    // Initialize backend buffers from a measure graph
+    GGML_API bool                 ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph); // returns success
+
+    GGML_API int                  ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched);
+    GGML_API ggml_backend_t       ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i);
+
+    // Get the number of splits of the last graph
+    GGML_API int                  ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
+    GGML_API int                  ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
+
+    GGML_API size_t               ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
+
+    GGML_API void                 ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
+    GGML_API ggml_backend_t       ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
+
+    // Allocate and compute graph on the backend scheduler
+    GGML_API bool                 ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph); // returns success
+    GGML_API enum ggml_status     ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
+    GGML_API enum ggml_status     ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
+    GGML_API void                 ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
+
+    // Reset all assignments and allocators - must be called before changing the node backends or allocating a new graph.
+    // This in effect deallocates all tensors that were previously allocated and leaves them with dangling pointers.
+    // The correct way to use this API is to discard the deallocated tensors and create new ones.
+    GGML_API void                 ggml_backend_sched_reset(ggml_backend_sched_t sched);
+
+    // Set a callback to be called for each resulting node during graph compute
+    GGML_API void                 ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
+
+    //
+    // Utils
+    //
+
+    // Result of ggml_backend_graph_copy(); handed back by value to ggml_backend_graph_copy_free().
+    // NOTE(review): the member names suggest that `buffer` backs the tensors of `ctx_allocated`, while
+    // `ctx_unallocated` holds tensors without storage of their own - confirm against the implementation.
+    struct ggml_backend_graph_copy {
+        ggml_backend_buffer_t buffer;
+        struct ggml_context * ctx_allocated;
+        struct ggml_context * ctx_unallocated;
+        struct ggml_cgraph * graph;
+    };
+
+    // Copy a graph to a different backend
+    GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
+    GGML_API void                           ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
+
+    typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
+
+    // Compare the output of two backends
+    GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
+
+    // Tensor initialization
+    GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
+    GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor);
+
+    // CPU buffer types are always available
+    GGML_API ggml_backend_buffer_t      ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
+    GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
+
+#ifdef  __cplusplus
+}
+#endif
--- a/dep/include/vall_e.cpp/ggml-blas.h
+++ b/dep/include/vall_e.cpp/ggml-blas.h
@ -0,0 +1,25 @@
+#pragma once
+
+#include "ggml.h"
+#include "ggml-backend.h"
+
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+// backend API
+GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void);
+
+GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend);
+
+// number of threads used for conversion to float
+// for openblas and blis, this will also set the number of threads used for blas operations
+GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
+
+GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void);
+
+
+#ifdef  __cplusplus
+}
+#endif
--- a/dep/include/vall_e.cpp/ggml-cann.h
+++ b/dep/include/vall_e.cpp/ggml-cann.h
@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2023-2024 The ggml authors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include "ggml-backend.h"
+#include "ggml.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Maximum number of CANN devices supported.
+ */
+#define GGML_CANN_MAX_DEVICES 16
+
+GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cann_reg(void);
+
+/**
+ * @brief Initializes the CANN backend for a specified device.
+ *
+ * This function initializes the CANN backend for the given device.
+ * It verifies the device index, allocates a context, and creates a backend
+ * instance.
+ *
+ * @param device The index of the device to initialize.
+ * @return A pointer to the initialized backend instance, or nullptr on failure.
+ */
+GGML_BACKEND_API ggml_backend_t ggml_backend_cann_init(int32_t device);
+
+/**
+ * @brief Checks if a given backend is a CANN backend.
+ *
+ * This function verifies if the provided backend is a CANN backend by comparing
+ * its GUID with the CANN backend's GUID.
+ *
+ * @param backend The backend instance to check.
+ * @return True if the backend is a CANN backend, false otherwise.
+ */
+GGML_BACKEND_API bool ggml_backend_is_cann(ggml_backend_t backend);
+
+/**
+ * @brief Retrieves the CANN buffer type for a specified device.
+ *
+ * This function initializes and returns the buffer type interface associated
+ * with the given device. It ensures thread-safe access using a mutex.
+ *
+ * @param device The device index for which to retrieve the buffer type.
+ * @return A pointer to the buffer type interface for the specified device, or
+ * nullptr if the device index is out of range.
+ */
+GGML_BACKEND_API ggml_backend_buffer_type_t
+ggml_backend_cann_buffer_type(int32_t device);
+
+/**
+ * @brief Retrieves the number of CANN devices available.
+ *
+ * This function returns the number of CANN devices available based on
+ * information obtained from `ggml_cann_info()`.
+ *
+ * @return The number of CANN devices available.
+ */
+GGML_BACKEND_API int32_t ggml_backend_cann_get_device_count(void);
+
+/**
+ * @brief pinned host buffer for use with the CPU backend for faster copies between CPU and NPU.
+ *
+ * @return A pointer to the host buffer type interface.
+ */
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
+
+/**
+ * @brief Retrieves the description of a specific CANN device.
+ *
+ * This function sets the specified device, retrieves the SoC name,
+ * and writes it into the provided description buffer.
+ *
+ * @param device The device index to retrieve the description for.
+ * @param description Pointer to a buffer where the description will be written.
+ * @param description_size Size of the description buffer.
+ */
+GGML_BACKEND_API void ggml_backend_cann_get_device_description(
+    int32_t device, char* description, size_t description_size);
+
+/**
+ * @brief Retrieves the memory information of a specific CANN device.
+ *
+ * This function sets the specified device, retrieves the free and total
+ * memory information of the specified type (ACL_HBM_MEM), and stores them
+ * in the provided pointers.
+ *
+ * @param device The device index to retrieve memory information for.
+ * @param free Pointer to a variable where the free memory size will be stored.
+ * @param total Pointer to a variable where the total memory size will be
+ * stored.
+ */
+GGML_BACKEND_API void ggml_backend_cann_get_device_memory(int32_t device,
+                                                  size_t* free,
+                                                  size_t* total);
+
+#ifdef __cplusplus
+}
+#endif
--- a/dep/include/vall_e.cpp/ggml-cpp.h
+++ b/dep/include/vall_e.cpp/ggml-cpp.h
@ -0,0 +1,39 @@
+#pragma once
+
+#ifndef __cplusplus
+#error "This header is for C++ only"
+#endif
+
+#include "ggml.h"
+#include "ggml-alloc.h"
+#include "ggml-backend.h"
+#include "gguf.h"
+#include <memory>
+
+// Smart pointers for ggml types
+
+// ggml
+
+// Deleter that releases a ggml_context through ggml_free().
+struct ggml_context_deleter {
+    void operator()(ggml_context * context) {
+        ggml_free(context);
+    }
+};
+
+// Deleter that releases a gguf_context through gguf_free().
+struct gguf_context_deleter {
+    void operator()(gguf_context * context) {
+        gguf_free(context);
+    }
+};
+
+// Owning handles: the held context is passed to the matching deleter when the pointer is destroyed or reset.
+using ggml_context_ptr = std::unique_ptr<ggml_context, ggml_context_deleter>;
+using gguf_context_ptr = std::unique_ptr<gguf_context, gguf_context_deleter>;
+
+// ggml-alloc
+
+// Deleter that releases a graph allocator through ggml_gallocr_free().
+struct ggml_gallocr_deleter { void operator()(ggml_gallocr_t galloc) { ggml_gallocr_free(galloc); } };
+
+// Owning handle for a graph allocator.
+// The element type has to be the struct (ggml_gallocr), not the handle typedef (ggml_gallocr_t):
+// ggml_gallocr_t is already `struct ggml_gallocr *`, so std::unique_ptr<ggml_gallocr_t, ...> would manage a
+// `ggml_gallocr **` and fail to compile as soon as it tried to call the deleter, which takes a ggml_gallocr_t.
+typedef std::unique_ptr<ggml_gallocr, ggml_gallocr_deleter> ggml_gallocr_ptr;
+
+// ggml-backend
+
+// Deleter that releases a backend through ggml_backend_free().
+struct ggml_backend_deleter {
+    void operator()(ggml_backend_t handle) {
+        ggml_backend_free(handle);
+    }
+};
+
+// Deleter that releases a backend buffer through ggml_backend_buffer_free().
+struct ggml_backend_buffer_deleter {
+    void operator()(ggml_backend_buffer_t handle) {
+        ggml_backend_buffer_free(handle);
+    }
+};
+
+// Deleter that releases a backend event through ggml_backend_event_free().
+struct ggml_backend_event_deleter {
+    void operator()(ggml_backend_event_t handle) {
+        ggml_backend_event_free(handle);
+    }
+};
+
+// Deleter that releases a backend scheduler through ggml_backend_sched_free().
+struct ggml_backend_sched_deleter {
+    void operator()(ggml_backend_sched_t handle) {
+        ggml_backend_sched_free(handle);
+    }
+};
+
+// Owning handles: the held object is passed to the matching deleter when the pointer is destroyed or reset.
+using ggml_backend_ptr        = std::unique_ptr<ggml_backend,        ggml_backend_deleter>;
+using ggml_backend_buffer_ptr = std::unique_ptr<ggml_backend_buffer, ggml_backend_buffer_deleter>;
+using ggml_backend_event_ptr  = std::unique_ptr<ggml_backend_event,  ggml_backend_event_deleter>;
+using ggml_backend_sched_ptr  = std::unique_ptr<ggml_backend_sched,  ggml_backend_sched_deleter>;
--- a/dep/include/vall_e.cpp/ggml-cpu.h
+++ b/dep/include/vall_e.cpp/ggml-cpu.h
@ -0,0 +1,138 @@
+#pragma once
+
+#include "ggml.h"
+#include "ggml-backend.h"
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+    // the compute plan that needs to be prepared for ggml_graph_compute()
+    // since https://github.com/ggml-org/ggml/issues/287
+    // returned by value from ggml_graph_plan() and passed by pointer to ggml_graph_compute()
+    struct ggml_cplan {
+        size_t    work_size; // size of work buffer, calculated by `ggml_graph_plan()`
+        uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
+
+        // NOTE(review): presumably mirror the n_threads / threadpool arguments given to ggml_graph_plan() - confirm
+        int n_threads;
+        struct ggml_threadpool * threadpool;
+
+        // abort ggml_graph_compute when true
+        // NOTE(review): presumably "when abort_callback returns true", with abort_callback_data handed to it - confirm
+        ggml_abort_callback abort_callback;
+        void *              abort_callback_data;
+    };
+
+    // numa strategies
+    // (the argument type of ggml_numa_init())
+    enum ggml_numa_strategy {
+        GGML_NUMA_STRATEGY_DISABLED   = 0,
+        GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
+        GGML_NUMA_STRATEGY_ISOLATE    = 2,
+        GGML_NUMA_STRATEGY_NUMACTL    = 3,
+        GGML_NUMA_STRATEGY_MIRROR     = 4,
+        // implicitly 5, i.e. the number of enumerators above; presumably a count sentinel rather than
+        // a selectable strategy - confirm
+        GGML_NUMA_STRATEGY_COUNT
+    };
+
+    GGML_BACKEND_API void    ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
+    GGML_BACKEND_API bool    ggml_is_numa(void); // true if init detected that system has >1 NUMA node
+
+    GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
+    GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
+
+    GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
+    GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
+
+    GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
+    GGML_BACKEND_API void    ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
+
+    GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
+    GGML_BACKEND_API void    ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
+
+    GGML_BACKEND_API float   ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
+    GGML_BACKEND_API void    ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
+
+    GGML_BACKEND_API float   ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
+    GGML_BACKEND_API void    ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
+
+    GGML_BACKEND_API struct ggml_threadpool *      ggml_threadpool_new           (struct ggml_threadpool_params  * params);
+    GGML_BACKEND_API void                          ggml_threadpool_free          (struct ggml_threadpool * threadpool);
+    GGML_BACKEND_API int                           ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
+    GGML_BACKEND_API void                          ggml_threadpool_pause         (struct ggml_threadpool * threadpool);
+    GGML_BACKEND_API void                          ggml_threadpool_resume        (struct ggml_threadpool * threadpool);
+
+    // ggml_graph_plan() has to be called before ggml_graph_compute()
+    // when plan.work_size > 0, caller must allocate memory for plan.work_data
+    GGML_BACKEND_API struct ggml_cplan ggml_graph_plan(
+                  const struct ggml_cgraph * cgraph,
+                                       int   n_threads, /* = GGML_DEFAULT_N_THREADS */
+                    struct ggml_threadpool * threadpool /* = NULL */ );
+    GGML_BACKEND_API enum ggml_status  ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
+
+    // same as ggml_graph_compute() but the work data is allocated as a part of the context
+    // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
+    GGML_BACKEND_API enum ggml_status  ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
+
+    //
+    // system info
+    //
+
+    // x86
+    GGML_BACKEND_API int ggml_cpu_has_sse3       (void);
+    GGML_BACKEND_API int ggml_cpu_has_ssse3      (void);
+    GGML_BACKEND_API int ggml_cpu_has_avx        (void);
+    GGML_BACKEND_API int ggml_cpu_has_avx_vnni   (void);
+    GGML_BACKEND_API int ggml_cpu_has_avx2       (void);
+    GGML_BACKEND_API int ggml_cpu_has_bmi2       (void);
+    GGML_BACKEND_API int ggml_cpu_has_f16c       (void);
+    GGML_BACKEND_API int ggml_cpu_has_fma        (void);
+    GGML_BACKEND_API int ggml_cpu_has_avx512     (void);
+    GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
+    GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
+    GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void);
+    GGML_BACKEND_API int ggml_cpu_has_amx_int8   (void);
+    // ARM
+    GGML_BACKEND_API int ggml_cpu_has_neon       (void);
+    GGML_BACKEND_API int ggml_cpu_has_arm_fma    (void);
+    GGML_BACKEND_API int ggml_cpu_has_fp16_va    (void);
+    GGML_BACKEND_API int ggml_cpu_has_dotprod    (void);
+    GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
+    GGML_BACKEND_API int ggml_cpu_has_sve        (void);
+    GGML_BACKEND_API int ggml_cpu_get_sve_cnt    (void);  // sve vector length in bytes
+    GGML_BACKEND_API int ggml_cpu_has_sme        (void);
+    // other
+    GGML_BACKEND_API int ggml_cpu_has_riscv_v    (void);
+    GGML_BACKEND_API int ggml_cpu_has_vsx        (void);
+    GGML_BACKEND_API int ggml_cpu_has_vxe        (void);
+    GGML_BACKEND_API int ggml_cpu_has_wasm_simd  (void);
+    GGML_BACKEND_API int ggml_cpu_has_llamafile  (void);
+
+    // Internal types and functions exposed for tests and benchmarks
+
+    typedef void (*ggml_vec_dot_t)  (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
+                                       const void * GGML_RESTRICT y, size_t by, int nrc);
+
+    // Per-type traits of the CPU backend, looked up with ggml_get_type_traits_cpu(type).
+    // Part of the "internal types and functions exposed for tests and benchmarks" section of this header.
+    struct ggml_type_traits_cpu {
+        // NOTE(review): presumably converts from float data into this type - confirm
+        ggml_from_float_t        from_float;
+        // dot-product routine with the ggml_vec_dot_t signature declared just above this struct
+        ggml_vec_dot_t           vec_dot;
+        // NOTE(review): presumably the type the other vec_dot operand is expected in - confirm
+        enum ggml_type           vec_dot_type;
+        int64_t                  nrows; // number of rows to process simultaneously
+    };
+
+    GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);
+
+    GGML_BACKEND_API void ggml_cpu_init(void);
+
+    //
+    // CPU backend
+    //
+
+    GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void);
+
+    GGML_BACKEND_API bool ggml_backend_is_cpu                (ggml_backend_t backend);
+    GGML_BACKEND_API void ggml_backend_cpu_set_n_threads     (ggml_backend_t backend_cpu, int n_threads);
+    GGML_BACKEND_API void ggml_backend_cpu_set_threadpool    (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
+    GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
+
+    GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
+
+#ifdef __cplusplus
+}
+#endif
--- a/dep/include/vall_e.cpp/ggml-cuda.h
+++ b/dep/include/vall_e.cpp/ggml-cuda.h
@ -0,0 +1,47 @@
+#pragma once
+
+#include "ggml.h"
+#include "ggml-backend.h"
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+#ifdef GGML_USE_HIP
+#define GGML_CUDA_NAME "ROCm"
+#define GGML_CUBLAS_NAME "hipBLAS"
+#elif defined(GGML_USE_MUSA)
+#define GGML_CUDA_NAME "MUSA"
+#define GGML_CUBLAS_NAME "muBLAS"
+#else
+#define GGML_CUDA_NAME "CUDA"
+#define GGML_CUBLAS_NAME "cuBLAS"
+#endif
+#define GGML_CUDA_MAX_DEVICES       16
+
+// backend API
+GGML_BACKEND_API ggml_backend_t ggml_backend_cuda_init(int device);
+
+GGML_BACKEND_API bool ggml_backend_is_cuda(ggml_backend_t backend);
+
+// device buffer
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
+
+// split tensor buffer that splits matrices by rows across multiple devices
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(int main_device, const float * tensor_split);
+
+// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
+
+GGML_BACKEND_API int  ggml_backend_cuda_get_device_count(void);
+GGML_BACKEND_API void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
+GGML_BACKEND_API void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
+
+GGML_BACKEND_API bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
+GGML_BACKEND_API void ggml_backend_cuda_unregister_host_buffer(void * buffer);
+
+GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cuda_reg(void);
+
+#ifdef  __cplusplus
+}
+#endif
--- a/dep/include/vall_e.cpp/ggml-kompute.h
+++ b/dep/include/vall_e.cpp/ggml-kompute.h
@ -0,0 +1,50 @@
+#pragma once
+
+#include "ggml.h"
+#include "ggml-backend.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define GGML_KOMPUTE_MAX_DEVICES 16
+
+struct ggml_vk_device { // device description used by ggml_vk_available_devices(), ggml_vk_get_device() and ggml_vk_current_device()
+    int index;
+    int type; // same as VkPhysicalDeviceType
+    size_t heapSize; // NOTE(review): unit not stated here, presumably bytes - confirm
+    const char * name; // NOTE(review): ownership/lifetime of this string is not documented in this header
+    const char * vendor; // NOTE(review): same ownership caveat as `name`
+    int subgroupSize;
+    uint64_t bufferAlignment;
+    uint64_t maxAlloc;
+};
+
+struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
+bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name);
+bool ggml_vk_has_vulkan(void);
+bool ggml_vk_has_device(void);
+struct ggml_vk_device ggml_vk_current_device(void);
+
+//
+// backend API
+//
+
+// forward declaration (NOTE(review): ggml-backend.h is already included at the top of this header, so this is likely redundant - confirm before removing)
+typedef struct ggml_backend * ggml_backend_t;
+
+GGML_BACKEND_API ggml_backend_t ggml_backend_kompute_init(int device);
+
+GGML_BACKEND_API bool ggml_backend_is_kompute(ggml_backend_t backend);
+
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
+
+GGML_BACKEND_API ggml_backend_reg_t ggml_backend_kompute_reg(void);
+
+#ifdef __cplusplus
+}
+#endif
--- a/dep/include/vall_e.cpp/ggml-metal.h
+++ b/dep/include/vall_e.cpp/ggml-metal.h
@ -0,0 +1,66 @@
+// Note: this description is outdated
+//
+// An interface allowing to compute ggml_cgraph with Metal
+//
+// This is a fully functional interface that extends ggml with GPU support for Apple devices.
+// A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, etc.)
+//
+// How it works?
+//
+// As long as your program can create and evaluate a ggml_cgraph on the CPU, you can use this
+// interface to evaluate the same graph on the GPU. Instead of using ggml_graph_compute(), you
+// use ggml_metal_graph_compute() (or ggml_vulkan_graph_compute(), etc.)
+//
+// You only need to make sure that all memory buffers that you used during the graph creation
+// are mapped to the device memory with the ggml_metal_add_buffer() function. This mapping is
+// used during the graph evaluation to determine the arguments of the compute kernels.
+//
+// Synchronization between device and host memory (for example for input and output tensors)
+// is done with the ggml_metal_set_tensor() and ggml_metal_get_tensor() functions.
+//
+
+#pragma once
+
+#include "ggml.h"
+#include "ggml-backend.h"
+
+#include <stddef.h>
+#include <stdbool.h>
+
+struct ggml_tensor;
+struct ggml_cgraph;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//
+// backend API
+// user-code should use only these functions
+//
+
+GGML_BACKEND_API ggml_backend_t ggml_backend_metal_init(void);
+
+GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend);
+
+GGML_DEPRECATED(
+        GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
+        "obsoleted by the new device interface - https://github.com/ggml-org/llama.cpp/pull/9713");
+
+GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
+
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
+
+// helper to check if the device supports a specific family
+// ideally, the user code should be doing these checks
+// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
+GGML_BACKEND_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
+
+// capture all command buffers committed the next time `ggml_backend_graph_compute` is called
+GGML_BACKEND_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
+
+GGML_BACKEND_API ggml_backend_reg_t ggml_backend_metal_reg(void);
+
+#ifdef __cplusplus
+}
+#endif
--- a/dep/include/vall_e.cpp/ggml-opencl.h
+++ b/dep/include/vall_e.cpp/ggml-opencl.h
@ -0,0 +1,26 @@
+#ifndef GGML_OPENCL_H
+#define GGML_OPENCL_H
+
+#include "ggml.h"
+#include "ggml-backend.h"
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+//
+// backend API
+//
+GGML_BACKEND_API ggml_backend_t ggml_backend_opencl_init(void);
+GGML_BACKEND_API bool ggml_backend_is_opencl(ggml_backend_t backend);
+
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void);
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void);
+
+GGML_BACKEND_API ggml_backend_reg_t ggml_backend_opencl_reg(void);
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif // GGML_OPENCL_H
--- a/dep/include/vall_e.cpp/ggml-opt.h
+++ b/dep/include/vall_e.cpp/ggml-opt.h
@ -0,0 +1,216 @@
+// This file contains functionality for training models using GGML.
+// It is not strictly needed vs. just vanilla GGML but it provides a more high-level interface for common needs such as datasets.
+// At the bottom of this file especially there are relatively high-level functions that are suitable for use or adaptation in user code.
+//
+// Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de)
+
+#pragma once
+
+#include "ggml.h"
+#include "ggml-backend.h"
+
+#include <stdint.h>
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+    struct ggml_opt_dataset;
+    struct ggml_opt_context;
+    struct ggml_opt_result;
+
+    typedef struct ggml_opt_dataset * ggml_opt_dataset_t;
+    typedef struct ggml_opt_context * ggml_opt_context_t;
+    typedef struct ggml_opt_result  * ggml_opt_result_t;
+
+    // ====== Loss ======
+
+    // built-in loss types, i.e. the built-in quantities minimized by the optimizer
+    // custom loss types can be defined via mean or sum which simply reduce the outputs for all datapoints to a single value
+    enum ggml_opt_loss_type {
+        GGML_OPT_LOSS_TYPE_MEAN, // reduces the outputs for all datapoints to a single value (mean); usable to build custom losses
+        GGML_OPT_LOSS_TYPE_SUM, // reduces the outputs for all datapoints to a single value (sum); usable to build custom losses
+        GGML_OPT_LOSS_TYPE_CROSS_ENTROPY, // NOTE(review): presumably computed between outputs and the labels tensor - confirm
+        GGML_OPT_LOSS_TYPE_MEAN_SQUARED_ERROR, // NOTE(review): presumably computed between outputs and the labels tensor - confirm
+    };
+
+    // ====== Dataset ======
+
+    GGML_API ggml_opt_dataset_t ggml_opt_dataset_init(
+            int64_t ne_datapoint, // number of elements per datapoint
+            int64_t ne_label,     // number of elements per label
+            int64_t ndata,        // total number of datapoints/labels
+            int64_t ndata_shard); // number of datapoints/labels per shard (unit at which the dataset is shuffled/copied)
+    GGML_API void ggml_opt_dataset_free(ggml_opt_dataset_t dataset);
+
+    // get underlying tensors that store the data
+    GGML_API struct ggml_tensor * ggml_opt_dataset_data  (ggml_opt_dataset_t dataset); // shape = [ne_datapoint, ndata]
+    GGML_API struct ggml_tensor * ggml_opt_dataset_labels(ggml_opt_dataset_t dataset); // shape = [ne_label,     ndata]
+
+    // shuffle idata first datapoints from dataset with RNG from opt_ctx, shuffle all datapoints if idata is negative
+    GGML_API void ggml_opt_dataset_shuffle(ggml_opt_context_t opt_ctx, ggml_opt_dataset_t dataset, int64_t idata);
+
+    // get batch at position ibatch from dataset and copy the data to data_batch and labels_batch
+    GGML_API void ggml_opt_dataset_get_batch(
+            ggml_opt_dataset_t   dataset,
+            struct ggml_tensor * data_batch,   // shape = [ne_datapoint, ndata_batch]
+            struct ggml_tensor * labels_batch, // shape = [ne_label,     ndata_batch]
+            int64_t              ibatch);
+
+    // ====== Model / Context ======
+
+    enum ggml_opt_build_type {
+        GGML_OPT_BUILD_TYPE_FORWARD,
+        GGML_OPT_BUILD_TYPE_GRAD,
+        GGML_OPT_BUILD_TYPE_OPT,
+    };
+
+    // parameters that control which optimizer is used and how said optimizer tries to find the minimal loss
+    struct ggml_opt_optimizer_params {
+        // AdamW optimizer parameters
+        struct {
+            float alpha; // learning rate
+            float beta1;
+            float beta2;
+            float eps;   // epsilon for numerical stability
+            float wd;    // weight decay for AdamW, use 0.0f to disable
+        } adamw;
+    };
+
+    // callback to calculate optimizer parameters prior to a backward pass
+    // userdata can be used to pass arbitrary data
+    typedef struct ggml_opt_optimizer_params (*ggml_opt_get_optimizer_params)(void * userdata);
+
+    // returns the default optimizer params (constant)
+    // userdata is not used
+    GGML_API struct ggml_opt_optimizer_params ggml_opt_get_default_optimizer_params(void * userdata);
+
+    // parameters for initializing a new optimization context
+    struct ggml_opt_params {
+        ggml_backend_sched_t backend_sched; // defines which backends are used to construct the compute graphs
+
+        struct ggml_context * ctx_compute; // created in user code, holds non-static tensors
+
+        // the forward graph is defined by inputs and outputs
+        // those tensors and all tensors in between are not intended to be reusable between multiple optimization contexts
+        struct ggml_tensor * inputs;
+        struct ggml_tensor * outputs;
+
+        enum ggml_opt_loss_type  loss_type;
+        enum ggml_opt_build_type build_type;
+
+        int32_t opt_period; // after how many gradient accumulation steps an optimizer step should be done
+
+        ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
+        void * get_opt_pars_ud;                     // userdata for calculating optimizer parameters
+    };
+
+    // get parameters for an optimization context with defaults set where possible
+    // parameters for which no sensible defaults exist are supplied as arguments to this function
+    // note: the return type is spelled `struct ggml_opt_params` because no typedef exists for it;
+    //       the bare tag name is only valid in C++, while this header is wrapped in extern "C" for C users
+    GGML_API struct ggml_opt_params ggml_opt_default_params(
+            ggml_backend_sched_t      backend_sched,
+            struct ggml_context     * ctx_compute,
+            struct ggml_tensor      * inputs,
+            struct ggml_tensor      * outputs,
+            enum ggml_opt_loss_type   loss_type);
+
+    GGML_API ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params);
+    GGML_API void ggml_opt_free(ggml_opt_context_t opt_ctx);
+
+    // set gradients to zero, initialize loss, and optionally reset the optimizer
+    GGML_API void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer);
+
+    // get underlying tensors that store data
+    GGML_API struct ggml_tensor * ggml_opt_inputs(  ggml_opt_context_t opt_ctx); // forward graph input tensor
+    GGML_API struct ggml_tensor * ggml_opt_outputs( ggml_opt_context_t opt_ctx); // forward graph output tensor
+    GGML_API struct ggml_tensor * ggml_opt_labels(  ggml_opt_context_t opt_ctx); // labels to compare outputs against
+    GGML_API struct ggml_tensor * ggml_opt_loss(    ggml_opt_context_t opt_ctx); // scalar tensor that contains the loss
+    GGML_API struct ggml_tensor * ggml_opt_pred(    ggml_opt_context_t opt_ctx); // predictions made by outputs
+    GGML_API struct ggml_tensor * ggml_opt_ncorrect(ggml_opt_context_t opt_ctx); // number of matching predictions between outputs and labels
+
+    GGML_API struct ggml_tensor * ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, struct ggml_tensor * node);
+
+    // ====== Optimization Result ======
+
+    // returns a new result handle; `(void)` makes this a real prototype in C (an empty list leaves the parameters unspecified)
+    GGML_API ggml_opt_result_t ggml_opt_result_init(void);
+    GGML_API void ggml_opt_result_free(ggml_opt_result_t result);
+    GGML_API void ggml_opt_result_reset(ggml_opt_result_t result);
+
+    // get data from result, uncertainties are optional and can be ignored by passing NULL
+    GGML_API void ggml_opt_result_ndata(   ggml_opt_result_t result, int64_t * ndata);                  // writes 1 value, number of datapoints
+    GGML_API void ggml_opt_result_loss(    ggml_opt_result_t result, double  * loss,     double * unc); // writes 1 value
+    GGML_API void ggml_opt_result_pred(    ggml_opt_result_t result, int32_t * pred);                   // writes ndata values
+    GGML_API void ggml_opt_result_accuracy(ggml_opt_result_t result, double  * accuracy, double * unc); // writes 1 value
+
+    // ====== Computation ======
+
+    // do forward pass, increment result if not NULL
+    GGML_API void ggml_opt_forward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
+
+    // do forward pass, increment result if not NULL, do backward pass
+    GGML_API void ggml_opt_forward_backward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
+
+    // ############################################################################
+    // ## The high-level functions start here. They do not depend on any private ##
+    // ## functions or structs and can be copied to and adapted for user code.   ##
+    // ############################################################################
+
+    // ====== Intended Usage ======
+    //
+    // 1. Select the appropriate loss for your problem.
+    // 2. Create a dataset and set the data for the "data" tensor. Also set the "labels" tensor if your loss needs them.
+    //    Setting the shard size to 1 will be fine, it's the granularity with which data is shuffled/loaded (bigger values are faster).
+    // 3. Create a GGML graph for your model with no_alloc == true. Use two separate contexts for the tensors.
+    //    The first context should contain the model parameters and inputs and be allocated statically in user code.
+    //    The second context should contain all other tensors and will be (re)allocated automatically.
+    //    Due to this automated allocation the data of the second context is not defined when accessed in user code.
+    //    Note that the second dimension of the inputs/outputs are interpreted as the number of datapoints in those tensors.
+    // 4. Call ggml_opt_fit. If you need more control you can use ggml_opt_epoch instead.
+
+    // signature for a callback while evaluating opt_ctx on dataset, called after an evaluation
+    typedef void (*ggml_opt_epoch_callback)(
+            bool               train,       // true after training evaluation, false after validation evaluation
+            ggml_opt_context_t opt_ctx,
+            ggml_opt_dataset_t dataset,
+            ggml_opt_result_t  result,      // result associated with the dataset subsection
+            int64_t            ibatch,      // number of batches that have been evaluated so far
+            int64_t            ibatch_max,  // total number of batches in this dataset subsection
+            int64_t            t_start_us); // time at which the evaluation on the dataset subsection was started
+
+    // do training on front of dataset, do evaluation only on back of dataset
+    GGML_API void ggml_opt_epoch(
+            ggml_opt_context_t      opt_ctx,
+            ggml_opt_dataset_t      dataset,
+            ggml_opt_result_t       result_train,   // result to increment during training, ignored if NULL
+            ggml_opt_result_t       result_eval,    // result to increment during evaluation, ignored if NULL
+            int64_t                 idata_split,    // data index at which to split training and evaluation
+            ggml_opt_epoch_callback callback_train,
+            ggml_opt_epoch_callback callback_eval);
+
+    // callback that prints a progress bar on stderr
+    GGML_API void ggml_opt_epoch_callback_progress_bar(
+            bool               train,
+            ggml_opt_context_t opt_ctx,
+            ggml_opt_dataset_t dataset,
+            ggml_opt_result_t  result,
+            int64_t            ibatch,
+            int64_t            ibatch_max,
+            int64_t            t_start_us);
+
+    // fit model defined by inputs and outputs to dataset
+    // (struct tags are written out explicitly so that the declaration is valid C and not only C++)
+    GGML_API void ggml_opt_fit(
+            ggml_backend_sched_t            backend_sched,  // backend scheduler for constructing the compute graphs
+            struct ggml_context           * ctx_compute,    // context with temporarily allocated tensors to calculate the outputs
+            struct ggml_tensor            * inputs,         // input tensor with shape [ne_datapoint, ndata_batch]
+            struct ggml_tensor            * outputs,        // output tensor, must have shape [ne_label, ndata_batch] if labels are used
+            ggml_opt_dataset_t              dataset,        // dataset with data and optionally also labels
+            enum ggml_opt_loss_type         loss_type,      // loss to minimize
+            ggml_opt_get_optimizer_params   get_opt_pars,   // callback to get optimizer params, userdata is pointer to epoch (of type int64_t)
+            int64_t                         nepoch,         // how many times the dataset should be iterated over
+            int64_t                         nbatch_logical, // number of datapoints per optimizer step, must be a multiple of ndata_batch in inputs/outputs
+            float                           val_split,      // fraction of the dataset to use for validation, must be in [0.0f, 1.0f)
+            bool                            silent);        // whether or not info prints to stderr should be suppressed
+
+#ifdef  __cplusplus
+}
+#endif
--- a/dep/include/vall_e.cpp/ggml-rpc.h
+++ b/dep/include/vall_e.cpp/ggml-rpc.h
@ -0,0 +1,30 @@
+#pragma once
+
+#include "ggml.h"
+#include "ggml-backend.h"
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+#define GGML_RPC_MAX_SERVERS       16
+
+// backend API
+GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint);
+GGML_BACKEND_API bool ggml_backend_is_rpc(ggml_backend_t backend);
+
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint);
+
+GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
+
+GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint,
+                                                    const char * cache_dir,
+                                                    size_t free_mem, size_t total_mem);
+
+GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void);
+
+GGML_BACKEND_API ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint);
+
+#ifdef  __cplusplus
+}
+#endif
--- a/dep/include/vall_e.cpp/ggml-sycl.h
+++ b/dep/include/vall_e.cpp/ggml-sycl.h
@ -0,0 +1,49 @@
+//
+//  MIT license
+//  Copyright (C) 2024 Intel Corporation
+//  SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ggml.h"
+#include "ggml-backend.h"
+
+#define GGML_SYCL_NAME "SYCL"
+#define GGML_SYCL_MAX_DEVICES 48
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+// backend API
+GGML_BACKEND_API ggml_backend_t ggml_backend_sycl_init(int device);
+
+GGML_BACKEND_API bool ggml_backend_is_sycl(ggml_backend_t backend);
+
+// device buffer
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
+
+// split tensor buffer that splits matrices by rows across multiple devices
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
+
+// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
+
+GGML_BACKEND_API void ggml_backend_sycl_print_sycl_devices(void);
+GGML_BACKEND_API void ggml_backend_sycl_get_gpu_list(int *id_list, int max_len);
+GGML_BACKEND_API void ggml_backend_sycl_get_device_description(int device,
+                                                       char *description,
+                                                       size_t description_size);
+// `(void)` instead of `()`: inside extern "C" an empty list is a non-prototype declaration for C callers
+GGML_BACKEND_API int  ggml_backend_sycl_get_device_count(void);
+GGML_BACKEND_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
+
+// SYCL doesn't support registering host memory, keep here for reference
+// GGML_BACKEND_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
+// GGML_BACKEND_API void ggml_backend_sycl_unregister_host_buffer(void * buffer);
+
+GGML_BACKEND_API ggml_backend_reg_t ggml_backend_sycl_reg(void);
+
+#ifdef  __cplusplus
+}
+#endif
--- a/dep/include/vall_e.cpp/ggml-vulkan.h
+++ b/dep/include/vall_e.cpp/ggml-vulkan.h
@ -0,0 +1,29 @@
+#pragma once
+
+#include "ggml.h"
+#include "ggml-backend.h"
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+#define GGML_VK_NAME "Vulkan"
+#define GGML_VK_MAX_DEVICES 16
+
+// backend API
+GGML_BACKEND_API ggml_backend_t ggml_backend_vk_init(size_t dev_num); // NOTE(review): device index is size_t here but int in the get_device_description/get_device_memory queries of this header
+
+GGML_BACKEND_API bool ggml_backend_is_vk(ggml_backend_t backend);
+GGML_BACKEND_API int  ggml_backend_vk_get_device_count(void);
+GGML_BACKEND_API void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size);
+GGML_BACKEND_API void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total);
+
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num);
+// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
+
+GGML_BACKEND_API ggml_backend_reg_t ggml_backend_vk_reg(void);
+
+#ifdef  __cplusplus
+}
+#endif
--- a/dep/include/vall_e.cpp/ggml.h
+++ b/dep/include/vall_e.cpp/ggml.h
--- a/dep/include/vall_e.cpp/llama-cpp.h
+++ b/dep/include/vall_e.cpp/llama-cpp.h
@ -0,0 +1,30 @@
+#pragma once
+
+#ifndef __cplusplus
+#error "This header is for C++ only"
+#endif
+
+#include <memory>
+
+#include "llama.h"
+
+// Deleter functor for std::unique_ptr: releases the model through llama_model_free().
+struct llama_model_deleter {
+    void operator()(llama_model * ptr) {
+        llama_model_free(ptr);
+    }
+};
+
+// Deleter functor for std::unique_ptr: releases the context through llama_free().
+struct llama_context_deleter {
+    void operator()(llama_context * ptr) {
+        llama_free(ptr);
+    }
+};
+
+// Deleter functor for std::unique_ptr: releases the sampler through llama_sampler_free().
+struct llama_sampler_deleter {
+    void operator()(llama_sampler * ptr) {
+        llama_sampler_free(ptr);
+    }
+};
+
+// Deleter functor for std::unique_ptr: releases the LoRA adapter through llama_adapter_lora_free().
+struct llama_adapter_lora_deleter {
+    void operator()(llama_adapter_lora * ptr) {
+        llama_adapter_lora_free(ptr);
+    }
+};
+
+// Owning smart-pointer aliases: each pairs a llama handle type with the deleter struct of the same name.
+using llama_model_ptr        = std::unique_ptr<llama_model,        llama_model_deleter>;
+using llama_context_ptr      = std::unique_ptr<llama_context,      llama_context_deleter>;
+using llama_sampler_ptr      = std::unique_ptr<llama_sampler,      llama_sampler_deleter>;
+using llama_adapter_lora_ptr = std::unique_ptr<llama_adapter_lora, llama_adapter_lora_deleter>;
--- a/dep/include/vall_e.cpp/llama-impl.h
+++ b/dep/include/vall_e.cpp/llama-impl.h
@ -0,0 +1,61 @@
+#pragma once
+
+#include "ggml.h" // for ggml_log_level
+
+#include <string>
+#include <vector>
+
+#ifdef __GNUC__
+#    if defined(__MINGW32__) && !defined(__clang__)
+#        define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
+#    else
+#        define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
+#    endif
+#else
+#    define LLAMA_ATTRIBUTE_FORMAT(...)
+#endif
+
+//
+// logging
+//
+
+LLAMA_ATTRIBUTE_FORMAT(2, 3)
+void llama_log_internal        (ggml_log_level level, const char * format, ...);
+void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
+
+#define LLAMA_LOG(...)       llama_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
+#define LLAMA_LOG_INFO(...)  llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
+#define LLAMA_LOG_WARN(...)  llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
+#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
+#define LLAMA_LOG_DEBUG(...) llama_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
+#define LLAMA_LOG_CONT(...)  llama_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)
+
+//
+// helpers
+//
+
+// Thin wrapper around a single T whose user-provided default constructor has an
+// empty body and no member initializer, i.e. it never assigns `value` itself.
+template <class T>
+struct no_init {
+    no_init() {
+        // intentionally empty
+    }
+
+    T value;
+};
+
+struct time_meas { // NOTE(review): looks like a scope timer; constructor/destructor are only declared here - confirm behavior in the .cpp
+    time_meas(int64_t & t_acc, bool disable = false);
+    ~time_meas();
+
+    const int64_t t_start_us; // const member, so it can only be set in the constructor; name suggests a start timestamp in microseconds
+
+    int64_t & t_acc; // reference member: the referenced int64_t must stay alive for as long as this object is used
+};
+
+void replace_all(std::string & s, const std::string & search, const std::string & replace);
+
+// TODO: rename to llama_format ?
+LLAMA_ATTRIBUTE_FORMAT(1, 2)
+std::string format(const char * fmt, ...);
+
+std::string llama_format_tensor_shape(const std::vector<int64_t> & ne);
+std::string llama_format_tensor_shape(const struct ggml_tensor * t);
+
+std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i);
--- a/dep/include/vall_e.cpp/llama-vocab.h
+++ b/dep/include/vall_e.cpp/llama-vocab.h
@ -0,0 +1,125 @@
+#pragma once
+
+#include "llama.h"
+
+#include <string>
+#include <vector>
+#include <memory>
+
+struct LLM_KV;
+struct llama_model_loader;
+
+struct llama_vocab { // vocabulary interface; all state lives behind the private `pimpl` member
+    struct token_data { // per-token record, handed out by get_token_data()
+        std::string      text;
+        float            score;
+        llama_token_attr attr;
+    };
+
+    llama_vocab();
+    ~llama_vocab(); // declared here only; `impl` is an incomplete type in this header
+
+    void load(llama_model_loader & ml, const LLM_KV & kv);
+
+    enum llama_vocab_type     get_type()     const;
+    enum llama_vocab_pre_type get_pre_type() const;
+
+    uint32_t n_tokens() const;
+    uint32_t n_token_types() const;
+
+    std::string type_name() const;
+
+    bool is_normal      (llama_token id) const;
+    bool is_unknown     (llama_token id) const;
+    bool is_control     (llama_token id) const;
+    bool is_byte        (llama_token id) const;
+    bool is_user_defined(llama_token id) const;
+    bool is_unused      (llama_token id) const;
+    bool is_eog         (llama_token id) const;
+
+    uint8_t     token_to_byte(llama_token id) const;
+    llama_token byte_to_token(uint8_t ch)     const;
+
+    llama_token text_to_token(const std::string & text) const;
+
+    const token_data & get_token_data(llama_token id) const;
+
+    const char *     token_get_text (llama_token id) const;
+    float            token_get_score(llama_token id) const;
+    llama_token_attr token_get_attr (llama_token id) const;
+
+    llama_token token_bos() const;
+    llama_token token_eos() const;
+    llama_token token_eot() const;
+    llama_token token_eom() const;
+    llama_token token_unk() const;
+    llama_token token_sep() const;
+    llama_token token_nl () const;
+    llama_token token_pad() const;
+
+    llama_token token_prefix() const;
+    llama_token token_middle() const;
+    llama_token token_suffix() const;
+
+    llama_token token_fim_pre() const;
+    llama_token token_fim_suf() const;
+    llama_token token_fim_mid() const;
+    llama_token token_fim_pad() const;
+    llama_token token_fim_rep() const;
+    llama_token token_fim_sep() const;
+
+    bool get_add_space_prefix          () const;
+    bool get_add_bos                   () const;
+    bool get_add_eos                   () const;
+    bool get_ignore_merges             () const;
+    bool get_clean_spaces              () const;
+    bool get_remove_extra_whitespaces  () const;
+    bool get_escape_whitespaces        () const;
+    bool get_treat_whitespace_as_suffix() const;
+
+    int max_token_len() const;
+
+    int find_bpe_rank(const std::string & token_left, const std::string & token_right) const;
+
+    int32_t tokenize( // buffer-based overload; NOTE(review): meaning of the int32_t result is not documented here - confirm in the implementation
+                   const char * text,
+                      int32_t   text_len,
+                  llama_token * tokens,
+                      int32_t   n_tokens_max,
+                         bool   add_special,
+                         bool   parse_special) const;
+
+    std::vector<llama_token> tokenize( // convenience overload returning the tokens by value; parse_special defaults to false
+            const std::string & raw_text,
+                         bool   add_special,
+                         bool   parse_special = false) const;
+
+    // does not write null-terminator to buf
+    int32_t token_to_piece(
+                  llama_token   token,
+                         char * buf,
+                      int32_t   length,
+                      int32_t   lstrip,
+                         bool   special) const;
+
+    // use cached data
+    const std::string & token_to_piece(llama_token token) const;
+
+    int32_t detokenize( // buffer-based overload; NOTE(review): meaning of the int32_t result is not documented here - confirm in the implementation
+            const llama_token * tokens,
+                      int32_t   n_tokens,
+                         char * text,
+                      int32_t   text_len_max,
+                         bool   remove_special,
+                         bool   unparse_special) const;
+
+    std::string detokenize(
+            const std::vector<llama_token> & tokens,
+                                      bool   special) const;
+
+    void print_info() const;
+
+private:
+    struct impl; // forward declaration only; the definition is not part of this header
+    std::unique_ptr<impl> pimpl; // sole data member, owns the hidden implementation object
+};
--- a/dep/include/vall_e.cpp/llama.h
+++ b/dep/include/vall_e.cpp/llama.h
--- a/dep/include/vall_e.cpp/llama_hack.h
+++ b/dep/include/vall_e.cpp/llama_hack.h
@ -0,0 +1,358 @@
+#pragma once
+
+#include "llama-vocab.h"
+#include <array>
+
+/* Begin cringe so I can access the model's tok_embd */
+// it needs to be copied so the struct layout is exactly as it is under llama.cpp
+#define LLAMA_MAX_LAYERS  512
+#define LLAMA_MAX_EXPERTS 160  // DeepSeekV2
+
+// Stand-in for llama.cpp's internal `llm_type`; only the enumerator that `llama_model` uses as
+// its default is declared here.
+// NOTE(review): presumably only the enum's size has to agree with upstream for the mirrored layout — confirm.
+enum llm_type {
+    LLM_TYPE_UNKNOWN,
+};
+
+// Stand-in for llama.cpp's internal `llm_arch`; only the enumerator that `llama_model` uses as
+// its default is declared here.
+// NOTE(review): presumably only the enum's size has to agree with upstream for the mirrored layout — confirm.
+enum llm_arch {
+	LLM_ARCH_UNKNOWN,
+};
+
+// Values for `llama_hparams::expert_gating_func` (which defaults to the NONE entry).
+enum llama_expert_gating_func_type {
+    LLAMA_EXPERT_GATING_FUNC_TYPE_NONE    = 0,
+    LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX = 1,
+    LLAMA_EXPERT_GATING_FUNC_TYPE_SIGMOID = 2,
+};
+
+// Sub-block of `llama_hparams` (member `posnet`, grouped there under "for WavTokenizer").
+struct llama_hparams_posnet {
+	uint32_t n_embd;
+	uint32_t n_layer;
+};
+
+// Sub-block of `llama_hparams` (member `convnext`, grouped there under "for WavTokenizer").
+struct llama_hparams_convnext {
+	uint32_t n_embd;
+	uint32_t n_layer;
+};
+
+// Local copy of llama.cpp's `llama_hparams`. It exists only so that `llama_model` (which embeds it
+// by value) has the same struct layout as under llama.cpp, as the note at the top of this header requires.
+// Do not reorder, add or remove members here on their own.
+// NOTE(review): has to track the llama.cpp revision that is actually linked — confirm when upgrading.
+// The member functions are only declared in this header; no definitions are provided here.
+struct llama_hparams {
+	bool vocab_only;
+    bool rope_finetuned;
+    bool use_par_res;
+    bool swin_norm;
+
+    uint32_t n_ctx_train; // context size the model was trained on
+    uint32_t n_embd;
+    uint32_t n_embd_features = 0;
+    uint32_t n_layer;
+    uint32_t n_rot;
+    uint32_t n_swa = 0; // sliding window attention (SWA)
+    uint32_t n_swa_pattern = 1; // by default, all layers use non-sliding-window attention
+    uint32_t n_embd_head_k; // dimension of keys (d_k). d_q is assumed to be the same, but there are n_head q heads, and only n_head_kv k-v heads
+    uint32_t n_embd_head_v; // dimension of values (d_v) aka n_embd_head
+    uint32_t n_expert = 0;
+    uint32_t n_expert_used = 0;
+    uint32_t n_rel_attn_bkts = 0;
+
+    // for WavTokenizer
+    struct llama_hparams_posnet   posnet;
+    struct llama_hparams_convnext convnext;
+
+    std::array<uint32_t, LLAMA_MAX_LAYERS> n_head_arr;
+    std::array<uint32_t, LLAMA_MAX_LAYERS> n_head_kv_arr;
+    std::array<uint32_t, LLAMA_MAX_LAYERS> n_ff_arr;
+
+    uint32_t n_layer_dense_lead = 0;
+    uint32_t n_lora_q           = 0;
+    uint32_t n_lora_kv          = 0;
+    uint32_t n_ff_exp           = 0;
+    uint32_t n_ff_shexp         = 0;
+    uint32_t n_expert_shared    = 0;
+    uint32_t n_norm_groups      = 0;
+
+    float    expert_weights_scale = 0.0;
+    bool     expert_weights_norm  = false;
+    uint32_t expert_gating_func   = LLAMA_EXPERT_GATING_FUNC_TYPE_NONE;
+
+    float f_norm_eps;
+    float f_norm_rms_eps;
+    float f_norm_group_eps;
+
+    float f_attn_logit_softcapping  = 50.0f;
+    float f_final_logit_softcapping = 30.0f;
+
+    // for RWKV
+    uint32_t rescale_every_n_layers = 0;
+    uint32_t time_mix_extra_dim     = 0;
+    uint32_t time_decay_extra_dim   = 0;
+    uint32_t wkv_head_size          = 0;
+    uint32_t token_shift_count      = 2;
+    uint32_t n_lora_decay           = 0;
+    uint32_t n_lora_iclr            = 0;
+    uint32_t n_lora_value_res_mix   = 0;
+    uint32_t n_lora_gate            = 0;
+
+    float    rope_attn_factor = 1.0f;
+    float    rope_freq_base_train;
+    float    rope_freq_base_train_swa;
+    float    rope_freq_scale_train;
+    float    rope_freq_scale_train_swa;
+    uint32_t n_ctx_orig_yarn;
+    float    rope_yarn_log_mul;
+
+    std::array<int, 4> rope_sections;
+
+    // for State Space Models
+    uint32_t ssm_d_conv  = 0;
+    uint32_t ssm_d_inner = 0;
+    uint32_t ssm_d_state = 0;
+    uint32_t ssm_dt_rank = 0;
+
+    bool ssm_dt_b_c_rms = false;
+
+    float f_clamp_kqv      = 0.0f;
+    float f_max_alibi_bias = 0.0f;
+    float f_logit_scale    = 0.0f;
+
+    // Additional scale factors (Granite/Granite MoE)
+    float f_residual_scale  = 0.0f;
+    float f_embedding_scale = 0.0f;
+    float f_attention_scale = 0.0f;
+
+    bool causal_attn   = true;
+    bool use_alibi     = false;
+    bool attn_soft_cap = false;
+
+    // needed by encoder-decoder models (e.g. T5, FLAN-T5)
+    // ref: https://github.com/ggerganov/llama.cpp/pull/8141
+    llama_token dec_start_token_id = LLAMA_TOKEN_NULL;
+
+    enum llama_pooling_type      pooling_type            = LLAMA_POOLING_TYPE_NONE;
+    enum llama_rope_type         rope_type               = LLAMA_ROPE_TYPE_NONE;
+    enum llama_rope_scaling_type rope_scaling_type_train = LLAMA_ROPE_SCALING_TYPE_NONE;
+
+    uint32_t n_head(uint32_t il = 0) const;
+
+    uint32_t n_head_kv(uint32_t il = 0) const;
+
+    uint32_t n_ff(uint32_t il = 0) const;
+
+    uint32_t n_gqa(uint32_t il = 0) const;
+
+    // dimension of key embeddings across all k-v heads
+    uint32_t n_embd_k_gqa(uint32_t il = 0) const;
+
+    // dimension of value embeddings across all k-v heads
+    uint32_t n_embd_v_gqa(uint32_t il = 0) const;
+
+    // dimension of the rolling state embeddings
+    // corresponds to Mamba's conv_states size or RWKV's token_shift states size
+    uint32_t n_embd_k_s() const;
+
+    // dimension of the recurrent state embeddings
+    uint32_t n_embd_v_s() const;
+
+    bool is_swa(uint32_t il) const;
+};
+
+// Local re-declaration of llama.cpp's `llama_model`, so the helpers at the bottom of this header can
+// read `tok_embd` and read/replace `output` through a `llama_model*` obtained from llama.cpp.
+// Member order and types must match the real struct for those accesses to land on the right fields.
+// NOTE(review): presumably only a leading prefix of the upstream struct is reproduced here — never
+// construct, copy or take sizeof of this type; only access it through pointers handed out by llama.cpp.
+struct llama_model {
+    llm_type type = LLM_TYPE_UNKNOWN;
+    llm_arch arch = LLM_ARCH_UNKNOWN;
+
+    std::string name = "n/a";
+
+    llama_hparams hparams = {};
+    llama_vocab   vocab;
+
+    struct ggml_tensor * tok_embd   = nullptr; // returned by llama_get_embedding_weights()
+    struct ggml_tensor * type_embd  = nullptr;
+    struct ggml_tensor * pos_embd   = nullptr;
+    struct ggml_tensor * tok_norm   = nullptr;
+    struct ggml_tensor * tok_norm_b = nullptr;
+
+    struct ggml_tensor * output_norm     = nullptr;
+    struct ggml_tensor * output_norm_b   = nullptr;
+    struct ggml_tensor * output          = nullptr; // read by llama_get_output_head_tensor(), replaced by llama_set_output_head()
+    struct ggml_tensor * output_b        = nullptr;
+    struct ggml_tensor * output_norm_enc = nullptr;
+
+    // classifier
+    struct ggml_tensor * cls       = nullptr;
+    struct ggml_tensor * cls_b     = nullptr;
+    struct ggml_tensor * cls_out   = nullptr;
+    struct ggml_tensor * cls_out_b = nullptr;
+
+    struct ggml_tensor * conv1d = nullptr;
+    struct ggml_tensor * conv1d_b = nullptr;
+};
+
+// Look-alike of `llama_vocab` whose `impl` is defined and whose `pimpl` is reachable (everything in
+// a struct is public by default). llama_set_output_head() casts a `llama_vocab*` to this type so it
+// can resize `pimpl->id_to_token`.
+// NOTE(review): this only works while the member layout here matches the real llama_vocab and its
+// impl up to and including `id_to_token` — confirm against the linked llama.cpp revision.
+// The member functions are declarations only; nothing in this header defines them.
+struct llama_vocab_hack {
+    struct token_data {
+        std::string      text;
+        float            score;
+        llama_token_attr attr;
+    };
+
+    llama_vocab_hack();
+    ~llama_vocab_hack();
+
+    void load(llama_model_loader & ml, const LLM_KV & kv);
+
+    enum llama_vocab_type     get_type()     const;
+    enum llama_vocab_pre_type get_pre_type() const;
+
+    uint32_t n_tokens() const;
+    uint32_t n_token_types() const;
+
+    std::string type_name() const;
+
+    bool is_normal      (llama_token id) const;
+    bool is_unknown     (llama_token id) const;
+    bool is_control     (llama_token id) const;
+    bool is_byte        (llama_token id) const;
+    bool is_user_defined(llama_token id) const;
+    bool is_unused      (llama_token id) const;
+    bool is_eog         (llama_token id) const;
+
+    uint8_t     token_to_byte(llama_token id) const;
+    llama_token byte_to_token(uint8_t ch)     const;
+
+    llama_token text_to_token(const std::string & text) const;
+
+    const token_data & get_token_data(llama_token id) const;
+
+    const char *     token_get_text (llama_token id) const;
+    float            token_get_score(llama_token id) const;
+    llama_token_attr token_get_attr (llama_token id) const;
+
+    llama_token token_bos() const;
+    llama_token token_eos() const;
+    llama_token token_eot() const;
+    llama_token token_eom() const;
+    llama_token token_unk() const;
+    llama_token token_sep() const;
+    llama_token token_nl () const;
+    llama_token token_pad() const;
+
+    llama_token token_prefix() const;
+    llama_token token_middle() const;
+    llama_token token_suffix() const;
+
+    llama_token token_fim_pre() const;
+    llama_token token_fim_suf() const;
+    llama_token token_fim_mid() const;
+    llama_token token_fim_pad() const;
+    llama_token token_fim_rep() const;
+    llama_token token_fim_sep() const;
+
+    bool get_add_space_prefix          () const;
+    bool get_add_bos                   () const;
+    bool get_add_eos                   () const;
+    bool get_ignore_merges             () const;
+    bool get_clean_spaces              () const;
+    bool get_remove_extra_whitespaces  () const;
+    bool get_escape_whitespaces        () const;
+    bool get_treat_whitespace_as_suffix() const;
+
+    int max_token_len() const;
+
+    int find_bpe_rank(const std::string & token_left, const std::string & token_right) const;
+
+    int32_t tokenize(
+                   const char * text,
+                      int32_t   text_len,
+                  llama_token * tokens,
+                      int32_t   n_tokens_max,
+                         bool   add_special,
+                         bool   parse_special) const;
+
+    std::vector<llama_token> tokenize(
+            const std::string & raw_text,
+                         bool   add_special,
+                         bool   parse_special = false) const;
+
+    // does not write null-terminator to buf
+    int32_t token_to_piece(
+                  llama_token   token,
+                         char * buf,
+                      int32_t   length,
+                      int32_t   lstrip,
+                         bool   special) const;
+
+    // use cached data
+    const std::string & token_to_piece(llama_token token) const;
+
+    int32_t detokenize(
+            const llama_token * tokens,
+                      int32_t   n_tokens,
+                         char * text,
+                      int32_t   text_len_max,
+                         bool   remove_special,
+                         bool   unparse_special) const;
+
+    std::string detokenize(
+            const std::vector<llama_token> & tokens,
+                                      bool   special) const;
+
+    void print_info() const;
+
+    // Visible definition of the implementation struct; llama_set_output_head() only touches `id_to_token`.
+    struct impl {
+	    uint32_t n_token_types = 0; // for BERT-style token types
+
+	    enum llama_vocab_type     type     = LLAMA_VOCAB_TYPE_SPM;
+	    enum llama_vocab_pre_type pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+
+	    int max_token_len = 0; // used for optimizing longest token search
+
+	    // default LLaMA special tokens
+	    // TODO: should we set all of these to LLAMA_TOKEN_NULL?
+	    llama_token special_bos_id  = 1;
+	    llama_token special_eos_id  = 2;
+	    llama_token special_eot_id  = LLAMA_TOKEN_NULL;
+	    llama_token special_eom_id  = LLAMA_TOKEN_NULL;
+	    llama_token special_unk_id  = 0;
+	    llama_token special_sep_id  = LLAMA_TOKEN_NULL;
+	    llama_token special_pad_id  = LLAMA_TOKEN_NULL;
+	    llama_token special_mask_id = LLAMA_TOKEN_NULL;
+
+	    llama_token linefeed_id = 13;
+
+	    // fim tokens
+	    llama_token special_fim_pre_id = LLAMA_TOKEN_NULL;
+	    llama_token special_fim_suf_id = LLAMA_TOKEN_NULL;
+	    llama_token special_fim_mid_id = LLAMA_TOKEN_NULL;
+	    llama_token special_fim_pad_id = LLAMA_TOKEN_NULL;
+	    llama_token special_fim_rep_id = LLAMA_TOKEN_NULL; // repo
+	    llama_token special_fim_sep_id = LLAMA_TOKEN_NULL; // file separator
+
+	    // tokenizer flags
+	    bool add_space_prefix           = false;
+	    bool add_bos                    = false;
+	    bool add_eos                    = false;
+	    bool ignore_merges              = false;
+	    bool clean_spaces               = false;  // clean_up_tokenization_spaces
+	    bool remove_extra_whitespaces   = false;
+	    bool escape_whitespaces         = true;
+	    bool treat_whitespace_as_suffix = false;
+
+	    std::unordered_map<std::string, llama_token> token_to_id;
+	    std::vector<token_data>                      id_to_token;
+	};
+    std::unique_ptr<impl> pimpl;
+};
+
+/* BEGIN VALL-E SPECIFIC HELPERS */
+// Returns the model's token-embedding tensor (`tok_embd`).
+// `inline` because the definition lives in a header: without it every translation unit that
+// includes this file emits its own external definition and the link fails with duplicate symbols.
+inline struct ggml_tensor * llama_get_embedding_weights(struct llama_model * model) {
+    return model->tok_embd;
+}
+// Returns the model's current output-head tensor (`output`).
+// `inline` because the definition lives in a header: without it every translation unit that
+// includes this file emits its own external definition and the link fails with duplicate symbols.
+inline struct ggml_tensor * llama_get_output_head_tensor(struct llama_model * model ) {
+    return model->output;
+}
+// Swaps the model's output head for `tensor` and resizes the vocab's id->token table to
+// `tensor->ne[1]` entries so the reported vocab size follows the new head.
+// Neither `model` nor `tensor` is checked for null.
+// `inline` because the definition lives in a header (avoids duplicate symbols at link time).
+inline void llama_set_output_head(struct llama_model * model, struct ggml_tensor* tensor ) {
+    // set the output tensor
+    model->output = tensor;
+    // required to properly output logits
+    // llama_vocab keeps its state behind a private pimpl; view it through the layout-compatible
+    // llama_vocab_hack to reach id_to_token.
+    llama_vocab* vocab = const_cast<llama_vocab*>( llama_model_get_vocab( model ) );
+    llama_vocab_hack* hack = reinterpret_cast<llama_vocab_hack*>( vocab );
+    hack->pimpl->id_to_token.resize( tensor->ne[1] );
+    // *const_cast<uint32_t*>(&model->hparams.n_vocab) = tensor->ne[1];
+}
+/* END VALL-E SPECIFIC HELPERS */
+
+/* End cringe code */
--- a/dep/include/vall_e.cpp/lstm.h
+++ b/dep/include/vall_e.cpp/lstm.h
@ -0,0 +1,78 @@
+#pragma once
+
+#include "ggml.h"
+#include "ggml-alloc.h"
+
+#include "ops.h"
+
+// Weight and bias tensors for a two-layer LSTM (layers `l0` and `l1`).
+// Per layer: `ih` = input-to-hidden, `hh` = hidden-to-hidden; `_w` = weight, `_b` = bias.
+// This struct only holds the pointers; it does not allocate or free the tensors.
+struct encodec_lstm {
+    struct ggml_tensor *l0_ih_w;
+    struct ggml_tensor *l0_hh_w;
+
+    struct ggml_tensor *l0_ih_b;
+    struct ggml_tensor *l0_hh_b;
+
+    struct ggml_tensor *l1_ih_w;
+    struct ggml_tensor *l1_hh_w;
+
+    struct ggml_tensor *l1_ih_b;
+    struct ggml_tensor *l1_hh_b;
+};
+
+// Builds the ggml graph for one LSTM layer, unrolled over every time step of `inp`.
+//
+//   ctx0                 context in which all new tensors are created
+//   inp                  input; ne[0] is taken as the sequence length, ne[1] as the input dimension
+//   weight_ih/weight_hh  gate weights; hidden size is weight_ih->ne[1] / 4
+//   bias_ih/bias_hh      gate biases, added after the respective matmul
+//   prefix               prepended to the names of the state tensors ("<prefix>_ct", "<prefix>_ht")
+//
+// The cell state, hidden state and the output buffer are fresh F32 tensors flagged as graph
+// inputs; nothing here fills them, so their initial contents come from whoever runs the graph.
+// Returns the per-step hidden states, transposed back and made contiguous.
+//
+// `inline` because the definition lives in a header (avoids duplicate symbols at link time).
+inline struct ggml_tensor *forward_pass_lstm_unilayer(struct ggml_context *ctx0,
+                                                      struct ggml_tensor  *inp,
+                                                      struct ggml_tensor  *weight_ih,
+                                                      struct ggml_tensor  *weight_hh,
+                                                      struct ggml_tensor  *bias_ih,
+                                                      struct ggml_tensor  *bias_hh,
+                                                      char                *prefix) {
+    const int seq_length = inp->ne[0];
+    const int input_dim  = inp->ne[1];
+    const int hidden_dim = weight_ih->ne[1] / 4;
+
+    // Sized to ggml's own name limit. The previous 10-byte buffers cut the "_ct"/"_ht" suffix off
+    // any prefix longer than 6 characters, which made both state tensors end up with the same name.
+    char ct_name[GGML_MAX_NAME];
+    char ht_name[GGML_MAX_NAME];
+
+    snprintf(ct_name, sizeof(ct_name), "%s_ct", prefix);
+    snprintf(ht_name, sizeof(ht_name), "%s_ht", prefix);
+
+    // output buffer: one row of hidden_dim values per time step
+    struct ggml_tensor *hs = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, hidden_dim, seq_length);
+    ggml_set_input(hs);
+
+    // cell state
+    struct ggml_tensor *c_t = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hidden_dim);
+    ggml_set_input(c_t);
+    ggml_set_name(c_t, ct_name);
+
+    // hidden state
+    struct ggml_tensor *h_t = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hidden_dim);
+    ggml_set_input(h_t);
+    ggml_set_name(h_t, ht_name);
+
+    // transpose so that each time step is one contiguous row of input_dim values
+    struct ggml_tensor *current = ggml_cont(ctx0, ggml_transpose(ctx0, inp));
+
+    for (int t = 0; t < seq_length; t++) {
+        struct ggml_tensor *x_t = ggml_view_1d(ctx0, current, input_dim, t * current->nb[1]);
+
+        struct ggml_tensor *inp_gates = ggml_mul_mat(ctx0, weight_ih, x_t);
+        inp_gates = ggml_add(ctx0, inp_gates, bias_ih);
+
+        struct ggml_tensor *hid_gates = ggml_mul_mat(ctx0, weight_hh, h_t);
+        hid_gates = ggml_add(ctx0, hid_gates, bias_hh);
+
+        struct ggml_tensor *out_gates = ggml_add(ctx0, inp_gates, hid_gates);
+
+        // the four gates are stored back to back, hidden_dim floats each: input, forget, cell, output
+        const size_t gate_bytes = sizeof(float) * hidden_dim;
+
+        struct ggml_tensor *i_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 0 * gate_bytes));
+        struct ggml_tensor *f_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 1 * gate_bytes));
+        struct ggml_tensor *g_t = ggml_tanh(ctx0   , ggml_view_1d(ctx0, out_gates, hidden_dim, 2 * gate_bytes));
+        struct ggml_tensor *o_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 3 * gate_bytes));
+
+        c_t = ggml_add(ctx0, ggml_mul(ctx0, f_t, c_t), ggml_mul(ctx0, i_t, g_t));
+
+        h_t = ggml_mul(ctx0, o_t, ggml_tanh(ctx0, c_t));
+
+        // store this step's hidden state as row t of the output
+        hs = ggml_set_1d(ctx0, hs, h_t, t * hs->nb[1]);
+    }
+
+    hs = ggml_cont(ctx0, ggml_transpose(ctx0, hs));
+
+    return hs;
+}
--- a/dep/include/vall_e.cpp/ops.h
+++ b/dep/include/vall_e.cpp/ops.h
@ -0,0 +1,17 @@
+#pragma once
+
+#include "ggml.h"
+
+// Prototypes for the 1-D padding / convolution graph helpers. Only the declarations live in this
+// header; the definitions are elsewhere.
+// NOTE(review): padding mode, tensor layout and whether `conv_b` may be null cannot be told from the
+// prototypes alone — confirm against the implementation before relying on any of them.
+
+// Takes a tensor plus left/right padding amounts and returns a tensor.
+struct ggml_tensor *pad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp,
+                           int padding_left, int padding_right);
+
+// Counterpart of pad_1d by name and signature (presumably removes the padding again — confirm).
+struct ggml_tensor *unpad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp,
+                             int padding_left, int padding_right);
+
+// 1-D convolution of `inp` with weights `conv_w`, bias `conv_b` and the given stride.
+struct ggml_tensor *strided_conv_1d(struct ggml_context *ctx0, struct ggml_tensor *inp,
+                                    struct ggml_tensor *conv_w, struct ggml_tensor *conv_b,
+                                    int stride);
+
+// Transposed variant of strided_conv_1d with the same parameter list.
+struct ggml_tensor *strided_conv_transpose_1d(struct ggml_context *ctx0, struct ggml_tensor *inp,
+                                              struct ggml_tensor *conv_w, struct ggml_tensor *conv_b,
+                                              int stride);
--- a/dep/include/vall_e.cpp/quantizer.h
+++ b/dep/include/vall_e.cpp/quantizer.h
@ -0,0 +1,111 @@
+#pragma once
+
+#include <cassert>
+#include <vector>
+
+#include "ggml.h"
+#include "ggml-alloc.h"
+#include "ggml-backend.h"
+
+#include "utils.h"
+
+// One stage of the residual quantizer: the embedding (codebook) table that the encode/decode
+// functions below multiply against and look rows up in.
+struct encodec_quant_block {
+    struct ggml_tensor *embed;
+};
+
+// The ordered list of quantizer stages; encode/decode use the first n_q entries.
+struct encodec_quantizer {
+    std::vector<encodec_quant_block> blocks;
+};
+
+// Builds the graph that turns encoder output into codebook indices by residual vector quantization.
+//
+// For each of the n_q active codebooks it computes the squared distance between every frame of
+// the running residual and every codebook entry, picks the nearest entry per frame, writes those
+// indices into row i of the result and subtracts the chosen entries from the residual.
+//
+//   quantizer    codebooks; at least n_q blocks must be loaded
+//   ctx0         context in which all new tensors are created
+//   encoded_inp  encoder output; ne[0] is taken as the sequence length
+//   n_bins, sr, bandwidth, hop_length
+//                select n_q via get_num_quantizers_for_bandwidth()
+//
+// Returns an I32 tensor [seq_length, n_q], or NULL (after printing to stderr) on invalid arguments.
+//
+// `inline` because the definition lives in a header (avoids duplicate symbols at link time).
+inline struct ggml_tensor *encodec_forward_quantizer_encode(
+    const struct encodec_quantizer *quantizer, struct ggml_context *ctx0,
+    struct ggml_tensor *encoded_inp, const int n_bins, const int sr, const int bandwidth,
+    const int hop_length) {
+
+    if (!encoded_inp) {
+        fprintf(stderr, "%s: null input tensor\n", __func__);
+        return NULL;
+    }
+
+    if (!quantizer || hop_length <= 0) {
+        fprintf(stderr, "%s: invalid quantizer or hop length\n", __func__);
+        return NULL;
+    }
+
+    // divide in floating point: with two ints the division truncates first and ceilf() does nothing
+    const int frame_rate = (int)ceilf((float)sr / (float)hop_length);
+    const int n_q = get_num_quantizers_for_bandwidth(n_bins, frame_rate, bandwidth);
+
+    // never index past the codebooks that were actually loaded
+    if ((size_t)n_q > quantizer->blocks.size()) {
+        fprintf(stderr, "%s: need %d codebooks but only %d are loaded\n", __func__, n_q, (int)quantizer->blocks.size());
+        return NULL;
+    }
+
+    const int seq_length = encoded_inp->ne[0];
+
+    struct ggml_tensor *codes = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, seq_length, n_q);
+    ggml_set_input(codes);
+
+    struct ggml_tensor *inpL = ggml_cont(ctx0, ggml_transpose(ctx0, encoded_inp));
+    struct ggml_tensor *residual = inpL;
+
+    for (int i = 0; i < n_q; i++) {
+        const encodec_quant_block &block = quantizer->blocks[i];
+
+        // compute distance
+        // [seq_length, n_bins]
+        struct ggml_tensor *dp = ggml_scale(
+            ctx0, ggml_mul_mat(ctx0, block.embed, residual), -2.0f);
+
+        // [n_bins]
+        struct ggml_tensor *sqr_embed = ggml_sqr(ctx0, block.embed);
+        struct ggml_tensor *sqr_embed_nrm = ggml_sum_rows(ctx0, sqr_embed);
+
+        // [seq_length]
+        struct ggml_tensor *sqr_inp = ggml_sqr(ctx0, residual);
+        struct ggml_tensor *sqr_inp_nrm = ggml_sum_rows(ctx0, sqr_inp);
+
+        // [seq_length, n_bins]
+        struct ggml_tensor *dist = ggml_add(ctx0, ggml_repeat(ctx0, sqr_inp_nrm, dp), dp);
+        dist = ggml_add(ctx0, ggml_repeat(ctx0, ggml_transpose(ctx0, sqr_embed_nrm), dist), dist);
+        // negate so the nearest entry becomes the maximum
+        dist = ggml_neg(ctx0, dist);
+
+        // take the argmax over the column dimension
+        // [seq_length]
+        struct ggml_tensor *indices = ggml_argmax(ctx0, dist);
+
+        // look up in embedding table
+        struct ggml_tensor *quantized = ggml_get_rows(ctx0, block.embed, indices);
+
+        residual = ggml_sub(ctx0, residual, quantized);
+
+        codes = ggml_set_1d(ctx0, codes, indices, i * codes->nb[1]);
+    }
+
+    return codes;
+}
+
+// Builds the graph that turns codebook indices back into a latent: for each of the n_q active
+// codebooks it looks up the rows selected by row i of `codes` and adds them to the accumulator.
+//
+//   quantizer   codebooks; at least n_q blocks must be loaded
+//   ctx0        context in which all new tensors are created
+//   codes       indices; ne[0] is the sequence length and ne[1] must equal n_q (asserted)
+//   hidden_dim  width of the accumulator
+//   n_bins, sr, bandwidth, hop_length
+//               select n_q via get_num_quantizers_for_bandwidth()
+//
+// The accumulator is a fresh F32 tensor named "quantized_out" and flagged as a graph input; this
+// function never fills it, so the sum starts from whatever the caller puts there.
+// NOTE(review): presumably the caller zeroes "quantized_out" before running the graph — confirm.
+//
+// Returns the accumulated tensor, transposed and contiguous, or NULL (after printing to stderr)
+// on invalid arguments.
+//
+// `inline` because the definition lives in a header (avoids duplicate symbols at link time).
+inline struct ggml_tensor *encodec_forward_quantizer_decode(
+    const struct encodec_quantizer *quantizer, struct ggml_context *ctx0,
+    struct ggml_tensor *codes, const int hidden_dim, const int n_bins, const int sr, const int bandwidth,
+    const int hop_length) {
+
+    if (!codes) {
+        fprintf(stderr, "%s: null input tensor\n", __func__);
+        return NULL;
+    }
+
+    if (!quantizer || hop_length <= 0) {
+        fprintf(stderr, "%s: invalid quantizer or hop length\n", __func__);
+        return NULL;
+    }
+
+    const int seq_length = codes->ne[0];
+
+    // divide in floating point: with two ints the division truncates first and ceilf() does nothing
+    const int frame_rate = (int)ceilf((float)sr / (float)hop_length);
+    const int n_q = get_num_quantizers_for_bandwidth(n_bins, frame_rate, bandwidth);
+
+    assert(n_q == codes->ne[1]);
+
+    // never index past the codebooks that were actually loaded
+    if ((size_t)n_q > quantizer->blocks.size()) {
+        fprintf(stderr, "%s: need %d codebooks but only %d are loaded\n", __func__, n_q, (int)quantizer->blocks.size());
+        return NULL;
+    }
+
+    struct ggml_tensor *quantized_out = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, hidden_dim, seq_length);
+    ggml_set_input(quantized_out);
+    ggml_set_name(quantized_out, "quantized_out");
+
+    for (int i = 0; i < n_q; i++) {
+        const encodec_quant_block &block = quantizer->blocks[i];
+
+        // row i of `codes` holds the indices for codebook i
+        struct ggml_tensor *indices = ggml_view_1d(ctx0, codes, seq_length, i * codes->nb[1]);
+        struct ggml_tensor *quantized = ggml_get_rows(ctx0, block.embed, indices);
+
+        quantized_out = ggml_add(ctx0, quantized_out, quantized);
+    }
+
+    quantized_out = ggml_cont(ctx0, ggml_transpose(ctx0, quantized_out));
+
+    return quantized_out;
+}
--- a/dep/include/vall_e.cpp/utils.h
+++ b/dep/include/vall_e.cpp/utils.h
@ -0,0 +1,30 @@
+#pragma once
+
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <fstream>
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+const size_t MB = 1024 * 1024;
+
+// Copies sizeof(T) raw bytes from `infile` directly over `dest`.
+// The stream state is not examined here; a short or failed read leaves `infile` in a failed
+// state for the caller to notice.
+template <typename T>
+void read_safe(std::ifstream &infile, T &dest) {
+    char *const raw = reinterpret_cast<char *>(&dest);
+    infile.read(raw, sizeof(dest));
+}
+
+// Number of codebooks needed for `bandwidth` (in kbps) at the given hop length and sample rate:
+// ceil(1000 * bandwidth / (ceil(sample_rate / hop_length) * 10)).
+// `inline` because the definition lives in a header (avoids duplicate symbols at link time).
+inline int32_t get_num_codebooks(float bandwidth, int hop_length, float sample_rate) {
+    // The number of codebooks is determined by the bandwidth selected.
+    // Supported bandwidths are 1.5kbps (n_q = 2), 3 kbps (n_q = 4), 6 kbps (n_q = 8),
+    // 12 kbps (n_q = 16) and 24kbps (n_q = 32).
+    return (int32_t)ceilf(1000 * bandwidth / (ceilf(sample_rate / hop_length) * 10));
+}
+
+// Bits per second carried by one quantizer: log2(bins) * frame_rate, truncated to an integer.
+// `inline` because the definition lives in a header (avoids duplicate symbols at link time).
+inline int32_t get_bandwidth_per_quantizer(int bins, float frame_rate) {
+    return log2f((float)bins) * frame_rate;
+}
+
+// Number of quantizers that fit into `bandwidth` (in kbps): floor(bandwidth * 1000 / bits-per-
+// quantizer), but never fewer than 1.
+// `inline` because the definition lives in a header (avoids duplicate symbols at link time).
+inline int32_t get_num_quantizers_for_bandwidth(int bins, float frame_rate, float bandwidth) {
+    const float bw_per_q = get_bandwidth_per_quantizer(bins, frame_rate);
+    // A zero (or negative) per-quantizer bandwidth would make the division yield inf/NaN, and
+    // converting that to int32_t is undefined; fall back to the documented minimum instead.
+    if (!(bw_per_q > 0.0f)) {
+        return 1;
+    }
+    const float n_q = floorf(bandwidth * 1000 / bw_per_q);
+    return n_q > 1.0f ? (int32_t)n_q : 1;
+}
--- a/dep/include/vall_e.cpp/vall_e.h
+++ b/dep/include/vall_e.cpp/vall_e.h
@ -0,0 +1,178 @@
+#pragma once
+
+// C++ deps
+#include <string>
+#include <vector>
+#include <unordered_map>
+
+#include <llama.h>
+
+// handles defining platform specific macros and import/export decorators (copied from my engine's uf/config.h)
+#if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+	// Windows
+	#define VALL_E_ENV "Windows"
+	#define VALL_E_ENV_WINDOWS 1
+	#define VALL_E_ENV_HEADER "windows.h"
+	#if defined(__CYGWIN__)
+		#define to_string(var) string(var)
+	#endif
+	#ifndef _WIN32_WINNT
+		#define _WIN32_WINNT 0x0600
+	#endif
+	#ifndef WINVER
+		#define WINVER 0x0600
+	#endif
+	
+	#define VALL_E_IO_ROOT "./data/"
+#elif defined(linux) || defined(__linux)
+	// Linux
+	#define VALL_E_ENV "Linux"
+	#define VALL_E_ENV_LINUX 1
+	#define VALL_E_ENV_HEADER "linux.h"
+	
+	#define VALL_E_IO_ROOT "./data/"
+#elif defined(__APPLE__) || defined(MACOSX) || defined(macintosh) || defined(Macintosh)
+	// MacOS
+	#define VALL_E_ENV "OSX"
+	#define VALL_E_ENV_OSX 1
+	#define VALL_E_ENV_HEADER "osx.h"
+	
+	#define VALL_E_IO_ROOT "./data/"
+#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+	// FreeBSD
+	#define VALL_E_ENV "FreeBSD"
+	#define VALL_E_ENV_FREEBSD 1
+	#define VALL_E_ENV_HEADER "freebsd.h"
+	
+	#define VALL_E_IO_ROOT "./data/"
+#elif defined(__sh__)
+	// Dreamcast
+	#define VALL_E_ENV "Dreamcast"
+	#define VALL_E_ENV_DREAMCAST 1
+	#define VALL_E_ENV_HEADER "dreamcast.h"
+	#include VALL_E_ENV_HEADER
+
+	#define _arch_dreamcast
+
+	#define VALL_E_IO_ROOT "/cd/"
+#else
+	// Unsupported system
+	#define VALL_E_ENV "Unknown"
+	#define VALL_E_ENV_UNKNOWN 1
+	#define VALL_E_ENV_HEADER "unknown.h"
+	#warning Using "unknown"
+ 	#error No support
+#endif
+
+#if !defined(VALL_E_STATIC)
+	#if defined(VALL_E_ENV_WINDOWS)
+		// Windows compilers need specific (and different) keywords for export and import
+		#define VALL_E_API_EXPORT __declspec(dllexport)
+		#define VALL_E_API_IMPORT __declspec(dllimport)
+		// For Visual C++ compilers, we also need to turn off this annoying C4251 warning
+		#ifdef _MSC_VER
+			#pragma warning(disable : 4251)
+		#endif
+	#else // Linux, FreeBSD, Mac OS X
+		#if __GNUC__ >= 4
+			// GCC 4 has special keywords for showing/hidding symbols,
+			// the same keyword is used for both importing and exporting
+			#define VALL_E_API_EXPORT __attribute__ ((__visibility__ ("default")))
+			#define VALL_E_API_IMPORT __attribute__ ((__visibility__ ("default")))
+		#else
+			// GCC < 4 has no mechanism to explicitely hide symbols, everything's exported
+			#define VALL_E_API_EXPORT
+			#define VALL_E_API_IMPORT
+		#endif
+	#endif
+#else
+	// Static build doesn't need import/export macros
+	#define VALL_E_API_EXPORT
+	#define VALL_E_API_IMPORT
+#endif
+
+#ifdef VALL_E_EXPORTS
+	#define VALL_E_API VALL_E_API_EXPORT
+#else
+	#define VALL_E_API VALL_E_API_IMPORT
+#endif
+
+typedef llama_token token_t;
+typedef std::vector<std::vector<token_t>> vall_e_audio_codes_t;
+
+const int ENCODEC_FRAMES_PER_SECOND = 75;
+const int MAX_DURATION = ENCODEC_FRAMES_PER_SECOND * 12;
+const int CTX_SIZE = 2048;
+const int N_THREADS = 8;
+const int N_GPU_LAYERS = 99;
+
+const int MODALITY_AR_NAR = 0;
+const int MODALITY_NAR_LEN = 1;
+
+// forward declarations
+struct io_map_t;
+struct llama_model;
+struct llama_context;
+struct encodec_context;
+
+// model-specific parameters
+// Passed to vall_e_load(); every field has a default so a default-constructed value is usable.
+struct vall_e_context_params_t {
+	std::string model_path = "./data/vall_e.gguf"; // path of the model file to load
+	std::string encodec_path = "./data/encodec.bin"; // path of the encodec weights to load
+	int32_t gpu_layers = N_GPU_LAYERS; // defaults to N_GPU_LAYERS (99)
+	int32_t n_threads = N_THREADS; // defaults to N_THREADS (8)
+	int32_t ctx_size = CTX_SIZE; // defaults to CTX_SIZE (2048)
+	bool verbose = false;
+};
+// inference-specific arguments
+// Filled by vall_e_args_parse(); the defaults describe a single "tts" request.
+struct vall_e_args_t {
+	std::string text = "Hello world."; // text to synthesize
+	std::string prompt_path = "./data/prom.wav"; // audio prompt file
+	std::string output_path = "./data/resp.wav"; // where the generated audio is written
+	std::string language = "en";
+	std::string task = "tts";
+	int modality = MODALITY_NAR_LEN; // one of MODALITY_AR_NAR / MODALITY_NAR_LEN
+	int max_steps = 30;
+	int max_duration = MAX_DURATION; // in encodec frames: ENCODEC_FRAMES_PER_SECOND * 12
+};
+// stores everything needed for vall_e.cpp at runtime
+// Every handle starts out NULL so callers can tell "not loaded" from "loaded" by testing the
+// pointer, no matter how the struct was created.
+struct vall_e_context_t {
+	vall_e_context_params_t params;
+
+	io_map_t* io_map = NULL; // pointer for reasons
+
+	struct {
+		llama_model* model = NULL;
+		llama_context* ctx = NULL;
+	} llama;
+
+	struct {
+		// was left uninitialized, unlike the handles above, so a null test on it could read garbage
+		encodec_context* ctx = NULL;
+	} encodec;
+};
+// stores the raw inputs to be fed
+// Produced by vall_e_prepare_inputs() and handed (by non-const reference) to vall_e_generate().
+struct vall_e_inputs_t {
+	std::string task = "tts";
+	std::string lang = "en";
+
+	token_t rvq_l = 0;
+
+	std::vector<token_t> phn = {}; // flat token sequence
+	vall_e_audio_codes_t prom = {}; // 2-D audio codes (vector of token vectors)
+	vall_e_audio_codes_t resp = {}; // 2-D audio codes (vector of token vectors)
+};
+
+// encodec helpers
+// Audio file <-> float waveform.
+VALL_E_API std::vector<float> read_audio_from_disk( const std::string& path );
+VALL_E_API void write_audio_to_disk( const std::vector<float>& waveform, const std::string& path );
+
+// Waveform <-> 2-D audio codes. Both take the encodec context held in vall_e_context_t::encodec.ctx.
+// (encode_audio's return type is the same type as vall_e_audio_codes_t when token_t is int32_t.)
+VALL_E_API std::vector<std::vector<int32_t>> encode_audio( struct encodec_context* ectx, const std::vector<float>& waveform );
+VALL_E_API std::vector<float> decode_audio( struct encodec_context* ectx, const vall_e_audio_codes_t& codes_2d );
+
+// context management
+// Typical order of use: vall_e_load -> vall_e_prepare_inputs -> vall_e_generate -> decode_audio ->
+// write_audio_to_disk -> vall_e_free.
+// NOTE(review): callers in this repo test the returned context's llama.model / llama.ctx /
+// encodec.ctx for NULL after vall_e_load, so a non-null return presumably does not guarantee a
+// fully loaded context — confirm.
+VALL_E_API void vall_e_print_usage( char** argv, const vall_e_context_params_t& params, const vall_e_args_t& args );
+VALL_E_API bool vall_e_args_parse( int argc, char** argv, vall_e_context_params_t& params, vall_e_args_t& args );
+VALL_E_API vall_e_context_t* vall_e_load( const vall_e_context_params_t& params );
+VALL_E_API vall_e_inputs_t vall_e_prepare_inputs( vall_e_context_t* ctx, const std::string& text, const std::string& prompt_path, const std::string& lang = "auto", const std::string& task = "tts" );
+VALL_E_API vall_e_audio_codes_t vall_e_generate( vall_e_context_t* ctx, vall_e_inputs_t& inputs, int max_steps, int max_duration, int modality = MODALITY_NAR_LEN );
+VALL_E_API void vall_e_free( vall_e_context_t* ctx );
--- a/docs/README.md
+++ b/docs/README.md
@ -27,9 +27,12 @@ To be filled.
 	* *very* loosely integrated
 	* basic shapes and triangulated mesh collision and some form of ray queries
 * OpenAL for audio
-	* Currently only loads from ogg (vorbis) files
+	* Currently only loads from `.ogg` (vorbis) files
 	* Supports loading in full and streaming
 	* *very* loosely integrated
+* Speech synthesis using [vall_e.cpp](https://github.com/e-c-k-e-r/vall-e/)
+	* `win64.gcc.vulkan` binaries can be found [here](https://github.com/e-c-k-e-r/vall-e/releases/tag/vall_e.cpp), if compiled.
+	* currently only generates `.wav` files

 ## Supported Systems

--- a/engine/inc/uf/ext/vall_e/vall_e.h
+++ b/engine/inc/uf/ext/vall_e/vall_e.h
@ -0,0 +1,16 @@
+#pragma once
+
+#include <uf/config.h>
+
+#if UF_USE_VALL_E
+
+#include <vall_e.cpp/vall_e.h>
+// Engine-side wrapper around vall_e.cpp; keeps one process-wide context.
+namespace ext {
+	namespace vall_e {
+		// Loads the model and the encodec weights. An empty path selects the built-in default
+		// ("./data/llm/vall_e.gguf" / "./data/llm/encodec.bin"). Failure is reported through the
+		// engine's error log only; nothing is returned.
+		void UF_API initialize( const std::string& model_path = "", const std::string& encodec_path = "" );
+		// Synthesizes `text` using the audio prompt file `prom` and writes a .wav under "./data/tmp/".
+		// Returns the path of the written file, or an empty string when no context is loaded.
+		// An empty `lang` is treated as "en".
+		std::string UF_API generate( const std::string& text, const std::string& prom, const std::string& lang = "en" );
+		// Frees the context created by initialize(); does nothing if there is none.
+		void UF_API terminate();
+	}
+}
+
+#endif
--- a/engine/inc/uf/spec/context/context.h
+++ b/engine/inc/uf/spec/context/context.h
@ -7,4 +7,4 @@
 #include "universal.h"
 // defines which implementation to use
 #include UF_ENV_HEADER
-//
+//
--- a/engine/inc/uf/spec/controller/controller.h
+++ b/engine/inc/uf/spec/controller/controller.h
@ -7,4 +7,4 @@
 #include "universal.h"
 // defines which implementation to use
 #include UF_ENV_HEADER
-// this line is required
+//
--- a/engine/inc/uf/spec/time/time.h
+++ b/engine/inc/uf/spec/time/time.h
@ -7,4 +7,4 @@
 #include "universal.h"
 // defines which implementation to use
 #include UF_ENV_HEADER
-//
+//
--- a/engine/inc/uf/spec/time/universal.h
+++ b/engine/inc/uf/spec/time/universal.h
@ -12,6 +12,7 @@ namespace spec {
 		protected:
 			
 		public:
+			spec::uni::Time::time_t UF_API_CALL unixTime();
 			spec::uni::Time::time_t UF_API_CALL getTime();
 		};
 	};
--- a/engine/inc/uf/spec/window/window.h
+++ b/engine/inc/uf/spec/window/window.h
@ -7,4 +7,4 @@
 #include "universal.h"
 // defines which implementation to use
 #include UF_ENV_HEADER
-//
+//
--- a/engine/inc/uf/utils/thread/thread.h
+++ b/engine/inc/uf/utils/thread/thread.h
@ -18,8 +18,9 @@

 namespace uf {
 	namespace thread {
-		extern UF_API uf::stl::string workerThreadName;
 		extern UF_API uf::stl::string mainThreadName;
+		extern UF_API uf::stl::string workerThreadName;
+		extern UF_API uf::stl::string asyncThreadName;
 	}
 }

--- a/engine/inc/uf/utils/time/time.h
+++ b/engine/inc/uf/utils/time/time.h
@ -90,6 +90,8 @@ namespace uf {
 		extern UF_API double previous;
 		extern UF_API float delta;
 		extern UF_API float clamp;
+
+		size_t UF_API time();
 	}
 }

--- a/engine/src/ext/imgui/imgui.cpp
+++ b/engine/src/ext/imgui/imgui.cpp
@ -118,7 +118,26 @@ namespace {
 				this->scroll.bottom = true;
 				reclaimFocus = true;

-				uf::console::execute( command );
+				// to-do: add a way to either asynchronously invoke commands or not
+
+				uf::thread::queue( uf::thread::asyncThreadName, [=](){
+					uf::console::execute( command );
+				});
+			/*
+				// this blocks
+				uf::thread::queue( uf::thread::fetchWorker(), [=](){
+					uf::console::execute( command );
+				});
+			*/
+			/*
+				// this still blocks
+				auto tasks = uf::thread::schedule(true);
+				tasks.queue([=](){
+					uf::console::execute( command );
+				});
+				uf::thread::execute( tasks );
+			*/
+
 			}

 			ImGui::SetItemDefaultFocus();
--- a/engine/src/ext/vall_e/vall_e.cpp
+++ b/engine/src/ext/vall_e/vall_e.cpp
@ -0,0 +1,52 @@
+#include <uf/ext/vall_e/vall_e.h>
+#include <uf/utils/time/time.h>
+
+#if UF_USE_VALL_E
+namespace {
+	// the single vall_e.cpp context shared by initialize() / generate() / terminate()
+	vall_e_context_t* ctx = nullptr;
+}
+
+// Loads the vall_e.cpp context used by generate().
+//   model_path    model file; empty selects "./data/llm/vall_e.gguf"
+//   encodec_path  encodec weights; empty selects "./data/llm/encodec.bin"
+// On failure an error is logged and the module is left in the "not initialized" state, so
+// generate() returns "" instead of working on a half-loaded context.
+void ext::vall_e::initialize( const std::string& model_path, const std::string& encodec_path ) {
+	vall_e_context_params_t params;
+	params.model_path = model_path.empty() ? "./data/llm/vall_e.gguf" : model_path;
+	params.encodec_path = encodec_path.empty() ? "./data/llm/encodec.bin" : encodec_path;
+	params.gpu_layers = N_GPU_LAYERS;
+	params.n_threads = N_THREADS;
+	params.ctx_size = CTX_SIZE;
+	params.verbose = false;
+
+	vall_e_context_t* loaded = vall_e_load( params );
+	const bool usable = loaded && loaded->llama.model && loaded->llama.ctx && loaded->encodec.ctx;
+	if ( !usable ) {
+		// Only publish a context once every part of it is loaded; previously the partial context
+		// stayed in ::ctx and generate() went on to dereference its null members.
+		// NOTE(review): the partial context is deliberately not passed to vall_e_free(), since it is
+		// not visible here whether that tolerates missing members — confirm and free it if safe.
+		::ctx = NULL;
+		UF_MSG_ERROR("failed to initialize vall_e.cpp");
+		return;
+	}
+
+	::ctx = loaded;
+}
+// Synthesizes `text` in the voice of the audio prompt file `prom` and writes the result to
+// "./data/tmp/<uf::time::time()>.wav".
+//   lang  language code; empty is treated as "en"
+// Returns the path of the written file, or "" when the context is missing or incomplete, or when
+// generation or decoding produced nothing.
+std::string ext::vall_e::generate( const std::string& text, const std::string& prom, const std::string& lang ) {
+	// same completeness test as initialize(): a context with missing parts must not be used
+	if ( !::ctx || !::ctx->llama.model || !::ctx->llama.ctx || !::ctx->encodec.ctx ) return "";
+
+	std::string path = "./data/tmp/" + std::to_string(uf::time::time()) + ".wav";
+
+	const std::string language = lang.empty() ? "en" : lang;
+	const int modality = MODALITY_NAR_LEN;
+	const int max_steps = 30;
+	const int max_duration = MAX_DURATION;
+
+	// task is left at vall_e_prepare_inputs()'s default ("tts")
+	auto inputs = vall_e_prepare_inputs( ::ctx, text, prom, language );
+	auto output_audio_codes = vall_e_generate( ::ctx, inputs, max_steps, max_duration, modality );
+	// nothing generated: do not decode or write an empty file, and do not hand back a path to one
+	if ( output_audio_codes.empty() ) return "";
+
+	auto waveform = decode_audio( ::ctx->encodec.ctx, output_audio_codes );
+	if ( waveform.empty() ) return "";
+
+	write_audio_to_disk( waveform, path );
+	//UF_MSG_DEBUG("Generated to {}", path);
+
+	return path;
+}
+// Frees the context created by initialize(). Safe to call when nothing is loaded and safe to
+// call more than once.
+void ext::vall_e::terminate() {
+	if ( !::ctx ) return;
+
+	vall_e_free( ::ctx );
+	// drop the handle: it used to stay dangling, so a second terminate() freed it again and a
+	// later generate() used freed memory
+	::ctx = NULL;
+}
+#endif
--- a/engine/src/spec/time/universal.cpp
+++ b/engine/src/spec/time/universal.cpp
@ -12,6 +12,9 @@ namespace {
 	chrono_time_t start = getTimePoint();
 }

+// Returns the time elapsed since the epoch of getTimePoint()'s clock, in whole microseconds.
+// NOTE(review): this is Unix time only if getTimePoint() reads a clock whose epoch is 1970-01-01 -- confirm
+spec::uni::Time::time_t spec::uni::Time::unixTime() {
+	const auto sinceEpoch = getTimePoint().time_since_epoch();
+	const auto micros = std::chrono::duration_cast<std::chrono::microseconds>( sinceEpoch );
+	return micros.count();
+}
 spec::uni::Time::time_t spec::uni::Time::getTime() {
 	std::chrono::duration<double> elapsed = getTimePoint() - start;
 	return elapsed.count() * 1000000;
--- a/engine/src/utils/io/console.cpp
+++ b/engine/src/utils/io/console.cpp
@ -38,18 +38,29 @@ void uf::console::initialize() {
 	});

 	uf::console::registerCommand("callHook", "Calls a hook, passing the arguments as a JSON object", [&]( const uf::stl::string& arguments )->uf::stl::string{
-		auto match = uf::string::match( arguments, "/^(.+?)(?: (.+?))?$/" );
+		auto match = uf::string::match( arguments, "/^\"?(.+?)\"?(?: (.+?))?$/" ); // hook name may be wrapped in double quotes; the optional remainder is the JSON payload
 		if ( match.empty() ) return "invalid invocation";

+		uf::stl::vector<pod::Hook::userdata_t> results; // whatever uf::hooks.call() hands back
 		if ( match.size() > 2 ) {
 			ext::json::Value json;
 			ext::json::decode( json, match[2] );
-			uf::hooks.call( match[1], json );
+
+			results = uf::hooks.call( match[1], json );
 		} else {
-			uf::hooks.call( match[1] );
+			results = uf::hooks.call( match[1] );
 		}

-		return "Hook executed: " + match[1];
+		// this could probably be its own function: renders each result as "\n[index] => value"
+		uf::stl::string s_result = "";
+		for ( size_t i = 0; i < results.size(); ++i ) { // size_t counter: results.size() is unsigned, an int counter mixes signedness
+			auto& res = results[i];
+			if ( res.is<uf::stl::string>() ) s_result += ::fmt::format("\n[{}] => {}", i, res.as<uf::stl::string>());
+			else if ( res.is<ext::json::Value>() ) s_result += ::fmt::format("\n[{}] => {}", i, ext::json::encode( res.as<ext::json::Value>() ));
+			else s_result += ::fmt::format("\n[{}] => Userdata: {}", i, (void*) res);
+		}
+
+		return "Hook executed: " + match[1] + s_result;
 	});
 	
 	uf::console::registerCommand("json", "Modifies the gamestate by setting a JSON value", [&]( const uf::stl::string& arguments )->uf::stl::string{
--- a/engine/src/utils/thread/thread.cpp
+++ b/engine/src/utils/thread/thread.cpp
@ -8,8 +8,9 @@ float uf::thread::limiter = 1.0f / 120.0f;
 uint uf::thread::workers = 1;
 std::thread::id uf::thread::mainThreadId = std::this_thread::get_id();
 bool uf::thread::async = false;
-uf::stl::string uf::thread::workerThreadName = "Worker";
 uf::stl::string uf::thread::mainThreadName = "Main";
+uf::stl::string uf::thread::workerThreadName = "Worker";
+uf::stl::string uf::thread::asyncThreadName = "Async"; // name ext::initialize() passes to uf::thread::get() when setting up the non-blocking, asynchronous thread

 #define UF_THREAD_ANNOUNCE(...) UF_MSG_DEBUG(__VA_ARGS__)

--- a/engine/src/utils/time/time.cpp
+++ b/engine/src/utils/time/time.cpp
@ -10,4 +10,8 @@ size_t uf::time::frame = 0;
 double uf::time::current = 0;
 double uf::time::previous = 0;
 float uf::time::delta = 0;
-float uf::time::clamp = 0;
+float uf::time::clamp = 0;
+
+// Returns the value of spec::time.unixTime(), converted to size_t.
+size_t uf::time::time() {
+	return static_cast<size_t>( spec::time.unixTime() );
+}
--- a/ext/main.cpp
+++ b/ext/main.cpp
@ -1,10 +1,10 @@
 #include "main.h"
+#include "ext.h"

-#include <uf/ext/ext.h>
-#include <uf/ext/oal/oal.h>
-
-#include <uf/spec/terminal/terminal.h>
-#include <uf/spec/controller/controller.h>
+#include <fstream>
+#include <iostream>
+#include <regex>
+#include <sys/stat.h>

 #include <uf/utils/time/time.h>
 #include <uf/utils/audio/audio.h>
@ -22,32 +22,27 @@
 #include <uf/utils/graphic/graphic.h>
 #include <uf/utils/camera/camera.h>
 #include <uf/utils/http/http.h>
+#include <uf/utils/renderer/renderer.h>
+#include <uf/utils/io/console.h>
+#include <uf/utils/io/inputs.h>
+#include <uf/spec/terminal/terminal.h>
+#include <uf/spec/controller/controller.h>
+#include <uf/utils/memory/string.h>

 #include <uf/engine/entity/entity.h>
 #include <uf/engine/graph/graph.h>
-#include <uf/utils/io/inputs.h>
-
-#include <sys/stat.h>
-
-#include <uf/utils/memory/string.h>
-#include <fstream>
-#include <iostream>
-
-#include <regex>
-
-#include "ext.h"
-
 #include <uf/engine/scene/scene.h>
 #include <uf/engine/asset/asset.h>

-#include <uf/utils/renderer/renderer.h>
-#include <uf/utils/io/console.h>
+#include <uf/ext/ext.h>
+#include <uf/ext/oal/oal.h>
 #include <uf/ext/discord/discord.h>
 #include <uf/ext/openvr/openvr.h>
 #include <uf/ext/lua/lua.h>
 #include <uf/ext/ultralight/ultralight.h>
-#include <uf/ext/imgui/imgui.h>
 #include <uf/ext/ffx/fsr.h>
+#include <uf/ext/imgui/imgui.h>
+#include <uf/ext/vall_e/vall_e.h>

 bool ext::ready = false;
 uf::stl::vector<uf::stl::string> ext::arguments;
@ -88,6 +83,11 @@ namespace {
 				struct {
 					bool enabled;
 				} ultralight, discord, imgui;
+				struct {
+					bool enabled; // gates ext::vall_e::initialize()/terminate() and the hook registration in this file
+					std::string model_path = ""; // empty: ext::vall_e::initialize() falls back to ./data/llm/vall_e.gguf
+					std::string encodec_path = ""; // empty: ext::vall_e::initialize() falls back to ./data/llm/encodec.bin
+				} vall_e;
 			} ext;

 			struct {
@ -118,6 +118,10 @@ void EXT_API ext::load( ext::json::Value& json ) {
 	::config.engine.ext.ultralight.enabled = json["engine"]["ext"]["ultralight"]["enabled"].as(::config.engine.ext.ultralight.enabled);
 	::config.engine.ext.discord.enabled = json["engine"]["ext"]["discord"]["enabled"].as(::config.engine.ext.discord.enabled);
 	::config.engine.ext.imgui.enabled = json["engine"]["ext"]["imgui"]["enabled"].as(::config.engine.ext.imgui.enabled);
+	
+	::config.engine.ext.vall_e.enabled = json["engine"]["ext"]["vall_e"]["enabled"].as(::config.engine.ext.vall_e.enabled);
+	::config.engine.ext.vall_e.model_path = json["engine"]["ext"]["vall_e"]["model_path"].as(::config.engine.ext.vall_e.model_path);
+	::config.engine.ext.vall_e.encodec_path = json["engine"]["ext"]["vall_e"]["encodec_path"].as(::config.engine.ext.vall_e.encodec_path);

 	::config.engine.limiter.print = json["engine"]["debug"]["framerate"]["print"].as(::config.engine.limiter.print);

@ -305,6 +309,9 @@ void EXT_API ext::initialize() {
 	/* Setup deferred Main thread */ {
 		uf::thread::get(uf::thread::mainThreadName);
 	}
+	/* Setup non-blocking, asynchronous thread */ {
+		uf::thread::get(uf::thread::asyncThreadName);
+	}
 	/* set JSON implicit preferences */ {
 		ext::json::PREFERRED_ENCODING = ::json["engine"]["ext"]["json"]["encoding"].as(ext::json::PREFERRED_ENCODING);
 		ext::json::PREFERRED_COMPRESSION = ::json["engine"]["ext"]["json"]["compression"].as(ext::json::PREFERRED_COMPRESSION);
@ -717,6 +724,23 @@ void EXT_API ext::initialize() {
 	if ( ::config.engine.ext.imgui.enabled ) {
 	//	ext::imgui::initialize();
 	}
+#endif
+#if UF_USE_VALL_E
+	if ( ::config.engine.ext.vall_e.enabled ) {
+		ext::vall_e::initialize( ::config.engine.ext.vall_e.model_path, ::config.engine.ext.vall_e.encodec_path );
+
+		// bind the hook: reads "text" and "prom" from the JSON payload and returns the path produced by ext::vall_e::generate()
+		uf::hooks.addHook( "llm:VALL-E.synthesize", [&](ext::json::Value& json){
+			auto text = json["text"].as<uf::stl::string>();
+			auto prom = json["prom"].as<uf::stl::string>();
+		
+			auto path = ext::vall_e::generate( text, prom ); // no language is forwarded; generate() is called inline from the hook
+			
+			UF_MSG_DEBUG("Called {} {}: {}", text, prom, path);
+
+			return path;
+		});
+	}
 #endif
 	/* Add hooks */ {

@ -1138,6 +1162,11 @@ void EXT_API ext::terminate() {
 	/* Terminate controllers */ {
 		spec::controller::terminate();
 	}
+#if UF_USE_VALL_E
+	if ( ::config.engine.ext.vall_e.enabled ) {
+		ext::vall_e::terminate();
+	}
+#endif
 #if UF_USE_IMGUI
 	if ( ::config.engine.ext.imgui.enabled ) {
 		ext::imgui::terminate();