vall-e/vall_e.cpp/include/encodec.h

184 lines
7.0 KiB
C
Raw Normal View History

2024-12-25 04:39:32 +00:00
/*
Copyright 2024 Pierre-Antoine Bannier
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
/*
* This file contains the declarations of the structs and functions used in the encodec library.
* The library provides functionality for audio compression and decompression using a custom model.
* The model consists of an encoder, a quantizer and a decoder, each with their own set of parameters.
* The library also provides functions for loading and freeing the model, as well as compressing and decompressing audio data.
*
*/
#pragma once
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml.h"
#ifdef __cplusplus
extern "C" {
#endif
struct encodec_context;
struct encodec_statistics {
// The time taken to load the model.
int64_t t_load_us;
// The time taken to compute the model.
int64_t t_compute_us;
};
/**
* Loads an encodec model from the specified file path.
*
* @param model_path The file path to the encodec model.
* @param offset The offset (in bytes) to the start of the model in the file.
* @param n_gpu_layers The number of GPU layers to use.
* @return A pointer to the encodec context struct.
*/
struct encodec_context *encodec_load_model(
const char *model_path,
const int offset,
int n_gpu_layers);
/**
* Sets the target bandwidth for the given encodec context.
*
* @param ectx The encodec context to set the target bandwidth for.
* @param bandwidth The target bandwidth to set, in bits per second.
*/
void encodec_set_target_bandwidth(
struct encodec_context *ectx,
int bandwidth);
/**
* Sets the sample rate for the given encodec context.
*
* @param ectx The encodec context to set the target bandwidth for.
* @param sample_rate The sample rate to set.
*/
void encodec_set_sample_rate(
struct encodec_context *ectx,
int sample_rate);
/**
* Reconstructs audio from raw audio data using the specified encodec context.
*
* @param ectx The encodec context to use for reconstruction.
* @param raw_audio The raw audio data to reconstruct.
* @param n_samples The number of samples in the raw audio buffer.
* @param n_threads The number of threads to use for reconstruction.
* @return True if the reconstruction was successful, false otherwise.
*/
bool encodec_reconstruct_audio(
struct encodec_context *ectx,
const float *raw_audio,
const int n_samples,
int n_threads);
/**
* Compresses audio data using the specified encodec context.
*
* @param ectx The encodec context to use for compression.
* @param raw_audio The raw audio data to compress.
* @param n_samples The number of samples in the raw audio buffer.
* @param n_threads The number of threads to use for compression.
* @return True if the compression was successful, false otherwise.
*/
bool encodec_compress_audio(
struct encodec_context *ectx,
const float *raw_audio,
const int n_samples,
int n_threads);
/**
* Decompresses audio data using the specified encodec context.
*
* @param ectx The encodec context to use for decompression.
* @param codes The compressed audio data to decompress.
* @param n_codes The number of codes in the codes buffer.
* @param n_threads The number of threads to use for decompression.
* @return True if the audio data was successfully decompressed, false otherwise.
*/
bool encodec_decompress_audio(
struct encodec_context *ectx,
const int32_t *codes,
const int n_codes,
int n_threads);
/**
* Gets the audio data from the given encodec context.
*
* @param ectx The encodec context to get the audio data from.
* @return A pointer to the audio data.
*/
float * encodec_get_audio(
struct encodec_context *ectx);
/**
* Gets the size of the audio data from the given encodec context.
*
* @param ectx The encodec context to get the audio size from.
* @return The size of the audio data.
*/
int encodec_get_audio_size(
struct encodec_context *ectx);
/**
* Gets the code data from the given encodec context.
*
* @param ectx The encodec context to get the code data from.
* @return A pointer to the code data.
*/
int32_t * encodec_get_codes(
struct encodec_context *ectx);
/**
* Gets the size of the code data from the given encodec context.
*
* @param ectx The encodec context to get the code size from.
* @return The size of the code data.
*/
int encodec_get_codes_size(
struct encodec_context *ectx);
/**
* Gets the statistics for the given encodec context.
*
* @param ectx The encodec context to get the statistics for.
* @return A pointer to the statistics struct.
*/
const struct encodec_statistics* encodec_get_statistics(
struct encodec_context *ectx);
/**
* Reset the statistics for the given encodec context.
*
* @param ectx The encodec context to reset the statistics for.
*/
void encodec_reset_statistics(
struct encodec_context *ectx);
/**
* @brief Frees the memory allocated for an encodec context.
*
* @param ectx The encodec context to free.
*/
void encodec_free(
struct encodec_context *ectx);
#ifdef __cplusplus
}
#endif