vall-e/vall_e.cpp/include/encodec.h

184 lines
7.0 KiB
C

/*
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2024 Pierre-Antoine Bannier │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
/*
* This file contains the declarations of the structs and functions used in the encodec library.
* The library provides functionality for audio compression and decompression using a custom model.
* The model consists of an encoder, a quantizer and a decoder, each with their own set of parameters.
* The library also provides functions for loading and freeing the model, as well as compressing and decompressing audio data.
*
*/
#pragma once
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml.h"
#ifdef __cplusplus
extern "C" {
#endif
struct encodec_context;
struct encodec_statistics {
// The time taken to load the model.
int64_t t_load_us;
// The time taken to compute the model.
int64_t t_compute_us;
};
/**
* Loads an encodec model from the specified file path.
*
* @param model_path The file path to the encodec model.
* @param offset The offset (in bytes) to the start of the model in the file.
* @param n_gpu_layers The number of GPU layers to use.
* @return A pointer to the encodec context struct.
*/
struct encodec_context *encodec_load_model(
const char *model_path,
const int offset,
int n_gpu_layers);
/**
* Sets the target bandwidth for the given encodec context.
*
* @param ectx The encodec context to set the target bandwidth for.
* @param bandwidth The target bandwidth to set, in bits per second.
*/
void encodec_set_target_bandwidth(
struct encodec_context *ectx,
int bandwidth);
/**
* Sets the sample rate for the given encodec context.
*
* @param ectx The encodec context to set the target bandwidth for.
* @param sample_rate The sample rate to set.
*/
void encodec_set_sample_rate(
struct encodec_context *ectx,
int sample_rate);
/**
* Reconstructs audio from raw audio data using the specified encodec context.
*
* @param ectx The encodec context to use for reconstruction.
* @param raw_audio The raw audio data to reconstruct.
* @param n_samples The number of samples in the raw audio buffer.
* @param n_threads The number of threads to use for reconstruction.
* @return True if the reconstruction was successful, false otherwise.
*/
bool encodec_reconstruct_audio(
struct encodec_context *ectx,
const float *raw_audio,
const int n_samples,
int n_threads);
/**
* Compresses audio data using the specified encodec context.
*
* @param ectx The encodec context to use for compression.
* @param raw_audio The raw audio data to compress.
* @param n_samples The number of samples in the raw audio buffer.
* @param n_threads The number of threads to use for compression.
* @return True if the compression was successful, false otherwise.
*/
bool encodec_compress_audio(
struct encodec_context *ectx,
const float *raw_audio,
const int n_samples,
int n_threads);
/**
* Decompresses audio data using the specified encodec context.
*
* @param ectx The encodec context to use for decompression.
* @param codes The compressed audio data to decompress.
* @param n_codes The number of codes in the codes buffer.
* @param n_threads The number of threads to use for decompression.
* @return True if the audio data was successfully decompressed, false otherwise.
*/
bool encodec_decompress_audio(
struct encodec_context *ectx,
const int32_t *codes,
const int n_codes,
int n_threads);
/**
* Gets the audio data from the given encodec context.
*
* @param ectx The encodec context to get the audio data from.
* @return A pointer to the audio data.
*/
float * encodec_get_audio(
struct encodec_context *ectx);
/**
* Gets the size of the audio data from the given encodec context.
*
* @param ectx The encodec context to get the audio size from.
* @return The size of the audio data.
*/
int encodec_get_audio_size(
struct encodec_context *ectx);
/**
* Gets the code data from the given encodec context.
*
* @param ectx The encodec context to get the code data from.
* @return A pointer to the code data.
*/
int32_t * encodec_get_codes(
struct encodec_context *ectx);
/**
* Gets the size of the code data from the given encodec context.
*
* @param ectx The encodec context to get the code size from.
* @return The size of the code data.
*/
int encodec_get_codes_size(
struct encodec_context *ectx);
/**
* Gets the statistics for the given encodec context.
*
* @param ectx The encodec context to get the statistics for.
* @return A pointer to the statistics struct.
*/
const struct encodec_statistics* encodec_get_statistics(
struct encodec_context *ectx);
/**
* Reset the statistics for the given encodec context.
*
* @param ectx The encodec context to reset the statistics for.
*/
void encodec_reset_statistics(
struct encodec_context *ectx);
/**
* @brief Frees the memory allocated for an encodec context.
*
* @param ectx The encodec context to free.
*/
void encodec_free(
struct encodec_context *ectx);
#ifdef __cplusplus
}
#endif