ugh
This commit is contained in:
parent
b6692ce3de
commit
c34763769a
|
@ -89,28 +89,7 @@ std::vector<float> read_2d_tensor( struct ggml_tensor* tensor ) {
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
ggml_tensor* view_2d_tensor( struct ggml_tensor* tensor, int32_t start, int32_t end, int32_t dim ) {
|
|
||||||
// to-do: implement other dim
|
|
||||||
if ( start < 0 ) start = tensor->ne[1] + start;
|
|
||||||
if ( end < 0 ) end = tensor->ne[1] + end;
|
|
||||||
|
|
||||||
ggml_tensor* res = new ggml_tensor();
|
|
||||||
memcpy( res, tensor, sizeof(ggml_tensor) );
|
|
||||||
|
|
||||||
res->op = GGML_OP_VIEW;
|
|
||||||
res->src[0] = tensor;
|
|
||||||
|
|
||||||
res->data += res->nb[1] * start;
|
|
||||||
res->ne[1] = end - start;
|
|
||||||
|
|
||||||
for (int i = 2; i < GGML_MAX_DIMS; i++) {
|
|
||||||
res->nb[i] = res->nb[i - 1] * res->ne[i - 1];
|
|
||||||
}
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
ggml_tensor* view_2d_tensor( struct ggml_context* ctx, struct ggml_tensor* tensor, int32_t start, int32_t end, int32_t dim ) {
|
ggml_tensor* view_2d_tensor( struct ggml_context* ctx, struct ggml_tensor* tensor, int32_t start, int32_t end, int32_t dim ) {
|
||||||
// to-do: implement other dim
|
// to-do: implement other dim
|
||||||
if ( start < 0 ) start = tensor->ne[1] + start;
|
if ( start < 0 ) start = tensor->ne[1] + start;
|
||||||
|
@ -601,13 +580,14 @@ std::vector<token_t> generate( vall_e_context_t* ctx, vall_e_inputs_t& inputs, i
|
||||||
int32_t seq_len = n_outputs;
|
int32_t seq_len = n_outputs;
|
||||||
int32_t top_k = 0;
|
int32_t top_k = 0;
|
||||||
float top_p = 1.0;
|
float top_p = 1.0;
|
||||||
float temperature = 1.0f;
|
float temperature = 1.5f;
|
||||||
float cfg_strength = 2.5f;
|
float cfg_strength = 3.0f;
|
||||||
float start_noise = 0.0f;
|
float start_noise = 0.0f;
|
||||||
float end_noise = 1.0f;
|
float end_noise = 1.0f;
|
||||||
bool annealed_sampling = true;
|
bool annealed_sampling = true;
|
||||||
bool remasking = true;
|
bool remasking = true;
|
||||||
float cfg_rescale = 0.75f;
|
float cfg_rescale = 0.75f;
|
||||||
|
bool entropy_scoring = true;
|
||||||
|
|
||||||
// fill with masked tokens
|
// fill with masked tokens
|
||||||
output_tokens.clear();
|
output_tokens.clear();
|
||||||
|
@ -621,7 +601,7 @@ std::vector<token_t> generate( vall_e_context_t* ctx, vall_e_inputs_t& inputs, i
|
||||||
llama_batch null_batch = llama_batch_init( ctx->params.ctx_size, ctx->io_map->n_embd, ctx->params.ctx_size );
|
llama_batch null_batch = llama_batch_init( ctx->params.ctx_size, ctx->io_map->n_embd, ctx->params.ctx_size );
|
||||||
|
|
||||||
// token scores to reference for masking
|
// token scores to reference for masking
|
||||||
std::vector<float> scores(n_outputs, 1.0);
|
std::vector<float> scores(n_outputs, entropy_scoring ? 0.0 : 1.0);
|
||||||
|
|
||||||
// do one step on many tokens
|
// do one step on many tokens
|
||||||
for ( auto step = 0; step < steps; ++step ) {
|
for ( auto step = 0; step < steps; ++step ) {
|
||||||
|
@ -635,7 +615,7 @@ std::vector<token_t> generate( vall_e_context_t* ctx, vall_e_inputs_t& inputs, i
|
||||||
float sampling_cfg_strength = annealed_sampling ? timestep * cfg_strength : cfg_strength;
|
float sampling_cfg_strength = annealed_sampling ? timestep * cfg_strength : cfg_strength;
|
||||||
|
|
||||||
float noise_p = cos( timestep * PI * 0.5f );
|
float noise_p = cos( timestep * PI * 0.5f );
|
||||||
float remask_p = remasking ? 0.5f / steps : 0.0f;
|
float remask_p = remasking ? 1.0f / (steps * 2.0f) : 0.0f;
|
||||||
|
|
||||||
int32_t n_masked_tokens = (noise_p + remask_p) * seq_len;
|
int32_t n_masked_tokens = (noise_p + remask_p) * seq_len;
|
||||||
if ( n_masked_tokens < 1 ) {
|
if ( n_masked_tokens < 1 ) {
|
||||||
|
@ -651,7 +631,9 @@ std::vector<token_t> generate( vall_e_context_t* ctx, vall_e_inputs_t& inputs, i
|
||||||
std::vector<score_t> sorted_scores( n_outputs );
|
std::vector<score_t> sorted_scores( n_outputs );
|
||||||
for ( auto i = 0; i < n_outputs; ++i ) sorted_scores[i] = { i, scores[i] };
|
for ( auto i = 0; i < n_outputs; ++i ) sorted_scores[i] = { i, scores[i] };
|
||||||
std::sort(sorted_scores.begin(), sorted_scores.end());
|
std::sort(sorted_scores.begin(), sorted_scores.end());
|
||||||
// std::reverse(sorted_scores.begin(), sorted_scores.end());
|
if ( entropy_scoring) {
|
||||||
|
std::reverse(sorted_scores.begin(), sorted_scores.end());
|
||||||
|
}
|
||||||
|
|
||||||
// and top-k pick the worst scores
|
// and top-k pick the worst scores
|
||||||
for ( auto i = 0; i < n_masked_tokens; ++i ) {
|
for ( auto i = 0; i < n_masked_tokens; ++i ) {
|
||||||
|
@ -669,8 +651,8 @@ std::vector<token_t> generate( vall_e_context_t* ctx, vall_e_inputs_t& inputs, i
|
||||||
inputs.resp[0] = output_tokens;
|
inputs.resp[0] = output_tokens;
|
||||||
fill_batch( batch, inputs, *ctx->io_map, mode );
|
fill_batch( batch, inputs, *ctx->io_map, mode );
|
||||||
// update null batch
|
// update null batch
|
||||||
null_input.resp[0] = output_tokens;
|
|
||||||
null_batch.n_tokens = 0;
|
null_batch.n_tokens = 0;
|
||||||
|
null_input.resp[0] = output_tokens;
|
||||||
fill_batch( null_batch, inputs, *ctx->io_map, mode );
|
fill_batch( null_batch, inputs, *ctx->io_map, mode );
|
||||||
|
|
||||||
// cfg decode
|
// cfg decode
|
||||||
|
@ -707,7 +689,7 @@ std::vector<token_t> generate( vall_e_context_t* ctx, vall_e_inputs_t& inputs, i
|
||||||
for ( auto idx = 0; idx < n_outputs; ++idx ) {
|
for ( auto idx = 0; idx < n_outputs; ++idx ) {
|
||||||
// skip if not masked
|
// skip if not masked
|
||||||
if ( !is_masked[idx] ) {
|
if ( !is_masked[idx] ) {
|
||||||
scores[idx] = 0.0;
|
scores[idx] = entropy_scoring ? 0.0 : 1.0;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -716,6 +698,7 @@ std::vector<token_t> generate( vall_e_context_t* ctx, vall_e_inputs_t& inputs, i
|
||||||
|
|
||||||
// perform softmax before modifying logits
|
// perform softmax before modifying logits
|
||||||
std::vector<float> softmaxed = soft_max( n_vocab, logit );
|
std::vector<float> softmaxed = soft_max( n_vocab, logit );
|
||||||
|
int32_t t_u = std::distance( softmaxed.begin(), std::max_element(softmaxed.begin(), softmaxed.end()) );
|
||||||
|
|
||||||
std::vector<float> summed(n_vocab);
|
std::vector<float> summed(n_vocab);
|
||||||
for (int i = 0; i < n_vocab; i++) {
|
for (int i = 0; i < n_vocab; i++) {
|
||||||
|
@ -739,19 +722,16 @@ std::vector<token_t> generate( vall_e_context_t* ctx, vall_e_inputs_t& inputs, i
|
||||||
// store token if it was masked
|
// store token if it was masked
|
||||||
output_tokens[idx] = t;
|
output_tokens[idx] = t;
|
||||||
// update score if it was masked
|
// update score if it was masked
|
||||||
|
if ( entropy_scoring ) {
|
||||||
// this is actually wrong
|
float entropy = 0.f;
|
||||||
scores[idx] = 1.0 - softmaxed[t]; // invert so we pick the worst tokens later
|
for (int v = 0; v < n_vocab; ++v ) {
|
||||||
|
float p = softmaxed[v];
|
||||||
// this seems to work better
|
if (p > 0) entropy -= p * std::log(p + 1e-9);
|
||||||
/*
|
}
|
||||||
float entropy = 0.f;
|
scores[idx] = entropy / std::log(n_vocab); // normalize [0–1]
|
||||||
for (int v = 0; v < n_vocab; ++v ) {
|
} else {
|
||||||
float p = softmaxed[v];
|
scores[idx] = softmaxed[t_u]; // invert so we pick the worst tokens later
|
||||||
if (p > 0) entropy -= p * std::log(p + 1e-9);
|
|
||||||
}
|
}
|
||||||
scores[idx] = entropy / std::log(n_vocab); // normalize [0–1]
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
llama_sampler_free(smpl);
|
llama_sampler_free(smpl);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user