From 4a909ceff8767e18c5d2d1192cf19a6266eb5af6 Mon Sep 17 00:00:00 2001
From: mrq
Date: Sat, 5 Apr 2025 11:04:26 -0500
Subject: [PATCH] temp fix for vall_e.cpp demask scoring regression

---
 vall_e.cpp/README.md  |  3 +--
 vall_e.cpp/vall_e.cpp | 12 +++++++++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/vall_e.cpp/README.md b/vall_e.cpp/README.md
index 197d9e3..8344737 100644
--- a/vall_e.cpp/README.md
+++ b/vall_e.cpp/README.md
@@ -23,8 +23,6 @@ Run `make`.
 
 ## To-Do
 
-* [ ] fix regressions that appeared for whatever reason
-  * it seems to be related to the demasking step, as low steps = fine, more steps = bad......
 * [x] converted model to GGUF
   * [x] convert it without modifying any of the existing code, as the tokenizer requires some care
 * [x] basic framework
@@ -42,6 +40,7 @@ Run `make`.
   * [x] `AR` sampling
 * [x] working `NAR-len` output
   * [x] `NAR-len` sampling
+    * [ ] proper scoring
 * [x] working `NAR` output
   * [x] `NAR` sampling
 * [x] decode audio to disk
diff --git a/vall_e.cpp/vall_e.cpp b/vall_e.cpp/vall_e.cpp
index 5c69bfb..a824544 100644
--- a/vall_e.cpp/vall_e.cpp
+++ b/vall_e.cpp/vall_e.cpp
@@ -748,7 +748,17 @@ std::vector generate( vall_e_context_t* ctx, vall_e_inputs_t& inputs, i
 			// store token if it was masked
 			output_tokens[idx] = t;
 			// update score if it was masked
-			scores[idx] = 1.0f - softmaxed[t]; // invert so we pick the worst tokens later
+
+			// this is actually wrong
+			// scores[idx] = 1.0f - softmaxed[t]; // invert so we pick the worst tokens later
+
+			// this seems to work better
+			float entropy = 0.f;
+			for (int v = 0; v < n_vocab; ++v ) {
+				float p = softmaxed[v];
+				if (p > 0) entropy -= p * std::log(p + 1e-9);
+			}
+			scores[idx] = entropy / std::log(n_vocab); // normalize to [0, 1]
 		}
 
 		llama_sampler_free(smpl);