diff --git a/data/config.yaml b/data/config.yaml index cebc926..8f7343f 100644 --- a/data/config.yaml +++ b/data/config.yaml @@ -27,6 +27,7 @@ models: interleave: False # interleaves RVQ levels, only works with above for now audio_embedding_mode: "" # "" | "inclusive" | "exclusive", whether to utilize the audio backend's embeddings with the input embeddings audio_embedding_sums: False # whether the input embeddings include all prior RVQ levels (sums) or only the current one, further experimentation is needed to see if this matters + p_rvq_levels: "equal" # "equal" | "auto", sets the probability of each RVQ level being selected during training; "auto" makes each successive RVQ level half as likely as the previous one hyperparameters: autotune: False diff --git a/vall_e/config.py b/vall_e/config.py index fc3b144..0f08ac1 100755 --- a/vall_e/config.py +++ b/vall_e/config.py @@ -262,11 +262,7 @@ class Model: if cfg.optimizations.bitnet: name.append("bitnet") - if self.interleave: - name.append("interleaved") - else: - name.append(f'{self.prom_levels}') - + name.append(f'{self.resp_levels}') return "-".join(name)