From 40b089daf3a1c50dfa48c97b6e17b2741fa290ad Mon Sep 17 00:00:00 2001
From: mrq
Date: Sat, 12 Oct 2024 09:57:34 -0500
Subject: [PATCH] lol

---
 vall_e/plot.py     |  5 +----
 vall_e/samplers.py | 28 +++++++++++++++++-----------
 2 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/vall_e/plot.py b/vall_e/plot.py
index ee7ca01..c2231f3 100644
--- a/vall_e/plot.py
+++ b/vall_e/plot.py
@@ -99,10 +99,7 @@ def plot_entropies( entropies ):
 	fig.set_figwidth( 16 * len(entropies) // cfg.dataset.frames_per_second )
 	"""
 
-	data = {}
-
-	for key in entropies[0][0].keys():
-		data[key] = [ e[0][key].item() if hasattr( e[0][key], "item" ) else e[0][key] for e in entropies ]
+	data = { key: [ e[0][key] for e in entropies ] for key in entropies[0][0].keys() }
 
 	df = pd.DataFrame(data)
 	df.plot()
diff --git a/vall_e/samplers.py b/vall_e/samplers.py
index 0a2563c..76f0cf4 100644
--- a/vall_e/samplers.py
+++ b/vall_e/samplers.py
@@ -252,19 +252,19 @@ def calculate_entropix_metrics( logits, attention_scores=None, dim=-1 ):
 	interaction_strength = torch.mean(torch.abs(attention_scores), dim=(1, 2, 3))
 
 	return {
-		"logits_entropy": torch.mean(entropy),
-		"logits_varentropy": torch.mean(varentropy),
-		"attn_entropy": torch.mean(attn_entropy),
-		"attn_varentropy": torch.mean(attn_varentropy),
-		"agreement": torch.mean(agreement),
-		"interaction_strength": torch.mean(torch.abs(attention_scores), dim=(1, 2, 3)),
+		"logits_entropy": torch.mean(entropy).item(),
+		"logits_varentropy": torch.mean(varentropy).item(),
+		"attn_entropy": torch.mean(attn_entropy).item(),
+		"attn_varentropy": torch.mean(attn_varentropy).item(),
+		"agreement": torch.mean(agreement).item(),
+		"interaction_strength": torch.mean(torch.abs(attention_scores), dim=(1, 2, 3)).item(),
 		"action": -1
 	}
 
 # to-do: play around with these values
 @dataclass()
 class EntropixSamplerConfig:
-	temp: float = 0.85
+	temp: float = 0.666
 	top_p: float = 0.90
 	top_k: int = 27
 	min_p: float = 0.01 # was 0.03 # Turn this down to 0.01 to reduce the shoggoth
@@ -315,6 +315,8 @@ class EntropixSamplerConfig:
 	min_p_max: int = 0.5
 
 Exponential = torch.distributions.exponential.Exponential(1.0)
+
+# Doing as close to the original sampling method just to reduce variance
 def _sample_entropix(
 	logits,
 	temperature=1.0,
@@ -365,6 +367,10 @@ def sample_entropix(
 	min_p=0.0,
 	cfg=EntropixSamplerConfig(),
 ):
+	temperature = cfg.temp
+	top_k = cfg.top_k
+	top_p = cfg.top_p
+
 	metrics = calculate_entropix_metrics( logits, attentions )
 
 	ent, vent = metrics["logits_entropy"], metrics["logits_varentropy"]
@@ -403,10 +409,10 @@ def sample_entropix(
 		logits_uncertainty = ent + vent
 		attn_uncertainty = attn_ent + attn_vent
 
-		temperature *= float(1 + cfg.ada_temp_logits * logits_uncertainty + cfg.ada_temp_attn * attn_uncertainty - cfg.ada_temp_agree * agreement)
-		top_p = float(top_p * (1 + cfg.ada_top_p * attn_vent))
-		top_k = int(round(float(top_k * (1 + cfg.ada_top_k_int * interaction_strength - cfg.ada_top_k_agree * agreement))))
-		min_p = float(cfg.min_p * (1 - cfg.ada_min_p * logits_uncertainty))
+		temperature *= 1 + cfg.ada_temp_logits * logits_uncertainty + cfg.ada_temp_attn * attn_uncertainty - cfg.ada_temp_agree * agreement
+		top_p = top_p * (1 + cfg.ada_top_p * attn_vent)
+		top_k = round(float(top_k * (1 + cfg.ada_top_k_int * interaction_strength - cfg.ada_top_k_agree * agreement)))
+		min_p = cfg.min_p * (1 - cfg.ada_min_p * logits_uncertainty)
 
 		samples = [ _sample_entropix( logits.clone(), temperature, top_k, top_p, min_p, cfg=cfg ) for _ in range(cfg.n_adaptive_samples) ]
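
Not part of the patch: a minimal standalone sketch of the adaptive rescaling that the last hunk simplifies, assuming the entropix metrics are already plain Python floats (which the .item() change in calculate_entropix_metrics() guarantees, and which is why the float()/int() casts above become redundant). The ada_* coefficients and names here are illustrative placeholders, not the repo's actual defaults.

from dataclasses import dataclass

@dataclass
class SketchConfig:
	# base knobs, mirroring EntropixSamplerConfig
	temp: float = 0.666
	top_p: float = 0.90
	top_k: int = 27
	min_p: float = 0.01
	# placeholder adaptive coefficients, for illustration only
	ada_temp_logits: float = 0.3
	ada_temp_attn: float = 0.2
	ada_temp_agree: float = 0.2
	ada_top_p: float = 0.1
	ada_top_k_int: float = 0.3
	ada_top_k_agree: float = 0.2
	ada_min_p: float = 0.5

def adapt_sampler_knobs(cfg, ent, vent, attn_ent, attn_vent, agreement, interaction_strength):
	# Fold logit and attention entropy/varentropy into scalar uncertainties.
	logits_uncertainty = ent + vent
	attn_uncertainty = attn_ent + attn_vent
	# Higher uncertainty widens the search (hotter temperature, larger
	# top-p/top-k); higher head agreement narrows it.
	temperature = cfg.temp * (1 + cfg.ada_temp_logits * logits_uncertainty
	                            + cfg.ada_temp_attn * attn_uncertainty
	                            - cfg.ada_temp_agree * agreement)
	top_p = cfg.top_p * (1 + cfg.ada_top_p * attn_vent)
	top_k = round(cfg.top_k * (1 + cfg.ada_top_k_int * interaction_strength
	                             - cfg.ada_top_k_agree * agreement))
	min_p = cfg.min_p * (1 - cfg.ada_min_p * logits_uncertainty)
	return temperature, top_p, top_k, min_p

# Example: moderately uncertain logits, fairly confident attention.
print(adapt_sampler_knobs(SketchConfig(), ent=1.2, vent=0.8,
                          attn_ent=2.0, attn_vent=0.5,
                          agreement=0.1, interaction_strength=0.4))

Since the metrics are floats rather than 0-dim tensors, plain arithmetic and round() suffice here; round() already returns an int, so the patch's removal of the extra int()/float() wrappers does not change behavior.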