actually don't default to computing split losses; the test bitnet model doesn't seem to be doing things right (despite debug printouts showing they're roughly the same logit/loss sequences — could just be bitnet linears not being up to par on actual models)

This commit is contained in:
mrq 2024-06-01 09:12:51 -05:00
parent e9c87060df
commit 31785f4eeb

View File

@ -213,7 +213,7 @@ class Model:
attention: str = "auto"
audio_embedding_sums: bool = True
dropout: float = 0.1 # adjustable dropout value
loss_factors: dict = field(default_factory=lambda: { "text": 0.1, "prom": 0.0, "resp": 1.0 })
loss_factors: dict = field(default_factory=lambda: {}) # "text": 0.1, "prom": 0.0, "resp": 1.0 })
kv_heads: int = 0
def get(self, name=None):