Actually, don't default to computing split losses: the test BitNet model doesn't seem to be doing things right (despite debug printouts showing they're roughly the same logit/loss sequences; it could just be that the BitNet linears are not up to par on actual models).
This commit is contained in:
parent
e9c87060df
commit
31785f4eeb
|
@ -213,7 +213,7 @@ class Model:
|
||||||
attention: str = "auto"
|
attention: str = "auto"
|
||||||
audio_embedding_sums: bool = True
|
audio_embedding_sums: bool = True
|
||||||
dropout: float = 0.1 # adjustable dropout value
|
dropout: float = 0.1 # adjustable dropout value
|
||||||
loss_factors: dict = field(default_factory=lambda: { "text": 0.1, "prom": 0.0, "resp": 1.0 })
|
loss_factors: dict = field(default_factory=lambda: {}) # "text": 0.1, "prom": 0.0, "resp": 1.0 })
|
||||||
kv_heads: int = 0
|
kv_heads: int = 0
|
||||||
|
|
||||||
def get(self, name=None):
|
def get(self, name=None):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user