I should really just grab modelling_llama wholesale (fix for the adapted attention class)

mrq 2025-01-28 21:55:05 -06:00
parent e5f9da2221
commit 0841f366e8


@@ -35,6 +35,11 @@ class LlamaAttention_Adapted(LlamaAttention):
 		super().__init__(*args, **kwargs)
+		if not hasattr(self, "num_heads"):
+			self.num_heads = self.config.num_attention_heads
+		if not hasattr(self, "num_key_value_heads"):
+			self.num_key_value_heads = self.config.num_key_value_heads
+
 	# extracts inputs from a batch based on requested causality
 	def split_forward(
 		self,
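
For reference, a minimal self-contained sketch of what the added fallback does, assuming a recent transformers release where LlamaAttention no longer exposes num_heads / num_key_value_heads as instance attributes but still stores the config on self.config. This is not the repo's exact file; the config values in the usage example are made up for illustration.

	# Sketch of the compatibility shim around transformers' LlamaAttention.
	from transformers import LlamaConfig
	from transformers.models.llama.modeling_llama import LlamaAttention

	class LlamaAttention_Adapted(LlamaAttention):
		def __init__(self, *args, **kwargs):
			super().__init__(*args, **kwargs)
			# Newer transformers releases dropped these instance attributes;
			# restore them from the config so downstream code that still reads
			# self.num_heads / self.num_key_value_heads keeps working.
			if not hasattr(self, "num_heads"):
				self.num_heads = self.config.num_attention_heads
			if not hasattr(self, "num_key_value_heads"):
				self.num_key_value_heads = self.config.num_key_value_heads

	# Usage with hypothetical config values:
	cfg = LlamaConfig(hidden_size=256, num_attention_heads=4, num_key_value_heads=2)
	attn = LlamaAttention_Adapted(cfg, layer_idx=0)
	print(attn.num_heads, attn.num_key_value_heads)  # 4 2

Guarding with hasattr means the subclass stays a no-op on older transformers versions that still set these attributes themselves, so the same code path works on both sides of the upstream change.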