speedups: terminate generation early and keep short observations from taking forever due to rambling

mrq 2023-04-29 22:04:18 +00:00
parent 9e0fd8d79c
commit a1cb43da5e
5 changed files with 25 additions and 17 deletions

View File

@@ -7,10 +7,10 @@ from typing import Any, Dict, List, Optional, Tuple
 from pydantic import BaseModel, Field
 from langchain import LLMChain
-from langchain.experimental.generative_agents.memory import GenerativeAgentMemory
 from langchain.prompts import PromptTemplate
 from langchain.schema import BaseLanguageModel
+from .memory import GenerativeAgentMemory
 from .prompts import PROMPTS

 class GenerativeAgent(BaseModel):

View File

@@ -126,6 +126,7 @@ class GenerativeAgentMemory(BaseMemory):
             self.pause_to_reflect()
             # Hack to clear the importance from reflection
             self.aggregate_importance = 0.0
         return result

     def fetch_memories(self, observation: str) -> List[Document]:

View File

@@ -28,7 +28,7 @@ if LLM_PROMPT_TUNE == "vicuna":
        "\n{relevant_memories}"
        "\nMost recent observations: {most_recent_memories}"
        "\nObservation: {observation}"
-       "\n\n{suffix}"
+       "\n{suffix}"
        "\nASSISTANT: "
    ),
    "generate_reaction": (
@@ -36,18 +36,18 @@ if LLM_PROMPT_TUNE == "vicuna":
        " what would be an appropriate reaction? Respond in one line."
        ' If the action is to engage in dialogue, write:\nSAY: "what to say"'
        "\notherwise, write:\nREACT: {agent_name}'s reaction (if anything)."
-       "\nEither do nothing, react, or say something but not both.\n\n"
+       "\nEither do nothing, react, or say something but not both."
    ),
    "generate_dialogue_response": (
        "What would {agent_name} say? To end the conversation, write:"
        ' GOODBYE: "what to say". Otherwise to continue the conversation,'
-       ' write: SAY: "what to say next"\n\n'
+       ' write: SAY: "what to say next"'
    ),
    "compute_agent_summary": (
        "USER: How would you summarize {name}'s core characteristics given the"
        " following statements:\n"
        "{relevant_memories}"
-       "Do not embellish.\n"
+       "Do not embellish."
        "\nASSISTANT: Summary: "
    ),
    "topic_of_reflection": (
@@ -59,7 +59,7 @@ if LLM_PROMPT_TUNE == "vicuna":
    ),
    "insights_on_topic": (
        "USER: Statements about {topic}\n"
-       "{related_statements}\n\n"
+       "{related_statements}\n"
        "What 5 high-level insights can you infer from the above statements?"
        " (example format: insight (because of 1, 5, 3))"
        "\nASSISTANT: "
@@ -69,7 +69,7 @@ if LLM_PROMPT_TUNE == "vicuna":
        " (e.g., brushing teeth, making bed) and 10 is"
        " extremely poignant (e.g., a break up, college"
        " acceptance), rate the likely poignancy of the"
-       " following piece of memory. Respond with a single integer."
+       " following piece of memory. Respond with only a single integer, nothing else."
        "\nMemory: {memory_content}"
        "\nASSISTANT: Rating: "
    ),
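For reference, these concatenated string literals are later filled in through langchain's PromptTemplate. A minimal sketch of how the tightened poignancy prompt above would render; the opening "USER: On the scale of 1 to 10..." line sits outside the hunk and is paraphrased from langchain's stock prompt, and the sample memory text is purely illustrative:

from langchain.prompts import PromptTemplate

# Prompt text taken from the hunk above; the first line is a paraphrase of the
# stock langchain importance prompt, since it falls outside the shown diff.
template = (
    "USER: On the scale of 1 to 10, where 1 is purely mundane"
    " (e.g., brushing teeth, making bed) and 10 is"
    " extremely poignant (e.g., a break up, college"
    " acceptance), rate the likely poignancy of the"
    " following piece of memory. Respond with only a single integer, nothing else."
    "\nMemory: {memory_content}"
    "\nASSISTANT: Rating: "
)
prompt = PromptTemplate.from_template(template)

# The "only a single integer, nothing else" wording is what keeps short
# observations from producing long, rambling ratings.
print(prompt.format(memory_content="Adopted a stray cat from the shelter."))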

View File

@@ -95,8 +95,8 @@ def save_agent_proxy( agents ):
    if not isinstance( agents, list ):
        agents = [ agents ]

-   for agent in agents:
-       agent = AGENTS[agent]
+   for name in agents:
+       agent = AGENTS[name]
        save_agent( agent )

def load_agent_proxy( agents ):
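The rename in this hunk clarifies that the proxy receives agent names rather than agent objects; behavior is unchanged. A sketch with the lookup spelled out, assuming AGENTS (a dict mapping name to loaded agent) and save_agent() are defined elsewhere in this file:

def save_agent_proxy( agents ):
    if not isinstance( agents, list ):
        agents = [ agents ]

    for name in agents:        # the proxy receives agent *names* from the UI
        agent = AGENTS[name]   # resolve each name to its loaded agent object
        save_agent( agent )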

View File

@@ -30,7 +30,8 @@ else:
 LLM_TYPE = os.environ.get('LLM_TYPE', "llamacpp") # options: llamacpp, oai
 LLM_LOCAL_MODEL = os.environ.get('LLM_MODEL', "./models/ggml-vicuna-13b-1.1/ggml-vic13b-uncensored-q4_2.bin") # "./models/llama-13b-supercot-ggml/ggml-model-q4_0.bin"
 LLM_CONTEXT = int(os.environ.get('LLM_CONTEXT', '2048'))
-EMBEDDING_TYPE = os.environ.get("LLM_EMBEDDING_TYPE", "llamacpp") # options: llamacpp, oai, hf
+LLM_THREADS = int(os.environ.get('LLM_THREADS', '6'))
+EMBEDDING_TYPE = os.environ.get("LLM_EMBEDDING_TYPE", "hf") # options: llamacpp, oai, hf

 callback_manager = CallbackManager([StreamingStdOutCallbackHandler()]) # unncessesary but whatever
 if LLM_TYPE=="llamacpp":
@@ -39,8 +40,10 @@ if LLM_TYPE=="llamacpp":
     LLM = LlamaCpp(
         model_path=LLM_LOCAL_MODEL,
         callback_manager=callback_manager,
-        verbose=False,
-        n_ctx=LLM_CONTEXT
+        verbose=True,
+        n_ctx=LLM_CONTEXT,
+        n_threads=LLM_THREADS,
+        stop=["\n\n"]
     )
 elif LLM_TYPE=="oai":
     from langchain.chat_models import ChatOpenAI
@@ -86,7 +89,11 @@ elif EMBEDDING_TYPE == "oai":
 elif EMBEDDING_TYPE == "llamacpp":
     from langchain.embeddings import LlamaCppEmbeddings
-    EMBEDDINGS_MODEL = LlamaCppEmbeddings(model_path=LLM_LOCAL_MODEL)
+    EMBEDDINGS_MODEL = LlamaCppEmbeddings(
+        model_path=LLM_LOCAL_MODEL,
+        n_ctx=LLM_CONTEXT,
+        n_threads=LLM_THREADS,
+    )
     EMBEDDINGS_SIZE = 5120
 else:
     raise f"Invalid embedding type: {EMBEDDING_TYPE}"
@@ -110,12 +117,12 @@ def _create_new_memory_retriever():
     vectorstore = FAISS(EMBEDDINGS_MODEL.embed_query, index, InMemoryDocstore({}), {}, relevance_score_fn=_relevance_score_fn)
     return TimeWeightedVectorStoreRetriever(vectorstore=vectorstore, other_score_keys=["importance"], k=15)

-def _create_new_memories(reflection_threshold=8):
+def _create_new_memories():
     return GenerativeAgentMemory(llm=LLM,
         memory_retriever=_create_new_memory_retriever(),
-        reflection_threshold=reflection_threshold,
-        verbose=False,
-        max_tokens_limit=LLM_CONTEXT
+        reflection_threshold=8,
+        verbose=True,
+        max_tokens_limit=LLM_CONTEXT/2
     )

def create_agent(**kwargs):
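Taken together, these settings cut generation at the first blank line and halve the memory token budget fed back into prompts. A minimal end-to-end sketch of the new LLM construction under the same settings; the model path, thread count, and sample prompt are illustrative, not prescriptive:

from langchain.callbacks.base import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp

# Mirrors the constructor arguments introduced in the hunk above.
llm = LlamaCpp(
    model_path="./models/ggml-vicuna-13b-1.1/ggml-vic13b-uncensored-q4_2.bin",
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    verbose=True,
    n_ctx=2048,
    n_threads=6,
    stop=["\n\n"],  # cut generation at the first blank line instead of rambling on
)

# With the stop sequence in place, a short rating prompt returns as soon as the
# model emits a blank line, which is where most of the speedup comes from.
print(llm("USER: Rate the poignancy of: bought groceries.\nASSISTANT: Rating: "))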