speedups: terminate generation early and keep short observations from taking forever due to rambling

mrq 2023-04-29 22:04:18 +00:00
parent 9e0fd8d79c
commit a1cb43da5e
5 changed files with 25 additions and 17 deletions

View File

@@ -7,10 +7,10 @@ from typing import Any, Dict, List, Optional, Tuple
 from pydantic import BaseModel, Field
 from langchain import LLMChain
-from langchain.experimental.generative_agents.memory import GenerativeAgentMemory
 from langchain.prompts import PromptTemplate
 from langchain.schema import BaseLanguageModel
+from .memory import GenerativeAgentMemory
 from .prompts import PROMPTS

 class GenerativeAgent(BaseModel):

View File

@@ -126,6 +126,7 @@ class GenerativeAgentMemory(BaseMemory):
             self.pause_to_reflect()
             # Hack to clear the importance from reflection
             self.aggregate_importance = 0.0
         return result

     def fetch_memories(self, observation: str) -> List[Document]:

View File

@@ -28,7 +28,7 @@ if LLM_PROMPT_TUNE == "vicuna":
        "\n{relevant_memories}"
        "\nMost recent observations: {most_recent_memories}"
        "\nObservation: {observation}"
-       "\n\n{suffix}"
+       "\n{suffix}"
        "\nASSISTANT: "
    ),
    "generate_reaction": (
@@ -36,18 +36,18 @@ if LLM_PROMPT_TUNE == "vicuna":
        " what would be an appropriate reaction? Respond in one line."
        ' If the action is to engage in dialogue, write:\nSAY: "what to say"'
        "\notherwise, write:\nREACT: {agent_name}'s reaction (if anything)."
-       "\nEither do nothing, react, or say something but not both.\n\n"
+       "\nEither do nothing, react, or say something but not both."
    ),
    "generate_dialogue_response": (
        "What would {agent_name} say? To end the conversation, write:"
        ' GOODBYE: "what to say". Otherwise to continue the conversation,'
-       ' write: SAY: "what to say next"\n\n'
+       ' write: SAY: "what to say next"'
    ),
    "compute_agent_summary": (
        "USER: How would you summarize {name}'s core characteristics given the"
        " following statements:\n"
        "{relevant_memories}"
-       "Do not embellish.\n"
+       "Do not embellish."
        "\nASSISTANT: Summary: "
    ),
    "topic_of_reflection": (
@@ -59,7 +59,7 @@ if LLM_PROMPT_TUNE == "vicuna":
    ),
    "insights_on_topic": (
        "USER: Statements about {topic}\n"
-       "{related_statements}\n\n"
+       "{related_statements}\n"
        "What 5 high-level insights can you infer from the above statements?"
        " (example format: insight (because of 1, 5, 3))"
        "\nASSISTANT: "
@@ -69,7 +69,7 @@ if LLM_PROMPT_TUNE == "vicuna":
        " (e.g., brushing teeth, making bed) and 10 is"
        " extremely poignant (e.g., a break up, college"
        " acceptance), rate the likely poignancy of the"
-       " following piece of memory. Respond with a single integer."
+       " following piece of memory. Respond with only a single integer, nothing else."
        "\nMemory: {memory_content}"
        "\nASSISTANT: Rating: "
    ),
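For reference, these concatenated string literals are later filled in through langchain's PromptTemplate. A minimal sketch of how the tightened poignancy prompt above would render; the opening "USER: On the scale of 1 to 10..." line sits outside the hunk and is paraphrased from langchain's stock prompt, and the sample memory text is purely illustrative:

from langchain.prompts import PromptTemplate

# Prompt text taken from the hunk above; the first line is a paraphrase of the
# stock langchain importance prompt, since it falls outside the shown diff.
template = (
    "USER: On the scale of 1 to 10, where 1 is purely mundane"
    " (e.g., brushing teeth, making bed) and 10 is"
    " extremely poignant (e.g., a break up, college"
    " acceptance), rate the likely poignancy of the"
    " following piece of memory. Respond with only a single integer, nothing else."
    "\nMemory: {memory_content}"
    "\nASSISTANT: Rating: "
)
prompt = PromptTemplate.from_template(template)

# The "only a single integer, nothing else" wording is what keeps short
# observations from producing long, rambling ratings.
print(prompt.format(memory_content="Adopted a stray cat from the shelter."))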

View File

@@ -95,8 +95,8 @@ def save_agent_proxy( agents ):
    if not isinstance( agents, list ):
        agents = [ agents ]

-   for agent in agents:
-       agent = AGENTS[agent]
+   for name in agents:
+       agent = AGENTS[name]
        save_agent( agent )

def load_agent_proxy( agents ):
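The rename in this hunk clarifies that the proxy receives agent names rather than agent objects; behavior is unchanged. A sketch with the lookup spelled out, assuming AGENTS (a dict mapping name to loaded agent) and save_agent() are defined elsewhere in this file:

def save_agent_proxy( agents ):
    if not isinstance( agents, list ):
        agents = [ agents ]

    for name in agents:        # the proxy receives agent *names* from the UI
        agent = AGENTS[name]   # resolve each name to its loaded agent object
        save_agent( agent )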

View File

@@ -30,7 +30,8 @@ else:
 LLM_TYPE = os.environ.get('LLM_TYPE', "llamacpp") # options: llamacpp, oai
 LLM_LOCAL_MODEL = os.environ.get('LLM_MODEL', "./models/ggml-vicuna-13b-1.1/ggml-vic13b-uncensored-q4_2.bin") # "./models/llama-13b-supercot-ggml/ggml-model-q4_0.bin"
 LLM_CONTEXT = int(os.environ.get('LLM_CONTEXT', '2048'))
-EMBEDDING_TYPE = os.environ.get("LLM_EMBEDDING_TYPE", "llamacpp") # options: llamacpp, oai, hf
+LLM_THREADS = int(os.environ.get('LLM_THREADS', '6'))
+EMBEDDING_TYPE = os.environ.get("LLM_EMBEDDING_TYPE", "hf") # options: llamacpp, oai, hf

 callback_manager = CallbackManager([StreamingStdOutCallbackHandler()]) # unncessesary but whatever
 if LLM_TYPE=="llamacpp":
@@ -39,8 +40,10 @@ if LLM_TYPE=="llamacpp":
     LLM = LlamaCpp(
         model_path=LLM_LOCAL_MODEL,
         callback_manager=callback_manager,
-        verbose=False,
-        n_ctx=LLM_CONTEXT
+        verbose=True,
+        n_ctx=LLM_CONTEXT,
+        n_threads=LLM_THREADS,
+        stop=["\n\n"]
     )
 elif LLM_TYPE=="oai":
     from langchain.chat_models import ChatOpenAI
@@ -86,7 +89,11 @@ elif EMBEDDING_TYPE == "oai":
 elif EMBEDDING_TYPE == "llamacpp":
     from langchain.embeddings import LlamaCppEmbeddings
-    EMBEDDINGS_MODEL = LlamaCppEmbeddings(model_path=LLM_LOCAL_MODEL)
+    EMBEDDINGS_MODEL = LlamaCppEmbeddings(
+        model_path=LLM_LOCAL_MODEL,
+        n_ctx=LLM_CONTEXT,
+        n_threads=LLM_THREADS,
+    )
     EMBEDDINGS_SIZE = 5120
 else:
     raise f"Invalid embedding type: {EMBEDDING_TYPE}"
@@ -110,12 +117,12 @@ def _create_new_memory_retriever():
     vectorstore = FAISS(EMBEDDINGS_MODEL.embed_query, index, InMemoryDocstore({}), {}, relevance_score_fn=_relevance_score_fn)
     return TimeWeightedVectorStoreRetriever(vectorstore=vectorstore, other_score_keys=["importance"], k=15)

-def _create_new_memories(reflection_threshold=8):
+def _create_new_memories():
     return GenerativeAgentMemory(llm=LLM,
         memory_retriever=_create_new_memory_retriever(),
-        reflection_threshold=reflection_threshold,
-        verbose=False,
-        max_tokens_limit=LLM_CONTEXT
+        reflection_threshold=8,
+        verbose=True,
+        max_tokens_limit=LLM_CONTEXT/2
     )

def create_agent(**kwargs):
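Taken together, these settings cut generation at the first blank line and halve the memory token budget fed back into prompts. A minimal end-to-end sketch of the new LLM construction under the same settings; the model path, thread count, and sample prompt are illustrative, not prescriptive:

from langchain.callbacks.base import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp

# Mirrors the constructor arguments introduced in the hunk above.
llm = LlamaCpp(
    model_path="./models/ggml-vicuna-13b-1.1/ggml-vic13b-uncensored-q4_2.bin",
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    verbose=True,
    n_ctx=2048,
    n_threads=6,
    stop=["\n\n"],  # cut generation at the first blank line instead of rambling on
)

# With the stop sequence in place, a short rating prompt returns as soon as the
# model emits a blank line, which is where most of the speedup comes from.
print(llm("USER: Rate the poignancy of: bought groceries.\nASSISTANT: Rating: "))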