speedups by terminating generation early and not letting short observations take forever because of rambling

mrq 2023-04-29 22:04:18 +00:00
parent 9e0fd8d79c
commit a1cb43da5e
5 changed files with 25 additions and 17 deletions
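
The gist of the change, as a minimal sketch assuming langchain's LlamaCpp wrapper and llama-cpp-python are installed; the model path and numbers mirror the defaults touched below and are placeholders, not authoritative settings:

import os
from langchain.llms import LlamaCpp

# Placeholder path; any local GGML model works for this sketch.
MODEL_PATH = os.environ.get("LLM_MODEL", "./models/ggml-vicuna-13b-1.1/ggml-vic13b-uncensored-q4_2.bin")

llm = LlamaCpp(
    model_path=MODEL_PATH,
    n_ctx=2048,      # context window, mirrors LLM_CONTEXT
    n_threads=6,     # mirrors the new LLM_THREADS knob
    stop=["\n\n"],   # cut generation off at the first blank line
    verbose=True,
)

# With the stop sequence, a prompt that asks for a one-line answer returns as soon as
# that line is finished instead of rambling on into unrelated text.
print(llm("USER: Rate the poignancy from 1 to 10.\nMemory: brushed teeth\nASSISTANT: Rating: "))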

View File

@@ -7,10 +7,10 @@ from typing import Any, Dict, List, Optional, Tuple
from pydantic import BaseModel, Field
from langchain import LLMChain
from langchain.experimental.generative_agents.memory import GenerativeAgentMemory
from langchain.prompts import PromptTemplate
from langchain.schema import BaseLanguageModel
from .memory import GenerativeAgentMemory
from .prompts import PROMPTS
class GenerativeAgent(BaseModel):

View File

@@ -126,6 +126,7 @@ class GenerativeAgentMemory(BaseMemory):
self.pause_to_reflect()
# Hack to clear the importance from reflection
self.aggregate_importance = 0.0
return result
def fetch_memories(self, observation: str) -> List[Document]:
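
For context, this hunk makes the memory-add path hand back the retriever's result. A hedged reconstruction of the surrounding method, a fragment of the class simplified from langchain's GenerativeAgentMemory of the time, so names like reflecting and the exact control flow are assumptions:

    def add_memory(self, memory_content: str) -> List[str]:
        """Score an observation, store it, and reflect once enough importance accumulates."""
        importance_score = self._score_memory_importance(memory_content)
        self.aggregate_importance += importance_score
        document = Document(page_content=memory_content, metadata={"importance": importance_score})
        result = self.memory_retriever.add_documents([document])
        if self.aggregate_importance > self.reflection_threshold and not self.reflecting:
            self.reflecting = True
            self.pause_to_reflect()
            # Hack to clear the importance from reflection
            self.aggregate_importance = 0.0
            self.reflecting = False
        return result  # the line added here: expose the stored documents' ids to callers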

View File

@@ -28,7 +28,7 @@ if LLM_PROMPT_TUNE == "vicuna":
"\n{relevant_memories}"
"\nMost recent observations: {most_recent_memories}"
"\nObservation: {observation}"
"\n\n{suffix}"
"\n{suffix}"
"\nASSISTANT: "
),
"generate_reaction": (
@@ -36,18 +36,18 @@ if LLM_PROMPT_TUNE == "vicuna":
" what would be an appropriate reaction? Respond in one line."
' If the action is to engage in dialogue, write:\nSAY: "what to say"'
"\notherwise, write:\nREACT: {agent_name}'s reaction (if anything)."
"\nEither do nothing, react, or say something but not both.\n\n"
"\nEither do nothing, react, or say something but not both."
),
"generate_dialogue_response": (
"What would {agent_name} say? To end the conversation, write:"
' GOODBYE: "what to say". Otherwise to continue the conversation,'
' write: SAY: "what to say next"\n\n'
' write: SAY: "what to say next"'
),
"compute_agent_summary": (
"USER: How would you summarize {name}'s core characteristics given the"
" following statements:\n"
"{relevant_memories}"
"Do not embellish.\n"
"Do not embellish."
"\nASSISTANT: Summary: "
),
"topic_of_reflection": (
@@ -59,7 +59,7 @@ if LLM_PROMPT_TUNE == "vicuna":
),
"insights_on_topic": (
"USER: Statements about {topic}\n"
"{related_statements}\n\n"
"{related_statements}\n"
"What 5 high-level insights can you infer from the above statements?"
" (example format: insight (because of 1, 5, 3))"
"\nASSISTANT: "
@@ -69,7 +69,7 @@ if LLM_PROMPT_TUNE == "vicuna":
" (e.g., brushing teeth, making bed) and 10 is"
" extremely poignant (e.g., a break up, college"
" acceptance), rate the likely poignancy of the"
" following piece of memory. Respond with a single integer."
" following piece of memory. Respond with only a single integer, nothing else."
"\nMemory: {memory_content}"
"\nASSISTANT: Rating: "
),
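
The tightened rating prompt works together with the new stop sequence: the model is asked for a bare integer and generation halts at the first blank line, so scoring a short observation no longer drags on. A small illustrative parser for that reply (the helper name and regex are made up for this sketch, not the project's code):

import re

def parse_importance(llm_output: str, default: int = 3) -> int:
    """Return the first integer found in the model's reply, clamped to 1-10."""
    match = re.search(r"\d+", llm_output)
    if match is None:
        return default
    return max(1, min(10, int(match.group())))

# With stop=["\n\n"] the reply is usually just "7" or "Rating: 7".
assert parse_importance("7") == 7
assert parse_importance("Rating: 10, a break up") == 10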

View File

@@ -95,8 +95,8 @@ def save_agent_proxy( agents ):
if not isinstance( agents, list ):
agents = [ agents ]
for agent in agents:
agent = AGENTS[agent]
for name in agents:
agent = AGENTS[name]
save_agent( agent )
def load_agent_proxy( agents ):
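
Read as the function that results from this hunk, save_agent_proxy likely becomes the following (a reconstruction, not verbatim; AGENTS and save_agent are defined elsewhere in the module):

def save_agent_proxy( agents ):
    if not isinstance( agents, list ):
        agents = [ agents ]
    for name in agents:
        agent = AGENTS[name]  # look up by name instead of shadowing the loop variable
        save_agent( agent )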

View File

@@ -30,7 +30,8 @@ else:
LLM_TYPE = os.environ.get('LLM_TYPE', "llamacpp") # options: llamacpp, oai
LLM_LOCAL_MODEL = os.environ.get('LLM_MODEL', "./models/ggml-vicuna-13b-1.1/ggml-vic13b-uncensored-q4_2.bin") # "./models/llama-13b-supercot-ggml/ggml-model-q4_0.bin"
LLM_CONTEXT = int(os.environ.get('LLM_CONTEXT', '2048'))
EMBEDDING_TYPE = os.environ.get("LLM_EMBEDDING_TYPE", "llamacpp") # options: llamacpp, oai, hf
LLM_THREADS = int(os.environ.get('LLM_THREADS', '6'))
EMBEDDING_TYPE = os.environ.get("LLM_EMBEDDING_TYPE", "hf") # options: llamacpp, oai, hf
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()]) # unnecessary but whatever
if LLM_TYPE=="llamacpp":
@@ -39,8 +40,10 @@ if LLM_TYPE=="llamacpp":
LLM = LlamaCpp(
model_path=LLM_LOCAL_MODEL,
callback_manager=callback_manager,
verbose=False,
n_ctx=LLM_CONTEXT
verbose=True,
n_ctx=LLM_CONTEXT,
n_threads=LLM_THREADS,
stop=["\n\n"]
)
elif LLM_TYPE=="oai":
from langchain.chat_models import ChatOpenAI
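
Since the new thread count and the embedding backend come from environment variables, a run can be tuned without touching the code; the values here are examples only:

import os

# Example configuration; set these before importing the module that reads them.
os.environ.setdefault("LLM_TYPE", "llamacpp")
os.environ.setdefault("LLM_CONTEXT", "2048")
os.environ.setdefault("LLM_THREADS", "8")          # new knob added by this commit
os.environ.setdefault("LLM_EMBEDDING_TYPE", "hf")  # new default embedding backend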
@@ -86,7 +89,11 @@ elif EMBEDDING_TYPE == "oai":
elif EMBEDDING_TYPE == "llamacpp":
from langchain.embeddings import LlamaCppEmbeddings
EMBEDDINGS_MODEL = LlamaCppEmbeddings(model_path=LLM_LOCAL_MODEL)
EMBEDDINGS_MODEL = LlamaCppEmbeddings(
model_path=LLM_LOCAL_MODEL,
n_ctx=LLM_CONTEXT,
n_threads=LLM_THREADS,
)
EMBEDDINGS_SIZE = 5120
else:
raise f"Invalid embedding type: {EMBEDDING_TYPE}"
@@ -110,12 +117,12 @@ def _create_new_memory_retriever():
vectorstore = FAISS(EMBEDDINGS_MODEL.embed_query, index, InMemoryDocstore({}), {}, relevance_score_fn=_relevance_score_fn)
return TimeWeightedVectorStoreRetriever(vectorstore=vectorstore, other_score_keys=["importance"], k=15)
def _create_new_memories(reflection_threshold=8):
def _create_new_memories():
return GenerativeAgentMemory(llm=LLM,
memory_retriever=_create_new_memory_retriever(),
reflection_threshold=reflection_threshold,
verbose=False,
max_tokens_limit=LLM_CONTEXT
reflection_threshold=8,
verbose=True,
max_tokens_limit=LLM_CONTEXT/2
)
def create_agent(**kwargs):
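
Read as the resulting helper, the final hunk likely becomes the following (a reconstruction; GenerativeAgentMemory, LLM, LLM_CONTEXT and _create_new_memory_retriever come from the surrounding module). Note that LLM_CONTEXT / 2 yields a float, so floor division is the safer spelling if max_tokens_limit expects an integer:

def _create_new_memories():
    return GenerativeAgentMemory(
        llm=LLM,
        memory_retriever=_create_new_memory_retriever(),
        reflection_threshold=8,
        verbose=True,
        max_tokens_limit=LLM_CONTEXT // 2,  # half the context window; // keeps the limit an int
    )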