speedups by terminating early and not having short observations take forever from ramblings
parent 9e0fd8d79c
commit a1cb43da5e
@@ -7,10 +7,10 @@ from typing import Any, Dict, List, Optional, Tuple
 from pydantic import BaseModel, Field
 
 from langchain import LLMChain
-from langchain.experimental.generative_agents.memory import GenerativeAgentMemory
 from langchain.prompts import PromptTemplate
 from langchain.schema import BaseLanguageModel
 
+from .memory import GenerativeAgentMemory
 from .prompts import PROMPTS
 
 class GenerativeAgent(BaseModel):
@@ -126,6 +126,7 @@ class GenerativeAgentMemory(BaseMemory):
         self.pause_to_reflect()
         # Hack to clear the importance from reflection
         self.aggregate_importance = 0.0
 
         return result
 
     def fetch_memories(self, observation: str) -> List[Document]:
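For context on why clearing aggregate_importance matters for speed: each stored observation adds an importance score to a running total, and once that total crosses reflection_threshold the agent pauses to reflect, which is an expensive extra round of LLM calls. Resetting the total afterwards keeps one important burst of memories from re-triggering reflection on every short observation that follows. A minimal sketch of that mechanism, with names chosen for illustration rather than copied from this repo's memory.py:

```python
from typing import List

# Simplified sketch of threshold-triggered reflection; the class and field
# names mirror the idea, not this repo's exact GenerativeAgentMemory code.
class SketchMemory:
    def __init__(self, reflection_threshold: float = 8.0) -> None:
        self.reflection_threshold = reflection_threshold
        self.aggregate_importance = 0.0
        self.memories: List[str] = []

    def add_memory(self, text: str, importance: float) -> None:
        self.memories.append(text)
        self.aggregate_importance += importance
        if self.aggregate_importance > self.reflection_threshold:
            self.pause_to_reflect()
            # Clear the accumulator so one important burst does not keep
            # re-triggering reflection on every short observation after it.
            self.aggregate_importance = 0.0

    def pause_to_reflect(self) -> None:
        print(f"reflecting on {len(self.memories)} memories")
```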
@@ -28,7 +28,7 @@ if LLM_PROMPT_TUNE == "vicuna":
            "\n{relevant_memories}"
            "\nMost recent observations: {most_recent_memories}"
            "\nObservation: {observation}"
-           "\n\n{suffix}"
+           "\n{suffix}"
            "\nASSISTANT: "
        ),
        "generate_reaction": (
@@ -36,18 +36,18 @@ if LLM_PROMPT_TUNE == "vicuna":
            " what would be an appropriate reaction? Respond in one line."
            ' If the action is to engage in dialogue, write:\nSAY: "what to say"'
            "\notherwise, write:\nREACT: {agent_name}'s reaction (if anything)."
-           "\nEither do nothing, react, or say something but not both.\n\n"
+           "\nEither do nothing, react, or say something but not both."
        ),
        "generate_dialogue_response": (
            "What would {agent_name} say? To end the conversation, write:"
            ' GOODBYE: "what to say". Otherwise to continue the conversation,'
-           ' write: SAY: "what to say next"\n\n'
+           ' write: SAY: "what to say next"'
        ),
        "compute_agent_summary": (
            "USER: How would you summarize {name}'s core characteristics given the"
            " following statements:\n"
            "{relevant_memories}"
-           "Do not embellish.\n"
+           "Do not embellish."
            "\nASSISTANT: Summary: "
        ),
        "topic_of_reflection": (
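Dropping the trailing blank lines from these prompts works together with the stop=["\n\n"] setting added to the LlamaCpp config further down: the model is expected to answer in a single line starting with SAY:, REACT:, or GOODBYE:. A hedged sketch of how such a one-line reply might be parsed; this helper is illustrative and is not code from this repository:

```python
import re
from typing import Tuple

def parse_reaction(reply: str) -> Tuple[str, str]:
    """Split a one-line reply into (kind, text).

    kind is "say", "react", "goodbye", or "none". Illustrative only; the
    repository's real parsing logic is not part of this diff.
    """
    line = reply.strip().splitlines()[0] if reply.strip() else ""
    match = re.match(r'^(SAY|REACT|GOODBYE):\s*"?(.*?)"?\s*$', line)
    if not match:
        return "none", line
    return match.group(1).lower(), match.group(2)

# Example: parse_reaction('SAY: "Hello there."') returns ("say", "Hello there.")
```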
@@ -59,7 +59,7 @@ if LLM_PROMPT_TUNE == "vicuna":
        ),
        "insights_on_topic": (
            "USER: Statements about {topic}\n"
-           "{related_statements}\n\n"
+           "{related_statements}\n"
            "What 5 high-level insights can you infer from the above statements?"
            " (example format: insight (because of 1, 5, 3))"
            "\nASSISTANT: "
@@ -69,7 +69,7 @@ if LLM_PROMPT_TUNE == "vicuna":
            " (e.g., brushing teeth, making bed) and 10 is"
            " extremely poignant (e.g., a break up, college"
            " acceptance), rate the likely poignancy of the"
-           " following piece of memory. Respond with a single integer."
+           " following piece of memory. Respond with only a single integer, nothing else."
            "\nMemory: {memory_content}"
            "\nASSISTANT: Rating: "
        ),
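The stricter wording ("only a single integer, nothing else") combines with the same "\n\n" stop sequence so that importance scoring returns almost immediately instead of producing a paragraph of justification. A hedged sketch of how such a reply could be reduced to a score; again an illustrative helper, not this repo's code:

```python
import re

def parse_importance(reply: str, default: int = 3) -> int:
    """Extract the first integer from a rating reply and clamp it to 1..10.

    Illustrative only; the repository's real scoring code may differ.
    """
    match = re.search(r"\d+", reply)
    if match is None:
        return default
    return max(1, min(10, int(match.group())))

# Example: parse_importance("Rating: 7") returns 7
```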
@@ -95,8 +95,8 @@ def save_agent_proxy( agents ):
     if not isinstance( agents, list ):
         agents = [ agents ]
 
-    for agent in agents:
-        agent = AGENTS[agent]
+    for name in agents:
+        agent = AGENTS[name]
         save_agent( agent )
 
 def load_agent_proxy( agents ):
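Besides being easier to read than reusing `agent` for both the loop variable and the looked-up object, the rename makes clear that save_agent_proxy expects agent names, i.e. keys into the AGENTS registry, and accepts either a single name or a list of names. A hedged usage sketch; the names below are made up for illustration:

```python
# Hypothetical usage: "alice" and "bob" are assumed to already be keys in
# the AGENTS registry built elsewhere in this module.
save_agent_proxy("alice")             # a single name is wrapped into a list
save_agent_proxy(["alice", "bob"])    # several agents saved in one call
```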
src/utils.py (23 changed lines)
@@ -30,7 +30,8 @@ else:
 LLM_TYPE = os.environ.get('LLM_TYPE', "llamacpp") # options: llamacpp, oai
 LLM_LOCAL_MODEL = os.environ.get('LLM_MODEL', "./models/ggml-vicuna-13b-1.1/ggml-vic13b-uncensored-q4_2.bin") # "./models/llama-13b-supercot-ggml/ggml-model-q4_0.bin"
 LLM_CONTEXT = int(os.environ.get('LLM_CONTEXT', '2048'))
-EMBEDDING_TYPE = os.environ.get("LLM_EMBEDDING_TYPE", "llamacpp") # options: llamacpp, oai, hf
+LLM_THREADS = int(os.environ.get('LLM_THREADS', '6'))
+EMBEDDING_TYPE = os.environ.get("LLM_EMBEDDING_TYPE", "hf") # options: llamacpp, oai, hf
 
 callback_manager = CallbackManager([StreamingStdOutCallbackHandler()]) # unncessesary but whatever
 if LLM_TYPE=="llamacpp":
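All of these knobs are read from environment variables, so the new thread count and the new embedding default can be overridden without editing the file. A hedged sketch of setting them from Python before the module is imported; the model path is a placeholder and the import line assumes the repo's src/ package layout:

```python
import os

# Override the defaults shown above before src/utils.py is imported.
os.environ["LLM_TYPE"] = "llamacpp"
os.environ["LLM_MODEL"] = "./models/your-model.bin"   # placeholder path
os.environ["LLM_CONTEXT"] = "2048"
os.environ["LLM_THREADS"] = "8"              # new in this commit, default 6
os.environ["LLM_EMBEDDING_TYPE"] = "hf"      # new default in this commit

from src import utils  # assumed import path for this repo's layout
```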
@@ -39,8 +40,10 @@ if LLM_TYPE=="llamacpp":
     LLM = LlamaCpp(
         model_path=LLM_LOCAL_MODEL,
         callback_manager=callback_manager,
-        verbose=False,
-        n_ctx=LLM_CONTEXT
+        verbose=True,
+        n_ctx=LLM_CONTEXT,
+        n_threads=LLM_THREADS,
+        stop=["\n\n"]
     )
 elif LLM_TYPE=="oai":
     from langchain.chat_models import ChatOpenAI
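The added stop=["\n\n"] is where the "terminating early" speedup in the commit message comes from: generation halts at the first blank line instead of running on toward the token limit, which is also why the prompt templates above no longer end with "\n\n" themselves. A minimal hedged sketch of the effect, assuming the langchain LlamaCpp wrapper used here; the model path and prompt are placeholders:

```python
from langchain.llms import LlamaCpp

llm = LlamaCpp(
    model_path="./models/your-model.bin",  # placeholder GGML model path
    n_ctx=2048,
    n_threads=8,
    stop=["\n\n"],  # cut generation off at the first blank line
)

# The call returns as soon as the model emits "\n\n", so one-line answers
# such as a rating come back quickly instead of trailing off into rambling.
print(llm("USER: Rate this memory from 1 to 10.\nASSISTANT: Rating: "))
```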
@@ -86,7 +89,11 @@ elif EMBEDDING_TYPE == "oai":
 elif EMBEDDING_TYPE == "llamacpp":
     from langchain.embeddings import LlamaCppEmbeddings
 
-    EMBEDDINGS_MODEL = LlamaCppEmbeddings(model_path=LLM_LOCAL_MODEL)
+    EMBEDDINGS_MODEL = LlamaCppEmbeddings(
+        model_path=LLM_LOCAL_MODEL,
+        n_ctx=LLM_CONTEXT,
+        n_threads=LLM_THREADS,
+    )
     EMBEDDINGS_SIZE = 5120
 else:
     raise f"Invalid embedding type: {EMBEDDING_TYPE}"
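Giving the llama.cpp embedder the same n_ctx and n_threads as the LLM avoids falling back to a small default context and a single thread when embedding long memories. Since the default EMBEDDING_TYPE is now "hf", here is a hedged sketch of what that branch commonly looks like with langchain's HuggingFace embeddings; the model name and the 768-dimension figure are assumptions about a typical sentence-transformers model, not values taken from this diff:

```python
from langchain.embeddings import HuggingFaceEmbeddings

# Illustrative only: the actual "hf" branch of src/utils.py is not shown in
# this diff and may use a different model or embedding size.
EMBEDDINGS_MODEL = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",  # assumed model
)
EMBEDDINGS_SIZE = 768  # all-mpnet-base-v2 produces 768-dimensional vectors
```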
@@ -110,12 +117,12 @@ def _create_new_memory_retriever():
     vectorstore = FAISS(EMBEDDINGS_MODEL.embed_query, index, InMemoryDocstore({}), {}, relevance_score_fn=_relevance_score_fn)
     return TimeWeightedVectorStoreRetriever(vectorstore=vectorstore, other_score_keys=["importance"], k=15)
 
-def _create_new_memories(reflection_threshold=8):
+def _create_new_memories():
     return GenerativeAgentMemory(llm=LLM,
         memory_retriever=_create_new_memory_retriever(),
-        reflection_threshold=reflection_threshold,
-        verbose=False,
-        max_tokens_limit=LLM_CONTEXT
+        reflection_threshold=8,
+        verbose=True,
+        max_tokens_limit=LLM_CONTEXT/2
     )
 
 def create_agent(**kwargs):
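Hard-coding reflection_threshold=8 and halving max_tokens_limit means retrieved memories can fill at most about half of the context window (1024 tokens with the 2048-token default above), leaving the rest for the prompt template and the reply; note that LLM_CONTEXT/2 yields a float in Python 3, which the memory class is assumed to tolerate. A hedged usage sketch of the factory; the agent fields follow langchain's GenerativeAgent and may not match this repo's exact create_agent signature:

```python
# Hypothetical call site; field names follow langchain's GenerativeAgent,
# and the actual create_agent(**kwargs) wrapper above may differ.
memory = _create_new_memories()
agent = GenerativeAgent(
    name="Alice",                 # illustrative
    age=30,
    traits="curious, talkative",
    status="wandering the town",
    llm=LLM,
    memory=memory,
)
```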