From e152cd98a4571d7e0fa5693133855513969789ef Mon Sep 17 00:00:00 2001
From: mrq
Date: Wed, 3 May 2023 00:26:37 +0000
Subject: [PATCH] updated requirements because I had installed this in WSL2

---
 README.md                   |  2 +
 requirements.txt            |  6 ++-
 src/ext/generative_agent.py | 48 ++++++++++++++++--------
 src/ext/memory.py           | 26 ++++---------
 src/ext/prompts.py          | 75 ++++++++++++++++++++-----------------
 src/main.py                 | 37 +++++++++++++++++-
 src/utils.py                | 24 ++++++------
 7 files changed, 134 insertions(+), 84 deletions(-)

diff --git a/README.md b/README.md
index 6797f52..60b03eb 100755
--- a/README.md
+++ b/README.md
@@ -50,4 +50,6 @@ Even using one that's more instruction-tuned like Vicuna (with a `SYSTEM:\nUSER:
 However, I seem to be getting consistent results with SuperCOT 33B, it's just, well, slow.
 SuperCOT 13B seems to be giving better answers over Vicuna-1.1 13B, so.
 
+A ***lot*** of prompt wrangling is needed, and a lot of the routines could be polished up. For example, an observation queries the LM for a rating, and each response/reaction requires querying for the observed entity, then for the relationship between the agent and the observed entity (which ends up just summarizing relevant context/memories), and then for the response itself; a failure at any one of these steps compounds the overall failure rate. If anything, I might as well just work from the ground up and only really salvage the use of FAISS to store embedded vectors.
+
 GPT4 seems to Just Work, unfortunately.
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index e5e105c..d2075b9 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,6 @@
 langchain
 openai
-llamacpp
-gradio
\ No newline at end of file
+llama-cpp-python
+gradio
+faiss-cpu
+termcolor
\ No newline at end of file
diff --git a/src/ext/generative_agent.py b/src/ext/generative_agent.py
index 7c0c724..a1c86a0 100755
--- a/src/ext/generative_agent.py
+++ b/src/ext/generative_agent.py
@@ -34,7 +34,7 @@ from langchain.prompts import PromptTemplate
 from langchain.schema import BaseLanguageModel
 
 from .memory import GenerativeAgentMemory
-from .prompts import get_prompt
+from .prompts import get_prompt, get_stop_tokens
 
 class GenerativeAgent(BaseModel):
     """A character with memory and innate characteristics."""
@@ -87,14 +87,14 @@ class GenerativeAgent(BaseModel):
 
     def _get_entity_from_observation(self, observation: str) -> str:
         prompt = PromptTemplate.from_template(get_prompt('entity_from_observation'))
-        response = self.chain(prompt).run(observation=observation).strip().replace("Entity=", "").replace("Entity: ", "") # OAI will keep this
+        response = self.chain(prompt).run(stop=get_stop_tokens([".", "(", "'"]), observation=observation).strip()
         if self.verbose:
             print(response)
         return response
 
     def _get_entity_action(self, observation: str, entity_name: str) -> str:
         prompt = PromptTemplate.from_template(get_prompt('entity_action'))
-        response = self.chain(prompt).run(entity=entity_name, observation=observation).strip()
+        response = self.chain(prompt).run(stop=get_stop_tokens(), entity=entity_name, observation=observation).strip()
         if self.verbose:
             print(response)
         return response
@@ -113,21 +113,23 @@ class GenerativeAgent(BaseModel):
         entity_action = self._get_entity_action(observation, entity_name)
         q2 = f"{entity_name} is {entity_action}"
 
-        summary = self.chain(prompt=prompt).run(q1=q1, queries=[q1, q2]).strip()
-        return summary
+        summary = self.chain(prompt=prompt).run(name=self.name, stop=get_stop_tokens(), q1=q1, queries=[q1, q2]).strip()
+        return f'{self.name} {summary}'
 
-        #return self.chain(prompt=prompt).run(q1=q1, q2=q2).strip()
+        #return self.chain(prompt=prompt).run(stop=get_stop_tokens(), q1=q1, q2=q2).strip()
 
     def _generate_reaction(self, observation: str, suffix: str) -> str:
         """React to a given observation or dialogue act."""
         prompt = PromptTemplate.from_template(
             get_prompt('generate_reaction').replace("{suffix}", suffix)
         )
-        summary = self.get_summary()
-        relevant_memories = self.summarize_related_memories(observation)
+        summary = self.get_summary().replace(u"\u200B", "").strip()
+        relevant_memories = self.summarize_related_memories(observation).replace(u"\u200B", "").strip()
+        recent_memories = "\n".join(self.get_most_recent_memories())
 
         # I think relevant_memories is suppose to only provide context for a relationship between agent and observer, as suggested with the query
         # but the original implementation seems to just leverage it to further filter relevant memories, per the name
+
         if relevant_memories and relevant_memories != "N/A":
             memory = relevant_memories
         else:
@@ -140,9 +142,11 @@ class GenerativeAgent(BaseModel):
             status=self.status if self.status else "N/A",
             summary=summary if summary else "N/A",
             memory=memory if memory else "N/A",
+            #relevant_memories=relevant_memories if relevant_memories else "N/A",
+            #recent_memories=recent_memories if recent_memories else "N/A",
             observation=observation if observation else "N/A",
         )
-        reaction = self.chain(prompt=prompt).run(**kwargs).strip()
+        reaction = self.chain(prompt=prompt).run(stop=get_stop_tokens(), **kwargs).strip()
         if self.verbose:
             print(reaction)
         return reaction
@@ -150,6 +154,20 @@ class GenerativeAgent(BaseModel):
     def _clean_response(self, text: str) -> str:
         return re.sub(f"^{self.name} ", "", text.strip()).strip()
 
+    def generate_response(self, observation: str) -> Tuple[bool, str]:
+        """React to a given observation."""
+        call_to_action_template = get_prompt('suffix_generate_response')
+        full_result = f"{self.name} {self._generate_reaction(observation, call_to_action_template)}"
+
+        self.memory.save_context(
+            {},
+            {
+                self.memory.add_memory_key: full_result
+            },
+        )
+
+        return True, full_result
+
     def generate_reaction(self, observation: str) -> Tuple[bool, str]:
         """React to a given observation."""
         full_result = self._generate_reaction(observation, get_prompt('suffix_generate_reaction'))
@@ -191,9 +209,9 @@ class GenerativeAgent(BaseModel):
         return False, f"{self.name} did not react in a relevant way"
     """
 
-    def generate_dialogue_response(self, observation: str) -> Tuple[bool, str]:
+    def generate_dialogue(self, observation: str) -> Tuple[bool, str]:
         """React to a given observation."""
-        call_to_action_template = (get_prompt('suffix_generate_dialogue_response'))
+        call_to_action_template = (get_prompt('suffix_generate_dialogue'))
         full_result = self._generate_reaction(observation, call_to_action_template)
         result = full_result.strip().split("\n")[0]
         if "GOODBYE:" in result:
@@ -227,7 +245,7 @@ class GenerativeAgent(BaseModel):
         """"""
         # The agent seeks to think about their core characteristics.
         prompt = PromptTemplate.from_template(get_prompt('compute_agent_summary'))
-        summary = self.chain(prompt).run(name=self.name, summary=self.summaries[-1] if len(self.summaries) else self.summary, queries=[f"{self.name}'s core characteristics"]).strip()
+        summary = self.chain(prompt).run(stop=get_stop_tokens(), name=self.name, summary=self.summaries[-1] if len(self.summaries) else self.summary, queries=[f"{self.name}'s core characteristics"]).strip()
         if self.verbose:
             print(summary)
         return summary
@@ -247,8 +265,8 @@ class GenerativeAgent(BaseModel):
 
         values = [
             f"Name: {self.name} (sex: {self.sex}, age: {self.age if self.age is not None else 'N/A'})",
-            f"\nInnate traits: {self.traits}",
-            f"\nStatus: {self.status}"
+            f"Innate traits: {self.traits}",
+            f"Status: {self.status}"
         ]
 
         return "\n".join([ value for value in values if value[-3:] != "N/A" ]) + f"\n{self.summary.strip()}"
@@ -259,4 +277,4 @@ class GenerativeAgent(BaseModel):
         current_time_str = datetime.now().strftime("%B %d, %Y, %I:%M %p")
         return (
             f"{summary}\nIt is {current_time_str}.\n{self.name}'s status: {self.status}"
-        )
+        )
\ No newline at end of file
diff --git a/src/ext/memory.py b/src/ext/memory.py
index 320f710..36a7db8 100755
--- a/src/ext/memory.py
+++ b/src/ext/memory.py
@@ -34,7 +34,7 @@ from langchain.schema import BaseLanguageModel, BaseMemory, Document
 
 logger = logging.getLogger(__name__)
 
-from .prompts import get_prompt
+from .prompts import get_prompt, get_stop_tokens
 
 class GenerativeAgentMemory(BaseMemory):
     llm: BaseLanguageModel
@@ -84,7 +84,7 @@ class GenerativeAgentMemory(BaseMemory):
         prompt = PromptTemplate.from_template(get_prompt("topic_of_reflection"))
         observations = self.memory_retriever.memory_stream[-last_k:]
         observation_str = "\n".join([o.page_content for o in observations])
-        result = self.chain(prompt).run(observations=observation_str)
+        result = self.chain(prompt).run(stop=get_stop_tokens(), observations=observation_str)
         if self.verbose:
             print(result)
 
@@ -100,9 +100,7 @@ class GenerativeAgentMemory(BaseMemory):
                 for i, memory in enumerate(related_memories)
             ]
         )
-        result = self.chain(prompt).run(
-            topic=topic, related_statements=related_statements
-        )
+        result = self.chain(prompt).run( stop=get_stop_tokens(), topic=topic, related_statements=related_statements )
         # TODO: Parse the connections between memories and insights
         return self._parse_list(result)
 
@@ -122,7 +120,7 @@ class GenerativeAgentMemory(BaseMemory):
     def _score_memory_importance(self, memory_content: str) -> float:
         """Score the absolute importance of the given memory."""
         prompt = PromptTemplate.from_template(get_prompt("memory_importance"))
-        score = self.chain(prompt).run(memory_content=memory_content).strip()
+        score = self.chain(prompt).run(stop=get_stop_tokens(tokens=[".", "/"]), memory_content=memory_content).strip()
         if self.verbose:
             print(f"Importance score: {score}")
         try:
@@ -138,9 +136,7 @@ class GenerativeAgentMemory(BaseMemory):
         """Add an observation or memory to the agent's memory."""
         importance_score = self._score_memory_importance(memory_content)
         self.aggregate_importance += importance_score
-        document = Document(
-            page_content=memory_content, metadata={"importance": importance_score}
-        )
+        document = Document( page_content=memory_content, metadata={"importance": importance_score} )
         result = self.memory_retriever.add_documents([document])
 
         # After an agent has processed a certain amount of memories (as measured by
@@ -198,20 +194,14 @@ class GenerativeAgentMemory(BaseMemory):
                 mem for query in queries for mem in self.fetch_memories(query)
             ]
             return {
-                self.relevant_memories_key: self.format_memories_detail(
-                    relevant_memories
-                ),
-                self.relevant_memories_simple_key: self.format_memories_simple(
-                    relevant_memories
-                ),
+                self.relevant_memories_key: self.format_memories_detail( relevant_memories ),
+                self.relevant_memories_simple_key: self.format_memories_simple( relevant_memories ),
             }
 
         most_recent_memories_token = inputs.get(self.most_recent_memories_token_key)
         if most_recent_memories_token is not None:
             return {
-                self.most_recent_memories_key: self._get_memories_until_limit(
-                    most_recent_memories_token
-                )
+                self.most_recent_memories_key: self._get_memories_until_limit( most_recent_memories_token )
             }
         return {}
 
diff --git a/src/ext/prompts.py b/src/ext/prompts.py
index ce6a31c..4a27967 100755
--- a/src/ext/prompts.py
+++ b/src/ext/prompts.py
@@ -1,7 +1,6 @@
 import os
 
 LLM_PROMPT_TUNE = os.environ.get('LLM_PROMPT_TUNE') # oai, vicuna, supercot
-STOP_TOKEN_HINT = "" # "\nWrite \"END\" afterwards."
 
 USE_STOP_HINT = [ "llama" ]
 
@@ -10,57 +9,50 @@ PROMPTS = {
         "system": (
             "What is the observed entity in the following observation?"
             " ONLY report one object and write one sentence."
-            f'{STOP_TOKEN_HINT}'
         ),
         "user": (
-            "Observation: {observation}"
+            "{observation}"
         ),
-        "assistant": "Entity=",
+        "assistant": "Entity = ",
     },
     "entity_action": {
         "system": (
-            "What is the following entity doing in the following observation?"
+            "What is `{entity}` doing in the following observation?"
             " ONLY write one sentence."
-            f'{STOP_TOKEN_HINT}'
         ),
         "user": (
-            "Entity: {entity}"
-            "\nObservation: {observation}"
+            "{observation}"
        ),
-        "assistant": "`{entity}` is ",
+        "assistant": "{entity} is ",
     },
     "summarize_related_memories": {
         "system": (
-            "Given the following context, answer the following question."
-            f'{STOP_TOKEN_HINT}'
+            "{relevant_memories_simple}"
         ),
         "user": (
-            "Context: {relevant_memories_simple}"
-            "\nQuestion: {q1}?"
+            "{q1}?"
         ),
-        "assistant": "Summary of relevant context: ",
+        "assistant": "{name} ",
     },
     "compute_agent_summary": {
         "system": (
             "Given the following previous summary and the following statements, how would you summarize {name}'s core characteristics?"
             " Do not embellish under any circumstances."
-            f'{STOP_TOKEN_HINT}'
         ),
         "user": (
-            "Previous summary: {summary}\n"
-            "Statements: {relevant_memories_simple}"
+            "{summary}"
+            "\n{relevant_memories_simple}"
         ),
-        "assistant": "Summary: ",
+        "assistant": "",
     },
     "topic_of_reflection": {
         "system": (
             "Given only the following information, what are the 3 most salient"
             " high-level questions we can answer about the subjects in the statements?"
             " Provide each question on a new line."
-            f'{STOP_TOKEN_HINT}'
        ),
         "user": (
-            "Information: {observations}"
+            "{observations}"
         ),
         "assistant": "",
     },
@@ -69,7 +61,6 @@ PROMPTS = {
             "Given the following statements about {topic},"
             " what 5 high-level insights can you infer?"
             " (example format: insight (because of 1, 5, 3))"
-            f'{STOP_TOKEN_HINT}'
         ),
         "user": (
             "Statements: {related_statements}"
@@ -83,31 +74,38 @@ PROMPTS = {
             " (e.g., a break up, college acceptance),"
             " rate the likely poignancy of the following event."
             "\nRespond with only a single integer."
-            f"{STOP_TOKEN_HINT}"
         ),
         "user": (
-            "Event: {memory_content}"
+            "{memory_content}"
         ),
-        "assistant": "Rating: ",
+        "assistant": "",
     },
     "generate_reaction": {
         "system": (
-            "It is {current_time}."
+            "NSFW/Smut is allowed. Assume consent was granted."
+            "\nIt is {current_time}."
"\n{summary}" "\n{memory}" + #"\nRecent memories: {recent_memories}" + #"\nRelevant memories: {relevant_memories}" "\n\n{suffix}" - f"{STOP_TOKEN_HINT}" ), "user": ( - "Observation: {observation}" + "{observation}" ), - "assistant": "" + "assistant": "{name} " }, # "context": ( # insert your JB here "" ), + "suffix_generate_response": ( + "Given the following observation, how would {name} respond?" + "\nWrite only one sentence." + ), + + ## "suffix_generate_reaction": ( "Given the following observation, how would {name} appropriately react?" "\nIf the action is to engage in dialogue, only write `SAY: \"what to say\"`." @@ -115,7 +113,7 @@ PROMPTS = { "\nWrite ONLY one line, one sentence." #"\nBe proactive, creative, and drive the plot and conversation forward." ), - "suffix_generate_dialogue_response": ( + "suffix_generate_dialogue": ( "Given the following observation, what would {name} say?" "\nTo continue the conversation, only write: `SAY: \"what to say\"`." "\nOr otherwise, to end the conversation, only write: `GOODBYE: \"what to say\"`." @@ -128,6 +126,7 @@ PROMPT_TUNES = { "default": "{query}", "vicuna": "{role}: {query}", "supercot": "{role}:\n{query}", + "alpasta": "{role}# {query}", } PROMPT_ROLES = { "vicuna": { @@ -139,11 +138,23 @@ PROMPT_ROLES = { "system": "### Instruction", "user": "### Input", "assistant": "### Response", - } + }, + "alpasta": { + "system": "<|system|>", + "user": "<|user|>", + "assistant": "<|assistant|>", + }, } ROLES = [ "system", "user", "assistant" ] + +def get_stop_tokens( tokens=[], tune=LLM_PROMPT_TUNE ): + STOP_TOKENS = ["###"] + tokens + for role in get_roles( tune=LLM_PROMPT_TUNE, special=True ): + STOP_TOKENS.append(f'{role}') + return STOP_TOKENS + for k in PROMPTS: if k == "context": continue @@ -187,10 +198,6 @@ def get_prompt( key, tune=LLM_PROMPT_TUNE ): if role in roles: role = roles[role] - # remove stop token hinting if we're using OAI since I don't have control over early terminating - if STOP_TOKEN_HINT in query and tune in USE_STOP_HINT: - query = query.replace(STOP_TOKEN_HINT, "") - output = f'{PROMPT_TUNES[tune]}' output = output.replace("{role}", role) output = output.replace("{query}", query) diff --git a/src/main.py b/src/main.py index bbcb176..e33887d 100755 --- a/src/main.py +++ b/src/main.py @@ -1,6 +1,7 @@ import os import gradio as gr import gradio.utils +from termcolor import colored from utils import create_agent, agent_observes, interview_agent, run_conversation, get_summary, save_agent, load_agent @@ -65,10 +66,42 @@ def get_summary_proxy( agents ): messages.append(get_summary( agent, force_refresh = True )) return "\n".join(messages) -def run_conversation_proxy( agents, message ): +def run_conversation_proxy( agents, observation, limit=2 ): agents = [ AGENTS[agent] for agent in agents ] - messages = run_conversation( agents, message, limit=len(agents)*2 ) + + if len(agents) < 2: + raise "Not enough agents" + + dialogue = [] + dialogue.append(f'[{agents[0].name}] {observation}') + + """Runs a conversation between agents.""" + print(colored("[Conversation]", "magenta")) + yield "\n".join(dialogue) + agent_observes( agents[0], [observation] ) + agents = agents[1:] + [agents[0]] + + while True: + break_dialogue = False + for agent in agents: + stay_in_dialogue, observation = agent.generate_response(observation) # agent.generate_reaction(observation) if random.random() < p_reaction else agent.generate_dialogue_response(observation) + dialogue.append(f'[{agent.name}] {observation}') + yield "\n".join(dialogue) + 
+            print(colored("[Conversation]", "magenta"), observation)
+            if not stay_in_dialogue:
+                break_dialogue = True
+        if break_dialogue:
+            break
+        if limit > 0 and len(dialogue) >= limit * len(agents):
+            break
+    print("END")
+    dialogue.append("END")
+    return "\n".join(dialogue)
+
+    """
+    messages = run_conversation( agents, observation, limit=len(agents)*2 )
     return "\n".join(messages)
+    """
 
 def view_agent( agents, last_k = 50 ):
     if not isinstance( agents, list ):
diff --git a/src/utils.py b/src/utils.py
index 3d5a7ee..1efcaec 100755
--- a/src/utils.py
+++ b/src/utils.py
@@ -25,8 +25,9 @@ from langchain.vectorstores import FAISS
 LLM_TYPE = os.environ.get('LLM_TYPE', "llamacpp") # options: llamacpp, oai
 LLM_LOCAL_MODEL = os.environ.get('LLM_MODEL',
     #"./models/ggml-vicuna-13b-1.1/ggml-vic13b-uncensored-q4_2.bin"
-    #"./models/llama-13b-supercot-ggml/ggml-model-q4_2.bin"
-    "./models/llama-33b-supercot-ggml/ggml-model-q4_2.bin"
+    "./models/llama-13b-supercot-ggml/ggml-model-q4_2.bin"
+    #"./models/llama-33b-supercot-ggml/ggml-model-q4_2.bin"
+    #"./models/gpt4-x-alpasta-30b-ggml-q4_1.bin"
 )
 LLM_CONTEXT = int(os.environ.get('LLM_CONTEXT', '2048'))
 LLM_THREADS = int(os.environ.get('LLM_THREADS', '6'))
@@ -40,6 +41,8 @@ else:
         LLM_PROMPT_TUNE_DEFAULT = "supercot"
     elif "vicuna" in LLM_LOCAL_MODEL.lower():
         LLM_PROMPT_TUNE_DEFAULT = "vicuna"
+    elif "alpasta" in LLM_LOCAL_MODEL.lower():
+        LLM_PROMPT_TUNE_DEFAULT = "alpasta"
     else:
         LLM_PROMPT_TUNE_DEFAULT = "llama"
 
@@ -51,10 +54,6 @@ callback_manager = CallbackManager([StreamingStdOutCallbackHandler()]) # unncess
 # Overrides for some fixes, like scoring memory and LLM-specific promptings
 from ext import GenerativeAgent, GenerativeAgentMemory, get_roles
 
-STOP_TOKENS = ["END"]
-for role in get_roles( tune=LLM_PROMPT_TUNE, special=True ):
-    STOP_TOKENS.append(f'{role}:')
-
 if LLM_TYPE=="llamacpp":
     from langchain.llms import LlamaCpp
 
@@ -64,9 +63,8 @@ if LLM_TYPE=="llamacpp":
         verbose=True,
         n_ctx=LLM_CONTEXT,
         #n_threads=LLM_THREADS,
-        use_mlock=True,
-        use_mmap=True,
-        stop=STOP_TOKENS
+        #use_mlock=True,
+        #use_mmap=True,
     )
 elif LLM_TYPE=="oai":
     from langchain.chat_models import ChatOpenAI
@@ -95,7 +93,6 @@ elif LLM_TYPE=="oai":
 else:
     raise f"Invalid LLM type: {LLM_TYPE}"
 
-
 if EMBEDDING_TYPE == "hf":
     from langchain.embeddings import HuggingFaceEmbeddings
 
@@ -144,7 +141,7 @@ def _create_new_memories():
         memory_retriever=_create_new_memory_retriever(),
         reflection_threshold=8,
         verbose=True,
-        max_tokens_limit=256 # LLM_CONTEXT/4
+        max_tokens_limit=128 # LLM_CONTEXT/4
     )
 
 def create_agent(**kwargs):
@@ -210,13 +207,13 @@ def run_conversation(agents: List[GenerativeAgent], observation: str, limit: int
     """Runs a conversation between agents."""
     print(colored("[Conversation]", "magenta"))
     agent_observes( agents[0], [observation] )
+    agents = agents[1:] + [agents[0]]
 
     dialogue = []
     while True:
         break_dialogue = False
         for agent in agents:
-            stay_in_dialogue, observation = agent.generate_reaction(observation) if random.random() < p_reaction else agent.generate_dialogue_response(observation)
-            yield observation
+            stay_in_dialogue, observation = agent.generate_response(observation) # agent.generate_reaction(observation) if random.random() < p_reaction else agent.generate_dialogue_response(observation)
             dialogue.append(observation)
             print(colored("[Conversation]", "magenta"), observation)
             if not stay_in_dialogue:
@@ -225,4 +222,5 @@ def run_conversation(agents: List[GenerativeAgent], observation: str, limit: int
             break
         if limit > 0 and len(dialogue) >= limit:
             break
+        agent_observes( agent, [observation] )
     return dialogue
\ No newline at end of file