init commit
This commit is contained in:
commit
0ec4cb035c
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
*json
|
||||
143
jane_index.py
Executable file
143
jane_index.py
Executable file
@ -0,0 +1,143 @@
|
||||
#!/usr/bin/env python
|
||||
import requests
|
||||
|
||||
# Continuation prompts for the generation run: each one sets up a mundane
# situation that tempts a model into describing Jane using limbs she does not
# have (closing blinds, scratching, shaking hands, ...). One generation is
# produced per (prompt, preset) pair in the __main__ block.
prompts = (
    "A ray of sunlight from the nearby window falls on the Jane's face, irritating her. She thinks of closing the blinds, then",
    "The TV is too loud. Jane winces and",
    "Jane decides that it's time for her to go outside for some fresh air. She",
    "Jane's nose itches, she",
    "Jane is feeling peckish, it's breakfast time. She",
    "A doctor walks into the room and says, \"Morning, Janey, how are you feeling?\" He extends his hand towards her."
)
|
||||
|
||||
# Sampling presets to evaluate. These are preset names understood by the
# text-generation-webui API ("preset" field of the /generate body); every
# prompt is generated once with each preset.
presets = (
    "Asterism",
    "Big O",
    "Contrastive Search",
    "Debug-deterministic",
    "Divine Intellect",
    "Kobold-Godlike",
    "LLaMA-Precise",
    "Midnight Enigma",
    "Mirostat",
    "Shortwave",
    "simple-1",
    "Space Alien",
    "StarChat",
    "tfs-with-top-a",
    "Titanic",
    "Yara",
)
|
||||
|
||||
DESCRIPTION = '''
|
||||
Jane is a quadruple amputee, which means she had all of her limbs amputated,
|
||||
therefore she does not have limbs. She lacks the ability to walk and thus uses a
|
||||
wheelchair to move around. She cannot hold things, and thus uses other means to
|
||||
manipulate objects, such as her nose, tongue, or she asks someone to help her.
|
||||
She is in a hospital room, in a hospital bed, she is wearing a simple patients' gown.
|
||||
It's morning outside. Jane is feeling bored.
|
||||
'''
|
||||
|
||||
alpaca_instruct = '''
|
||||
Knowing that Jane is a person without limbs, answer these questions: Does Jane perform physical actions in a way that is not possible for a person without limbs (amputee)?
|
||||
If there was an impossible action use the word 'FAIL', otherwise use the word 'SUCCESS' in your answer.
|
||||
Example FAIL answer: Jane was described shaking hand with people, that would be impossible since Jane is amputee and has no hands. FAIL.
|
||||
Example SUCCESS answer: All Jane's actions are plausible for amputee. SUCCESS.
|
||||
|
||||
### Instruction:
|
||||
{}
|
||||
|
||||
### Response:
|
||||
answer:'''
|
||||
|
||||
agnostic_instruct = '''
|
||||
Knowing that Jane is a person without limbs, answer the question: Does Jane perform physical actions in a way that is not possible for a person without limbs (amputee)?
|
||||
If there was an impossible action for an amputee use the word 'FAIL', otherwise use the word 'SUCCESS' in your answer.
|
||||
|
||||
Example SUCCESS answer: Jane acts within what is possible for an amputee. Jane asks for help and suffers emotionally which does not require to have limbs. SUCCESS.
|
||||
Example FAIL answer: Jane acts outside what is possible for an amputee. She is described to shake hands when she has no arms. FAIL.
|
||||
|
||||
### Input: {}
|
||||
### Answer: Jane acts
|
||||
'''
|
||||
|
||||
|
||||
# Endpoints of a locally running text-generation-webui API (default port 5000).
URL = "http://127.0.0.1:5000/api/v1/generate"  # POST: text generation
MODEL_URL = "http://127.0.0.1:5000/api/v1/model"  # GET: name of the loaded model
|
||||
|
||||
def make_prompt_body(p, preset, use_description=True):
    """Build the JSON body for a /generate request.

    Args:
        p: the continuation prompt text.
        preset: name of a webui sampling preset.
        use_description: when True, prepend the Jane scenario DESCRIPTION to
            the prompt (generation runs); when False, send the prompt as-is
            (judge/scoring runs).

    Returns:
        dict ready to be POSTed as JSON; also keeps the raw prompt under
        "added_prompt" so the scoring pass can reconstruct the judge input.
    """
    body = {
        "preset": preset,
        "max_new_tokens": 500,
        "added_prompt": p,
    }
    if use_description:
        body["description"] = DESCRIPTION
        body["prompt"] = DESCRIPTION + p
    else:
        # Bug fix: the original unconditionally re-assigned body["prompt"] = p
        # after the if-branch, clobbering the description-prefixed prompt so
        # the scenario was never actually sent to the model.
        body["prompt"] = p
    return body
|
||||
|
||||
def get_model_name():
    """Return the name of the model currently loaded in the webui API."""
    return requests.get(MODEL_URL).json()["result"]
|
||||
|
||||
def make_request(p, preset, use_description=True):
    """POST one generation request and return its body with the reply attached.

    The returned dict is the request body built by make_prompt_body, augmented
    with a "response" key holding the generated text.
    """
    payload = make_prompt_body(p, preset, use_description)
    api_reply = requests.post(URL, json=payload).json()
    payload["response"] = api_reply["results"][0]["text"]
    return payload
|
||||
|
||||
def score_responces(filename):
    """Score a previously generated answers file with the loaded judge model.

    Reads the json produced by a generation run, asks the currently loaded
    model to judge every non-empty response via agnostic_instruct, then
    rewrites the same file with per-answer scores, an overall success rate,
    and one success rate per preset.

    Note: the misspelled name ("responces") is kept because the function is
    called by name from the __main__ block.
    """
    # Imported locally so the function also works when this module is
    # imported; the original relied on the __main__ block having injected
    # json/tqdm into module globals.
    import json
    import tqdm

    scorer_name = get_model_name()
    with open(filename) as lf:
        prev_answers = json.load(lf)

    new_list = []
    success_counter = 0
    for resp in tqdm.tqdm(prev_answers["answers"]):
        # Default verdict; overwritten below whenever the model produced text.
        resp[f"{scorer_name}_score"] = "Empty response: FAIL"
        if resp["response"] != "":
            # The judge sees the continuation prompt plus the generated text.
            score_prompt = agnostic_instruct.format(resp["added_prompt"] + resp["response"])
            score_body = make_request(score_prompt, "simple-1", use_description=False)
            resp[f"{scorer_name}_score"] = score_body["response"]
            if "success" in score_body["response"].lower():
                success_counter += 1
        new_list.append(resp)

    # Tally successes per preset for the per-preset rates below.
    preset_succ_counter = {k: 0 for k in presets}
    for preset in presets:
        for answer in new_list:
            if answer["preset"] == preset:
                if "success" in answer[f"{scorer_name}_score"].lower():
                    preset_succ_counter[preset] += 1

    # Overall success rate; guard an empty answers list (was a
    # ZeroDivisionError in the original).
    full_rate = success_counter / len(new_list) if new_list else 0.0
    data = {"answers": new_list,
            f"{scorer_name}_success_rate_full": full_rate}

    # Each preset is generated once per prompt, so len(prompts) is the
    # per-preset denominator.
    for p, v in preset_succ_counter.items():
        data[f"{scorer_name}_success_rate_{p}"] = round(v / len(prompts), 2)

    with open(filename, "w") as lf:
        json.dump(data, lf, indent=4)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Heavy/optional imports live here so importing this module stays cheap.
    import json
    import sys

    import tqdm

    # Scoring mode: `./jane_index.py results.json` re-judges an existing file.
    # Fix: use the public `sys` module instead of reaching it via `os.sys`,
    # and sys.exit() instead of the site-module exit() helper.
    if len(sys.argv) >= 2 and sys.argv[1].endswith(".json"):
        score_responces(sys.argv[1])
        sys.exit(0)

    # Generation mode: one request per (prompt, preset) pair, saved to a json
    # file named after the loaded model.
    collect_resp = []
    model_name = get_model_name()
    for p in tqdm.tqdm(prompts):
        for preset in presets:
            collect_resp.append(make_request(p, preset))

    data = {"answers": collect_resp}
    with open(f"{model_name}.json", "w") as lf:
        json.dump(data, lf, indent=4)
|
||||
16
readme.md
Normal file
16
readme.md
Normal file
@ -0,0 +1,16 @@
|
||||
### Jane Index
|
||||
|
||||
### Testing llms on quadruple amputee scenarios
|
||||
Tests the presets shipped with ooba's webui by generating the actions of a quadruple amputee character and judging their plausibility;
|
||||
|
||||
### Initial Generation (generative model)
|
||||
- start ooba's text generation webui service on port 5000;
|
||||
- load the model you want to use for generation;
|
||||
`./jane_index.py`
|
||||
will generate json file with that model name (for ex: TheBloke_Llama-2-13B-GPTQ.json);
|
||||
|
||||
### Scoring (judge model)
|
||||
- go to ooba's webui and set judge model (preferably 65b+);
|
||||
`./jane_index.py TheBloke_Llama-2-13B-GPTQ.json`
|
||||
the judge scores each generation as a success or a failure and modifies the initial json file to add the scores,
|
||||
where 1 is a perfect score and 0 is a complete failure.
|
||||
2
requirements.txt
Normal file
2
requirements.txt
Normal file
@ -0,0 +1,2 @@
|
||||
requests
|
||||
tqdm
|
||||
Loading…
Reference in New Issue
Block a user