# jane-index/jane_index.py

#!/usr/bin/env python
import collections
import json
import sys

import requests
import tqdm

# Story openers sent to the model for completion; the main script issues one
# request per (prompt, preset) pair. Each opener sets up a situation in which
# Jane is about to act, so the continuation can be checked for actions that
# are impossible for a person without limbs.
# NOTE: these strings are part of the benchmark input -- editing them (even
# fixing wording) changes what is being measured.
prompts = (
        "A ray of sunlight from the nearby window falls on the Jane's face, irritating her. She thinks of closing the blinds, then",
        "The TV is too loud. Jane winces and",
        "Jane decides that it's time for her to go outside for some fresh air. She",
        "Jane's nose itches, she",
        "Jane is feeling peckish, it's breakfast time. She",
        "A doctor walks into the room and says, \"Morning, Janey, how are you feeling?\" He extends his hand towards her."
    )

# Values sent in the "preset" field of each generation request; every prompt
# is run once per entry, and the scoring step reports one success rate per
# entry. "simple-1" is also the preset used for the scoring requests.
# Presumably these are sampler preset names known to the API server --
# confirm against the server's preset list before adding new ones.
presets = (
    "Asterism",
    "Big O",
    "Contrastive Search",
    "Debug-deterministic",
    "Divine Intellect",
    "Kobold-Godlike",
    "LLaMA-Precise",
    "Midnight Enigma",
    "Mirostat",
    "Shortwave",
    "simple-1",
    "Space Alien",
    "StarChat",
    "tfs-with-top-a",
    "Titanic",
    "Yara",
)

# Character/scene description. make_prompt_body() prepends it to the prompt
# when use_description is true and always stores it in the request body under
# "description". The leading and trailing newlines are part of the text that
# is sent.
DESCRIPTION = '''
Jane is a quadruple amputee, which means she had all of her limbs amputated,
therefore she does not have limbs. She lacks the ability to walk and thus uses a
wheelchair to move around. She cannot hold things, and thus uses other means to
manipulate objects, such as her nose, tongue, or she asks someone to help her.
She is in a hospital room, in a hospital bed, she is wearing a simple patients' gown.
It's morning outside. Jane is feeling bored.
'''

# Scoring prompt template laid out with "### Instruction:" / "### Response:"
# sections; the single "{}" placeholder is meant to receive the text to judge.
# NOTE(review): nothing in the visible part of this file references this
# template -- score_responces() uses agnostic_instruct instead.
alpaca_instruct = '''
Knowing that Jane is a person without limbs, answer these questions: Does Jane perform physical actions in a way that is not possible for a person without limbs (amputee)?
If there was an impossible action use the word 'FAIL', otherwise use the word 'SUCCESS' in your answer.
Example FAIL answer: Jane was described shaking hand with people, that would be impossible since Jane is amputee and has no hands. FAIL.
Example SUCCESS answer: All Jane's actions are plausible for amputee. SUCCESS.

### Instruction:
{}

### Response:
answer:'''

# Scoring prompt template used by score_responces(): the "{}" placeholder is
# filled with the original prompt followed by the model's continuation. The
# scorer's reply is classified by a case-insensitive search for "success".
# The template ends with "Jane acts" (plus a newline), presumably to steer the
# scorer into the same sentence shape as the two example answers.
agnostic_instruct = '''
Knowing that Jane is a person without limbs, answer the question: Does Jane perform physical actions in a way that is not possible for a person without limbs (amputee)?
If there was an impossible action for an amputee use the word 'FAIL', otherwise use the word 'SUCCESS' in your answer.

Example SUCCESS answer: Jane acts within what is possible for an amputee. Jane asks for help and suffers emotionally which does not require to have limbs. SUCCESS.
Example FAIL answer: Jane acts outside what is possible for an amputee. She is described to shake hands when she has no arms. FAIL.

### Input: {}
### Answer: Jane acts
'''


# Endpoint that receives the generation requests (POST, JSON body); the reply
# is read at ["results"][0]["text"].
URL = "http://127.0.0.1:5000/api/v1/generate"
# Endpoint queried (GET) for the model name; the reply is read at ["result"].
MODEL_URL = "http://127.0.0.1:5000/api/v1/model"

def make_prompt_body(p, preset, use_description=True):
    """Build the JSON payload for one generation request.

    Args:
        p: Prompt text the model should continue.
        preset: Value for the request's ``"preset"`` field.
        use_description: When true, ``DESCRIPTION`` is prepended to ``p`` in
            the ``"prompt"`` field; otherwise ``p`` is sent unchanged.

    Returns:
        A new dict with the keys ``"preset"``, ``"max_new_tokens"``,
        ``"added_prompt"``, ``"description"`` and ``"prompt"``.
    """
    full_prompt = DESCRIPTION + p if use_description else p
    return {
        "preset": preset,
        "max_new_tokens": 250,
        # The bare prompt is kept separately so it can be recovered later
        # without having to strip the description back off.
        "added_prompt": p,
        # Stored unconditionally, even when it is not part of the prompt.
        "description": DESCRIPTION,
        "prompt": full_prompt,
    }

def get_model_name(timeout=30):
    """Return the model name reported by the API server.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so an unresponsive
            server would hang the script before any work is done. Pass
            ``None`` to wait forever.

    Returns:
        The value stored under ``"result"`` in the server's JSON reply.

    Raises:
        requests.RequestException: If the server cannot be reached, the
            request times out, or the server answers with an HTTP error
            status.
    """
    resp = requests.get(MODEL_URL, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing KeyError or JSON
    # decode error further down when the server rejects the request.
    resp.raise_for_status()
    return resp.json()["result"]

def make_request(p, preset, use_description=True, timeout=None):
    """Send one generation request and return the request body plus the reply.

    Args:
        p: Prompt text the model should continue.
        preset: Value for the request's ``"preset"`` field.
        use_description: Forwarded to ``make_prompt_body``; controls whether
            ``DESCRIPTION`` is prepended to the prompt.
        timeout: Optional timeout in seconds for the HTTP call. The default
            ``None`` waits indefinitely, since generation time depends on
            the model and hardware.

    Returns:
        The dict built by ``make_prompt_body`` with the generated text added
        under ``"response"``.

    Raises:
        requests.RequestException: If the server cannot be reached, the
            request times out, or the server answers with an HTTP error
            status.
    """
    body = make_prompt_body(p, preset, use_description)
    resp = requests.post(URL, json=body, timeout=timeout)
    # Surface server-side failures as HTTP errors rather than as a KeyError
    # or JSON decode error when the error payload is parsed below.
    resp.raise_for_status()
    body["response"] = resp.json()["results"][0]["text"]
    return body

def score_responces(filename):
    """Score previously collected answers and write the results back in place.

    Every entry of the file's ``"answers"`` list gets a
    ``"<scorer>_score"`` field holding the scorer model's verdict text (or
    ``"Empty response: FAIL"`` when the answer has no text). An answer counts
    as a success when that text contains ``"success"``, case-insensitively.
    The file is then rewritten with the updated answers plus
    ``"<scorer>_success_rate_full"`` and one
    ``"<scorer>_success_rate_<preset>"`` entry per known preset.

    Args:
        filename: Path of a JSON file with an ``"answers"`` list whose items
            carry at least ``"preset"``, ``"added_prompt"`` and
            ``"response"``.

    Raises:
        KeyError: If the file or one of its answers lacks a required key.
        requests.RequestException: Propagated from the scoring requests.
    """
    scorer_name = get_model_name()
    score_key = f"{scorer_name}_score"

    with open(filename) as lf:
        prev_answers = json.load(lf)
    answers = prev_answers["answers"]

    for resp in tqdm.tqdm(answers):
        if resp["response"] == "":
            # Nothing to judge; record a failure without calling the scorer.
            resp[score_key] = "Empty response: FAIL"
            continue
        score_prompt = agnostic_instruct.format(resp["added_prompt"] + resp["response"])
        score_body = make_request(score_prompt, "simple-1", use_description=False)
        resp[score_key] = score_body["response"]

    # Tally totals and successes per preset in a single pass each.
    total_by_preset = collections.Counter(a["preset"] for a in answers)
    success_by_preset = collections.Counter(
        a["preset"] for a in answers if "success" in a[score_key].lower()
    )
    success_total = sum(success_by_preset.values())

    # Start from the existing file content so aggregate rates written by an
    # earlier run with a different scorer model are not lost; the keys are
    # namespaced by scorer name, so only this scorer's entries are replaced.
    data = {**prev_answers, "answers": answers}
    # An empty answer list yields a rate of 0.0 instead of dividing by zero.
    data[f"{scorer_name}_success_rate_full"] = (
        success_total / len(answers) if answers else 0.0
    )

    for preset in presets:
        # Divide by the number of answers actually recorded for this preset,
        # so the rate stays correct when the file was produced with a
        # different number of prompts than the current `prompts` tuple.
        answered = total_by_preset[preset]
        rate = round(success_by_preset[preset] / answered, 2) if answered else 0.0
        data[f"{scorer_name}_success_rate_{preset}"] = rate

    with open(filename, "w") as lf:
        json.dump(data, lf, indent=4)


def main():
    """Run the benchmark from the command line.

    With a first argument ending in ``.json`` the file is scored in place via
    ``score_responces``. Otherwise every prompt is sent once per preset and
    the collected request/response bodies are written to
    ``<model name>.json`` in the current directory.
    """
    # Scoring mode: an existing answers file was passed on the command line.
    if len(sys.argv) >= 2 and sys.argv[1].endswith(".json"):
        score_responces(sys.argv[1])
        return

    # Collection mode. The model name is fetched first so an unreachable
    # server fails before any generation work is started.
    model_name = get_model_name()
    collect_resp = []
    for p in tqdm.tqdm(prompts):
        for preset in presets:
            collect_resp.append(make_request(p, preset))

    data = {"answers": collect_resp}
    with open(f"{model_name}.json", "w") as lf:
        json.dump(data, lf, indent=4)


if __name__ == "__main__":
    main()