init commit
This commit is contained in:
commit
0ec4cb035c
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
*json
|
||||
143
jane_index.py
Executable file
143
jane_index.py
Executable file
@ -0,0 +1,143 @@
|
||||
#!/usr/bin/env python
|
||||
import requests
|
||||
|
||||
# Continuation prompts for the generation run: each one sets up a mundane
# situation that tempts a model into describing Jane using limbs she does not
# have (closing blinds, scratching, shaking hands, ...). One generation is
# produced per (prompt, preset) pair in the __main__ block.
prompts = (
    "A ray of sunlight from the nearby window falls on the Jane's face, irritating her. She thinks of closing the blinds, then",
    "The TV is too loud. Jane winces and",
    "Jane decides that it's time for her to go outside for some fresh air. She",
    "Jane's nose itches, she",
    "Jane is feeling peckish, it's breakfast time. She",
    "A doctor walks into the room and says, \"Morning, Janey, how are you feeling?\" He extends his hand towards her."
)
|
||||
|
||||
# Sampling presets to evaluate. These are preset names understood by the
# text-generation-webui API ("preset" field of the /generate body); every
# prompt is generated once with each preset.
presets = (
    "Asterism",
    "Big O",
    "Contrastive Search",
    "Debug-deterministic",
    "Divine Intellect",
    "Kobold-Godlike",
    "LLaMA-Precise",
    "Midnight Enigma",
    "Mirostat",
    "Shortwave",
    "simple-1",
    "Space Alien",
    "StarChat",
    "tfs-with-top-a",
    "Titanic",
    "Yara",
)
|
||||
|
||||
DESCRIPTION = '''
|
||||
Jane is a quadruple amputee, which means she had all of her limbs amputated,
|
||||
therefore she does not have limbs. She lacks the ability to walk and thus uses a
|
||||
wheelchair to move around. She cannot hold things, and thus uses other means to
|
||||
manipulate objects, such as her nose, tongue, or she asks someone to help her.
|
||||
She is in a hospital room, in a hospital bed, she is wearing a simple patients' gown.
|
||||
It's morning outside. Jane is feeling bored.
|
||||
'''
|
||||
|
||||
alpaca_instruct = '''
|
||||
Knowing that Jane is a person without limbs, answer these questions: Does Jane perform physical actions in a way that is not possible for a person without limbs (amputee)?
|
||||
If there was an impossible action use the word 'FAIL', otherwise use the word 'SUCCESS' in your answer.
|
||||
Example FAIL answer: Jane was described shaking hand with people, that would be impossible since Jane is amputee and has no hands. FAIL.
|
||||
Example SUCCESS answer: All Jane's actions are plausible for amputee. SUCCESS.
|
||||
|
||||
### Instruction:
|
||||
{}
|
||||
|
||||
### Response:
|
||||
answer:'''
|
||||
|
||||
agnostic_instruct = '''
|
||||
Knowing that Jane is a person without limbs, answer the question: Does Jane perform physical actions in a way that is not possible for a person without limbs (amputee)?
|
||||
If there was an impossible action for an amputee use the word 'FAIL', otherwise use the word 'SUCCESS' in your answer.
|
||||
|
||||
Example SUCCESS answer: Jane acts within what is possible for an amputee. Jane asks for help and suffers emotionally which does not require to have limbs. SUCCESS.
|
||||
Example FAIL answer: Jane acts outside what is possible for an amputee. She is described to shake hands when she has no arms. FAIL.
|
||||
|
||||
### Input: {}
|
||||
### Answer: Jane acts
|
||||
'''
|
||||
|
||||
|
||||
# Endpoints of a locally running text-generation-webui API (default port 5000).
URL = "http://127.0.0.1:5000/api/v1/generate"  # POST: text generation
MODEL_URL = "http://127.0.0.1:5000/api/v1/model"  # GET: name of the loaded model
|
||||
|
||||
def make_prompt_body(p, preset, use_description=True):
    """Build the JSON body for a /generate request.

    Args:
        p: the continuation prompt text.
        preset: name of a webui sampling preset.
        use_description: when True, prepend the Jane scenario DESCRIPTION to
            the prompt (generation runs); when False, send the prompt as-is
            (judge/scoring runs).

    Returns:
        dict ready to be POSTed as JSON; also keeps the raw prompt under
        "added_prompt" so the scoring pass can reconstruct the judge input.
    """
    body = {
        "preset": preset,
        "max_new_tokens": 500,
        "added_prompt": p,
    }
    if use_description:
        body["description"] = DESCRIPTION
        body["prompt"] = DESCRIPTION + p
    else:
        # Bug fix: the original unconditionally re-assigned body["prompt"] = p
        # after the if-branch, clobbering the description-prefixed prompt so
        # the scenario was never actually sent to the model.
        body["prompt"] = p
    return body
|
||||
|
||||
def get_model_name():
    """Return the name of the model currently loaded in the webui API."""
    return requests.get(MODEL_URL).json()["result"]
|
||||
|
||||
def make_request(p, preset, use_description=True):
    """POST one generation request and return its body with the reply attached.

    The returned dict is the request body built by make_prompt_body, augmented
    with a "response" key holding the generated text.
    """
    payload = make_prompt_body(p, preset, use_description)
    api_reply = requests.post(URL, json=payload).json()
    payload["response"] = api_reply["results"][0]["text"]
    return payload
|
||||
|
||||
def score_responces(filename):
    """Score a previously generated answers file with the loaded judge model.

    Reads the json produced by a generation run, asks the currently loaded
    model to judge every non-empty response via agnostic_instruct, then
    rewrites the same file with per-answer scores, an overall success rate,
    and one success rate per preset.

    Note: the misspelled name ("responces") is kept because the function is
    called by name from the __main__ block.
    """
    # Imported locally so the function also works when this module is
    # imported; the original relied on the __main__ block having injected
    # json/tqdm into module globals.
    import json
    import tqdm

    scorer_name = get_model_name()
    with open(filename) as lf:
        prev_answers = json.load(lf)

    new_list = []
    success_counter = 0
    for resp in tqdm.tqdm(prev_answers["answers"]):
        # Default verdict; overwritten below whenever the model produced text.
        resp[f"{scorer_name}_score"] = "Empty response: FAIL"
        if resp["response"] != "":
            # The judge sees the continuation prompt plus the generated text.
            score_prompt = agnostic_instruct.format(resp["added_prompt"] + resp["response"])
            score_body = make_request(score_prompt, "simple-1", use_description=False)
            resp[f"{scorer_name}_score"] = score_body["response"]
            if "success" in score_body["response"].lower():
                success_counter += 1
        new_list.append(resp)

    # Tally successes per preset for the per-preset rates below.
    preset_succ_counter = {k: 0 for k in presets}
    for preset in presets:
        for answer in new_list:
            if answer["preset"] == preset:
                if "success" in answer[f"{scorer_name}_score"].lower():
                    preset_succ_counter[preset] += 1

    # Overall success rate; guard an empty answers list (was a
    # ZeroDivisionError in the original).
    full_rate = success_counter / len(new_list) if new_list else 0.0
    data = {"answers": new_list,
            f"{scorer_name}_success_rate_full": full_rate}

    # Each preset is generated once per prompt, so len(prompts) is the
    # per-preset denominator.
    for p, v in preset_succ_counter.items():
        data[f"{scorer_name}_success_rate_{p}"] = round(v / len(prompts), 2)

    with open(filename, "w") as lf:
        json.dump(data, lf, indent=4)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Heavy/optional imports live here so importing this module stays cheap.
    import json
    import sys

    import tqdm

    # Scoring mode: `./jane_index.py results.json` re-judges an existing file.
    # Fix: use the public `sys` module instead of reaching it via `os.sys`,
    # and sys.exit() instead of the site-module exit() helper.
    if len(sys.argv) >= 2 and sys.argv[1].endswith(".json"):
        score_responces(sys.argv[1])
        sys.exit(0)

    # Generation mode: one request per (prompt, preset) pair, saved to a json
    # file named after the loaded model.
    collect_resp = []
    model_name = get_model_name()
    for p in tqdm.tqdm(prompts):
        for preset in presets:
            collect_resp.append(make_request(p, preset))

    data = {"answers": collect_resp}
    with open(f"{model_name}.json", "w") as lf:
        json.dump(data, lf, indent=4)
|
||||
16
readme.md
Normal file
16
readme.md
Normal file
@ -0,0 +1,16 @@
|
||||
### Jane Index
|
||||
|
||||
### Testing llms on quadruple amputee scenarios
|
||||
Tests the presets shipped with ooba's webui by generating the actions of a quadruple amputee character and judging their plausibility;
|
||||
|
||||
### Initial Generation (generative model)
|
||||
- start ooba's text generation webui service on port 5000;
|
||||
- load the model you want to use for generation;
|
||||
`./jane_index.py`
|
||||
will generate json file with that model name (for ex: TheBloke_Llama-2-13B-GPTQ.json);
|
||||
|
||||
### Scoring (judge model)
|
||||
- go to ooba's webui and set judge model (preferably 65b+);
|
||||
`./jane_index.py TheBloke_Llama-2-13B-GPTQ.json`
|
||||
the judge scores each generation as a success or a failure and modifies the initial json file to add the scores,
|
||||
where 1 is a perfect score and 0 is a complete failure.
|
||||
2
requirements.txt
Normal file
2
requirements.txt
Normal file
@ -0,0 +1,2 @@
|
||||
requests
|
||||
tqdm
|
||||
Loading…
Reference in New Issue
Block a user