jane-index/human_eval_gui.py

#!/usr/bin/env python

from jane_index import presets, prompts
import tkinter as tk
from tkinter import ttk
from tkinter.font import Font
import json

# Path of the answers JSON file. init() sets it from the first command-line
# argument and calculate_index() writes the scored results back to it.
FILENAME = ""


def update_host_label():
    """Show the hosts returned by ``find_hosts(CURRENT_FILE)`` in ``host_label``.

    The hosts are joined with commas and prefixed with ``"found in "``.
    """
    # NOTE(review): find_hosts, CURRENT_FILE and host_label are not defined in
    # the visible part of this module -- confirm they exist before calling.
    joined_hosts = ",".join(find_hosts(CURRENT_FILE))
    host_label.config(text=f"found in {joined_hosts}")


def init():
    """Load the answers file named by the first command-line argument.

    The path is stored in the module-level ``FILENAME`` so that the scores
    can later be written back to the same file.

    Returns:
        The parsed JSON content of the file.

    Raises:
        SystemExit: with exit code 1 when no argument ending in ``.json``
            was supplied.
    """
    # Use sys directly: ``os.sys`` is an undocumented implementation detail
    # and the bare ``exit()`` helper only exists when ``site`` is loaded.
    import sys

    global FILENAME

    if len(sys.argv) < 2 or not sys.argv[1].endswith(".json"):
        print("provide path json file as an argument")
        sys.exit(1)

    FILENAME = sys.argv[1]
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(FILENAME, encoding="utf-8") as lf:
        return json.load(lf)

def load_next_answer(answers):
    """Advance to the next answer and refresh the widgets.

    Increments the module-level ``current_index``. While answers remain, the
    text widget shows the next prompt and response and the preset and
    progress labels are updated. Once every answer has been scored, the
    result of ``calculate_index`` is shown instead.

    Args:
        answers: the loaded answers mapping; its ``"answers"`` entry is the
            list of items being scored.
    """
    global current_index
    current_index += 1
    total = len(answers["answers"])

    if current_index >= total:
        # Everything is scored: compute, persist and display the final rates.
        scores = calculate_index(answers)
        answer_label.delete('1.0', tk.END)
        # Format explicitly instead of relying on Tk's implicit str() of a dict.
        answer_label.insert('1.0', json.dumps(scores, indent=4))
        progress_label.config(text=f"progress: {total} / {total}")
        return

    entry = answers["answers"][current_index]
    current_answer = entry["added_prompt"] + "\n" + entry["response"]
    current_preset = entry["preset"]

    answer_label.delete('1.0', tk.END)
    answer_label.insert('1.0', current_answer)
    # Keep the "preset: " prefix the label is created with at start-up.
    preset_label.config(text=f"preset: {current_preset}")
    progress_label.config(text=f"progress: {current_index} / {total}")

def calculate_index(answers):
    """Compute the human-eval success rates and save them to ``FILENAME``.

    An answer counts as a success when its ``human_eval_score`` contains
    "success" (case-insensitive). The overall rate is the number of
    successes divided by the number of answers. Each per-preset rate is that
    preset's successes divided by ``len(prompts)``, rounded to two decimals
    (presumably every preset was run against every prompt -- confirm).

    The rates are stored on ``answers`` under ``human_eval_success_rate_*``
    keys and the whole mapping is written back to ``FILENAME``.

    Args:
        answers: the loaded answers mapping; its ``"answers"`` entry is the
            list of scored items.

    Returns:
        A dict of every top-level entry of ``answers`` whose key contains
        ``"human_eval"``.
    """
    scorer_name = "human_eval"
    score_key = f"{scorer_name}_score"
    scored_answers = answers["answers"]

    preset_succ_counter = {p: 0 for p in presets}
    # Total successes across all answers (this counter was previously never
    # defined, so computing the overall rate raised NameError).
    success_counter = 0
    for answer in scored_answers:
        if "success" not in answer[score_key].lower():
            continue
        success_counter += 1
        if answer["preset"] in preset_succ_counter:
            preset_succ_counter[answer["preset"]] += 1

    # Guard both divisions so an empty answers or prompts list yields 0.0
    # instead of ZeroDivisionError.
    answer_count = len(scored_answers)
    answers[f"{scorer_name}_success_rate_full"] = (
        success_counter / answer_count if answer_count else 0.0
    )

    prompt_count = len(prompts)
    for p, v in preset_succ_counter.items():
        answers[f"{scorer_name}_success_rate_{p}"] = (
            round(v / prompt_count, 2) if prompt_count else 0.0
        )

    with open(FILENAME, "w", encoding="utf-8") as lf:
        json.dump(answers, lf, indent=4)

    # Return only the evaluation entries.
    return {k: v for k, v in answers.items() if scorer_name in k}


def success_click():
    """Mark the current answer as ``"SUCCESS"`` and load the next one.

    Does nothing once every answer has been scored.
    """
    # ">=" rather than ">": after the last answer is scored current_index
    # equals the list length, and indexing there would raise IndexError.
    if current_index >= len(answers["answers"]):
        return
    answers["answers"][current_index]["human_eval_score"] = "SUCCESS"
    load_next_answer(answers)

def fail_click():
    """Mark the current answer as ``"FAIL"`` and load the next one.

    Does nothing once every answer has been scored.
    """
    # ">=" rather than ">": after the last answer is scored current_index
    # equals the list length, and indexing there would raise IndexError.
    if current_index >= len(answers["answers"]):
        return
    answers["answers"][current_index]["human_eval_score"] = "FAIL"
    load_next_answer(answers)

if __name__ == "__main__":
    # The names below are deliberately module-level: the button callbacks and
    # load_next_answer() read and update them as globals.
    answers = init()
    answer_length = len(answers["answers"])
    if answer_length == 0:
        # Nothing to score; exit cleanly instead of failing with IndexError.
        raise SystemExit(f"no answers to score in {FILENAME}")
    current_index = 0

    first_entry = answers["answers"][current_index]
    current_answer = first_entry["added_prompt"] + "\n" + first_entry["response"]
    current_preset = first_entry["preset"]

    root = tk.Tk()
    root.geometry("1000x800")
    root.resizable(True, True)
    root.title("human score gui")
    textFont = Font(size=16)

    quit_btn = tk.Button(root, text="Quit", command=root.destroy)
    # A Text widget (rather than a Label) so long answers wrap on word
    # boundaries inside the window.
    answer_label = tk.Text(root, font=textFont, wrap=tk.WORD)
    answer_label.insert('1.0', current_answer)

    preset_label = tk.Label(root, text=f"preset: {current_preset}", font=textFont)
    progress_label = tk.Label(root, text=f"progress: {current_index} / {answer_length}", font=textFont)

    load_frame = tk.Frame(root)

    # Create each button first and place it afterwards: grid() returns None,
    # so chaining it onto the constructor would bind the names to None.
    success_btn = tk.Button(load_frame, text="success",
                            command=success_click, font=textFont)
    success_btn.grid(row=1, column=0)
    fail_btn = tk.Button(load_frame, text="fail", command=fail_click, font=textFont)
    fail_btn.grid(row=1, column=1)

    answer_label.pack(
        expand=True,
        fill=tk.BOTH,
    )
    load_frame.pack(
        expand=True,
    )
    preset_label.pack()
    progress_label.pack()
    quit_btn.pack(
        ipadx=5,
        ipady=7,
        expand=True,
    )
    root.mainloop()