This repository has been archived by the owner on Oct 17, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 33
/
Copy pathscore.py
67 lines (52 loc) · 2.36 KB
/
score.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import argparse
import glob
import pandas as pd
# 파일 경로 패턴
# file_pattern = './judge_20240418_103542.jsonl'
parser = argparse.ArgumentParser()
parser.add_argument("-p", "--print", help="judge Output File Location", default=None)
args = parser.parse_args()
if args.print is None:
raise ValueError("Judge Output File Location is required")
# 카테고리별 점수 집계를 위한 딕셔너리
category_scores = {}
# 전체 싱글 점수와 멀티 점수의 리스트
total_single_scores = []
total_multi_scores = []
# 지정된 패턴에 맞는 모든 파일을 찾아서 처리
for file_path in glob.glob(args.print):
file = pd.read_json(file_path, orient="records", encoding="utf-8-sig", lines=True)
for item in file.to_dict(orient="records"):
category = item["category"]
single_score = item["query_single"]["judge_score"]
multi_score = item["query_multi"]["judge_score"]
if category not in category_scores:
category_scores[category] = {"single_scores": [], "multi_scores": []}
category_scores[category]["single_scores"].append(single_score)
category_scores[category]["multi_scores"].append(multi_score)
# 전체 점수 리스트에 추가
total_single_scores.append(single_score)
total_multi_scores.append(multi_score)
# 표의 헤더 생성
table_header = "| Category | Single turn | Multi turn |\n|---|---|---|"
# 표의 내용 생성
table_rows = []
for category, scores in category_scores.items():
avg_single = sum(scores["single_scores"]) / len(scores["single_scores"])
avg_multi = sum(scores["multi_scores"]) / len(scores["multi_scores"])
table_rows.append(f"| {category} | {avg_single:.2f} | {avg_multi:.2f} |")
total_single_scores.extend(scores["single_scores"])
total_multi_scores.extend(scores["multi_scores"])
# 카테고리별 점수 평균 출력
print(table_header)
for row in table_rows:
print(row)
# 전체 점수의 평균 계산 및 출력
avg_total_single = sum(total_single_scores) / len(total_single_scores)
avg_total_multi = sum(total_multi_scores) / len(total_multi_scores)
avg_total = (avg_total_single + avg_total_multi) / 2
# 전체 점수 평균 출력
print("\n| Category | Score |\n|---|---|")
print(f"| Single turn | {avg_total_single:.2f} |")
print(f"| Multi turn | {avg_total_multi:.2f} |")
print(f"| Overall | {avg_total:.2f} |")