forked from LLM-Dev-Open/opencompass_base
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsummarizer.py
62 lines (51 loc) · 1.22 KB
/
summarizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from mmengine.config import read_base

# Base-config imports are wrapped in `read_base()` so that mmengine treats
# them as inherited config content. Every `*_summary_groups` name bound here
# is collected into `summary_groups` at the bottom of this file.
with read_base():
    from .configs.summarizers.groups.mmlu import mmlu_summary_groups
    from .configs.summarizers.groups.cmmlu import cmmlu_summary_groups
    from .configs.summarizers.groups.ceval import ceval_summary_groups
# Dataset abbreviations in display order. Strings made of dashes are section
# headers that separate the groups. Note that 'mmlu' is listed twice: once as
# the aggregate under "MMLU details" and once under "Standard Benchmarks".
dataset_abbrs = [
    '------- MMLU details -------',
    'mmlu-humanities',
    'mmlu-stem',
    'mmlu-social-science',
    'mmlu-other',
    'mmlu',

    '---- Standard Benchmarks ---',
    'BoolQ',
    'piqa',
    'siqa',
    'hellaswag',
    'winogrande',
    'ARC-e',
    'ARC-c',
    'openbookqa_fact',
    'commonsense_qa',
    'mmlu',

    # TODO: work out how to report both the 0-shot and 3-shot metrics.
    '------ Code Generation -----',
    'openai_humaneval',
    'mbpp',

    # TODO: add 0-shot, 1-shot, 5-shot and 64-shot results for both datasets.
    '------ World Knowledge -----',
    'nq',
    'triviaqa',

    # TODO: add 0-shot, 1-shot, 4-shot and 5-shot results, plus QUAC 0-shot
    # and 1-shot.
    '--- Reading Comprehension --',
    'squad2.0',

    '---------- Exams -----------',
    'math',
    'gsm8k',
    'TheoremQA',

    '--------- Chinese ----------',
    'ceval',
    'ceval-stem',
    'ceval-social-science',
    'ceval-humanities',
    'ceval-other',
    'ceval-hard',
    'cmmlu',
    'cmmlu-humanities',
    'cmmlu-stem',
    'cmmlu-social-science',
    'cmmlu-other',
    'cmmlu-china-specific',
]
# Concatenate every list bound to a name ending in "_summary_groups" in the
# current namespace into one flat list, in binding order.
summary_groups = sum(
    (
        group_list
        for var_name, group_list in locals().items()
        if var_name.endswith('_summary_groups')
    ),
    [],
)